; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v \
; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=riscv64 -mattr=+v \
; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefix=CHECK

; Each function loads a 16-element fixed-length vector, extracts a strided
; subsequence of its elements with a single-source shufflevector, and stores
; the result. Interleaved-access lowering is disabled on the command lines
; above, so the shuffle itself is what gets lowered.
;
; Functions are named deinterleave<stride>_<offset>_<type>. Offset 0 starts at
; element 0 and offset 8 starts at element 1 (presumably a bit offset for i8
; elements - confirm against the lowering code). Mask lanes past the end of
; the strided sequence are don't-care and are written as poison.

; Stride 3, starting at element 0.
define void @deinterleave3_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave3_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    li a0, 73
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vrgather.vv v10, v8, v9
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 3, starting at element 1.
define void @deinterleave3_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave3_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI1_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    li a0, 146
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vrgather.vv v10, v8, v9
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 4, starting at element 0.
define void @deinterleave4_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave4_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 4, starting at element 1.
define void @deinterleave4_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave4_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 8
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 5, starting at element 0.
define void @deinterleave5_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave5_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    lui a0, 28704
; CHECK-NEXT:    addi a0, a0, 1280
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v10, v8, v9
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 5, i32 10, i32 15, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 5, starting at element 1.
define void @deinterleave5_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave5_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 3, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 6, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 6, starting at element 0.
define void @deinterleave6_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 65
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 6, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 6, starting at element 1.
define void @deinterleave6_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 130
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 5, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 7, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 7, starting at element 0.
define void @deinterleave7_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 129
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 6, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 7, starting at element 1.
; NOTE(review): the third mask index below is 14, whereas a stride-7 sequence
; starting at element 1 would be 1, 8, 15. The assertions match the mask as
; written (14). If 15 was intended, change the mask and regenerate the
; assertions with utils/update_llc_test_checks.py.
define void @deinterleave7_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, -6
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    li a0, 6
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vmadd.vx v10, a0, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v11, v8, 1
; CHECK-NEXT:    vrgather.vv v11, v9, v10, v0.t
; CHECK-NEXT:    vse8.v v11, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 8, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 8, starting at element 0.
define void @deinterleave8_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave8_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride 8, starting at element 1.
define void @deinterleave8_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave8_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Exercise the high lmul case
define void @deinterleave7_0_i64(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 129
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vcompress.vm v20, v8, v16
; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT:    vrgather.vi v20, v8, 6, v0.t
; CHECK-NEXT:    vse64.v v20, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i64>, ptr %in
  %shuffle.i5 = shufflevector <16 x i64> %0, <16 x i64> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x i64> %shuffle.i5, ptr %out
  ret void
}

; Store back only the active subvector
define void @deinterleave4_0_i8_subvec(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave4_0_i8_subvec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  store <4 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Store back only the active subvector
define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i32_subvec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 129
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vcompress.vm v14, v8, v12
; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vrgather.vi v14, v8, 6, v0.t
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v14, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i32>, ptr %in
  %shuffle.i5 = shufflevector <16 x i32> %0, <16 x i32> poison, <3 x i32> <i32 0, i32 7, i32 14>
  store <3 x i32> %shuffle.i5, ptr %out
  ret void
}

; Store back only the active subvector
define void @deinterleave8_0_i8_subvec(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave8_0_i8_subvec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <2 x i32> <i32 0, i32 8>
  store <2 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}