; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV64 %s


define <8 x i1> @v8i1_v16i1(<16 x i1>) {
; RV32-LABEL: v8i1_v16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v0
; RV32-NEXT:    slli a1, a0, 18
; RV32-NEXT:    srli a2, a0, 31
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    slli a2, a0, 27
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    slli a1, a0, 26
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    slli a1, a0, 28
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    slli a2, a0, 19
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    vslide1down.vx v9, v9, a2
; RV32-NEXT:    slli a2, a0, 24
; RV32-NEXT:    slli a0, a0, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a2, a2, 31
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v9, v9, a2
; RV32-NEXT:    vmv.v.i v0, 15
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT:    vand.vi v8, v8, 1
; RV32-NEXT:    vmsne.vi v0, v8, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: v8i1_v16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v0
; RV64-NEXT:    slli a1, a0, 50
; RV64-NEXT:    srli a2, a0, 63
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    slli a2, a0, 59
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    slli a1, a0, 58
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    slli a1, a0, 60
; RV64-NEXT:    srli a2, a2, 63
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    slli a2, a0, 51
; RV64-NEXT:    srli a2, a2, 63
; RV64-NEXT:    vslide1down.vx v9, v9, a2
; RV64-NEXT:    slli a2, a0, 56
; RV64-NEXT:    slli a0, a0, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a2, a2, 63
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v9, v9, a2
; RV64-NEXT:    vmv.v.i v0, 15
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    vslidedown.vi v8, v9, 4, v0.t
; RV64-NEXT:    vand.vi v8, v8, 1
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    ret
  %2 = shufflevector <16 x i1> %0, <16 x i1> poison, <8 x i32> <i32 5, i32 12, i32 7, i32 2, i32 15, i32 13, i32 4, i32 3>
  ret <8 x i1> %2
}

define <4 x i32> @v4i32_v8i32(<8 x i32>) {
; CHECK-LABEL: v4i32_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vsrl.vi v10, v10, 1
; CHECK-NEXT:    vrsub.vi v11, v10, 3
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 4
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 5, i32 3, i32 7, i32 2>
  ret <4 x i32> %2
}

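; Gather elements 1, 9, 5, and 14 of a <16 x i32> source into a <4 x i32> result.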
define <4 x i32> @v4i32_v16i32(<16 x i32>) {
; RV32-LABEL: v4i32_v16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.i v12, 1
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.i v14, 6
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vmv.v.i v0, 10
; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
; RV32-NEXT:    vslideup.vi v14, v12, 1
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wx v12, v8, a0
; RV32-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 8
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT:    vrgatherei16.vv v12, v8, v14, v0.t
; RV32-NEXT:    vmv1r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: v4i32_v16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.i v0, 10
; RV64-NEXT:    vnsrl.wx v12, v8, a0
; RV64-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 8
; RV64-NEXT:    li a0, 3
; RV64-NEXT:    slli a0, a0, 33
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 16
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v10, a0
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; RV64-NEXT:    vrgatherei16.vv v12, v8, v10, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 1, i32 9, i32 5, i32 14>
  ret <4 x i32> %2
}

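; Gather elements 1, 9, 4, and 30 of a <32 x i32> source; the source is stored
; to the stack and two of the lanes are reloaded as scalars.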
define <4 x i32> @v4i32_v32i32(<32 x i32>) {
; RV32-LABEL: v4i32_v32i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    .cfi_def_cfa_offset 256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v16, v8, 1
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 4
; RV32-NEXT:    lw a0, 36(sp)
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    lw a1, 120(sp)
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslide1down.vx v8, v9, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    .cfi_def_cfa sp, 256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: v4i32_v32i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 1
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 4
; RV64-NEXT:    lw a0, 36(sp)
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    lw a1, 120(sp)
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslide1down.vx v8, v9, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    .cfi_def_cfa sp, 256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %2 = shufflevector <32 x i32> %0, <32 x i32> poison, <4 x i32> <i32 1, i32 9, i32 4, i32 30>
  ret <4 x i32> %2
}

define <16 x i1> @v16i1_v8i1(<8 x i1>) {
; CHECK-LABEL: v16i1_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vrgather.vv v10, v9, v8
; CHECK-NEXT:    vmsne.vi v0, v10, 0
; CHECK-NEXT:    ret
  %2 = shufflevector <8 x i1> %0, <8 x i1> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 5, i32 1, i32 2, i32 0, i32 6, i32 2, i32 3, i32 0, i32 7, i32 1, i32 2, i32 0, i32 4>
  ret <16 x i1> %2
}

define <8 x i32> @v8i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v8i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <8 x i32> %2
}

define <16 x i32> @v16i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v16i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    addi a1, a0, 265
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    lui a1, 4
; CHECK-NEXT:    addi a1, a1, 548
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    addi a0, a0, -1856
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 0, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v16, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <16 x i32> %2
}

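; Repeat a pattern of elements 0-3 of a <4 x i32> source across a <32 x i32>
; result; the vrgatherei16 index vector is built with vmerge.vim and widened
; with vsext.vf2.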
define <32 x i32> @v32i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v32i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    lui a1, 135432
; CHECK-NEXT:    addi a1, a1, 1161
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    lui a1, 270865
; CHECK-NEXT:    addi a1, a1, 548
; CHECK-NEXT:    vmv.s.x v9, a1
; CHECK-NEXT:    lui a1, 100550
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 3
; CHECK-NEXT:    addi a0, a1, 64
; CHECK-NEXT:    vmerge.vim v18, v10, 2, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmerge.vim v18, v18, 0, v0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vmerge.vim v16, v18, 1, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsext.vf2 v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <32 x i32> %2
}

define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
; CHECK-LABEL: vnsrl_v32i8_v64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 8
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
  ret <32 x i8> %res
}