; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: shuffle_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i16> %s
}

define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: shuffle_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 203
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  ret <8 x i32> %s
}

define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_xv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 9
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shuffle_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x i16> %s
}
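; The first source is a constant splat of 5, so the splat is materialized with
; vmv.v.i and the elements taken from %x are gathered in under a mask.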
define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vmv.v.i v0, 12
; CHECK-NEXT:    vrsub.vi v10, v9, 4
; CHECK-NEXT:    vmv.v.i v9, 5
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x i16> %s
}

define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK-LABEL: vrgather_shuffle_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 9
; CHECK-NEXT:    vmv.v.i v0, 3
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vmv.v.i v8, 5
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x i16> %s
}

define <8 x i64> @vrgather_permute_shuffle_vu_v8i64(<8 x i64> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> poison, <8 x i32> <i32 1, i32 2, i32 0, i32 1, i32 7, i32 6, i32 0, i32 1>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i64> poison, <8 x i64> %x, <8 x i32> <i32 9, i32 10, i32 8, i32 9, i32 15, i32 8, i32 8, i32 11>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vle16.v v20, (a0)
; RV32-NEXT:    vmv.v.i v21, 2
; RV32-NEXT:    li a0, 164
; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v8, v20
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    li a0, 5
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v8, v21, a0
; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vv_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 164
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, 327683
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 17
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    vmv.v.x v20, a0
; RV64-NEXT:    lui a0, 163841
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    addi a0, a0, 1
; RV64-NEXT:    slli a0, a0, 17
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vrgatherei16.vv v16, v8, v20
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> <i32 1, i32 2, i32 10, i32 5, i32 1, i32 10, i32 3, i32 13>
  ret <8 x i64> %s
}
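; Same idea at e64: the all-ones source is built with vmv.v.i -1 and the lanes
; taken from %x are gathered in under mask 113.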
define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT:    vmv.v.i v16, -1
; RV32-NEXT:    vle16.v v20, (a0)
; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_1)
; RV32-NEXT:    vle16.v v21, (a0)
; RV32-NEXT:    li a0, 113
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vrgatherei16.vv v12, v16, v20
; RV32-NEXT:    vrgatherei16.vv v12, v8, v21, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_xv_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 113
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    lui a0, 98305
; RV64-NEXT:    slli a0, a0, 6
; RV64-NEXT:    vmv.v.x v16, a0
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vmv.v.i v12, -1
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x, <8 x i32> <i32 8, i32 3, i32 6, i32 5, i32 8, i32 12, i32 14, i32 3>
  ret <8 x i64> %s
}

define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_0)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT:    vle16.v v16, (a0)
; RV32-NEXT:    lui a0, %hi(.LCPI13_1)
; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_1)
; RV32-NEXT:    vle16.v v17, (a0)
; RV32-NEXT:    li a0, 140
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
; RV32-NEXT:    vmv.v.i v8, 5
; RV32-NEXT:    vrgatherei16.vv v12, v8, v17, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vrgather_shuffle_vx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
; RV64-NEXT:    addi a0, a0, %lo(.LCPI13_0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vle16.v v16, (a0)
; RV64-NEXT:    li a0, 115
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vmv.v.i v12, 5
; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %s = shufflevector <8 x i64> %x, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i32> <i32 0, i32 3, i32 10, i32 9, i32 4, i32 1, i32 7, i32 14>
  ret <8 x i64> %s
}

define <4 x i16> @shuffle_v8i16_to_vslidedown_1(<8 x i16> %x) {
; CHECK-LABEL: shuffle_v8i16_to_vslidedown_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i16> %s
}
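; Same as the previous test, but with a slide amount of 3.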
define <4 x i16> @shuffle_v8i16_to_vslidedown_3(<8 x i16> %x) {
; CHECK-LABEL: shuffle_v8i16_to_vslidedown_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x i16> %s
}

define <2 x i32> @shuffle_v4i32_to_vslidedown(<4 x i32> %x) {
; CHECK-LABEL: shuffle_v4i32_to_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
  ret <2 x i32> %s
}

define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
; CHECK-LABEL: interleave_shuffles:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 0
; CHECK-NEXT:    vrgather.vi v10, v8, 1
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vwaddu.vv v8, v9, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
; CHECK-NEXT:    ret
  %y = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %z = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %w = shufflevector <4 x i8> %y, <4 x i8> %z, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i8> %w
}

define <8 x i8> @splat_ve4(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 4
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i0ve2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    li a0, 2
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
; CHECK-LABEL: splat_ve4_ins_i1ve3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v10, v9, 1
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}
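; As splat_ve2_we0, but with %v element 4 inserted at lane 0, so the gather
; index vector needs a vmv.s.x into element 0 first.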
define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 2
; CHECK-NEXT:    li a0, 4
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v11, a0
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 4, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    li a0, 67
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 4
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 12, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2ve4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 8256
; CHECK-NEXT:    addi a0, a0, 514
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v11, a0
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 4
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 0
; CHECK-NEXT:    li a0, 70
; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v11, v10, 2
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 2
; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 12, i32 2, i32 2, i32 2, i32 8, i32 2>
  ret <8 x i8> %shuff
}

define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI26_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI26_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    li a0, 20
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vrgather.vv v8, v9, v10
; CHECK-NEXT:    ret
  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
  ret <8 x i8> %shuff
}
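; Splatting element 3 of a narrower <4 x i8> source into an <8 x i8> result is
; still a single vrgather.vi.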
define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
; CHECK-LABEL: widen_splat_ve3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vi v9, v8, 3
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i8> %shuf
}

define <4 x i16> @slidedown_v4i16(<4 x i16> %x) {
; CHECK-LABEL: slidedown_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  ret <4 x i16> %s
}

define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
; CHECK-LABEL: slidedown_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %s
}

define <4 x i16> @slideup_v4i16(<4 x i16> %x) {
; CHECK-LABEL: slideup_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  ret <4 x i16> %s
}

define <8 x i32> @slideup_v8i32(<8 x i32> %x) {
; CHECK-LABEL: slideup_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %s
}

define <8 x i16> @splice_unary(<8 x i16> %x) {
; CHECK-LABEL: splice_unary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vslideup.vi v9, v8, 6
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  ret <8 x i16> %s
}

define <8 x i32> @splice_unary2(<8 x i32> %x) {
; CHECK-LABEL: splice_unary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 5
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i32> %s
}

define <8 x i16> @splice_binary(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: splice_binary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vslideup.vi v8, v9, 6
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
  ret <8 x i16> %s
}
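; Two-source splice: slide %x down by 5, then slide %y up by 3.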
define <8 x i32> @splice_binary2(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: splice_binary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x i32> %s
}

define <4 x i16> @shuffle_shuffle_vslidedown(<16 x i16> %0) {
; CHECK-LABEL: shuffle_shuffle_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 5
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i16> %5
}

define <8 x i8> @concat_4xi8_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 undef, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef_at_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 9, i32 10, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_end_non_contiguous:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 144
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 4, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
  ret <8 x i8> %res
}

define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_end_into_end:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_middle:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}
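; The first four lanes come from the start of %w, so a tail-undisturbed
; vmv.v.v with VL=4 performs the merge.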
define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_start:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %res
}

define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 60
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
  ret <8 x i8> %res
}

; This should slide %v down by 2 and %w up by 1 before merging them
define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, -22
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
  ret <8 x i8> %res
}

; This shouldn't generate a vmerge because the elements of %w are not consecutive
define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    li a0, 84
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vrgather.vv v8, v9, v10
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
  ret <8 x i8> %res
}

; Make sure we use a vmv.v.i to load the mask constant.
define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: shuffle_v8i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 13
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %s
}

; FIXME: This could be expressed as a vrgather.vv
define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
; CHECK-LABEL: shuffle_v64i8_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    ret
  %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
  ret <8 x i8> %s
}

define <8 x i8> @shuffle_compress_singlesrc_e8(<8 x i8> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 181
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vcompress.vm v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
  ret <8 x i8> %out
}

define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 181
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vcompress.vm v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %out
}

define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v12, 13
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %out
}

define <8 x i64> @shuffle_compress_singlesrc_e64(<8 x i64> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_e64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 181
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vcompress.vm v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
  ret <8 x i64> %out
}
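; The undef gap in the middle of the selected sequence means this is not a
; plain compress, so it lowers to a constant-index vrgatherei16 instead.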
define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI53_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI53_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread2_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread2_singlesrc_e32_index1(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsll.vx v8, v10, a0
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread2_singlesrc_e32_index2(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsrl.vi v10, v10, 1
; CHECK-NEXT:    vadd.vi v12, v10, -1
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_spread3_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread3_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vslide1down.vx v12, v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 2, i32 undef>
  ret <8 x i32> %out
}

; TODO: This should be a single vslideup.vi
define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsrl.vi v12, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %out
}
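; With a nonzero start index the spread is still a vzext.vf4, with the offset
; folded into a left shift by 8*idx bits.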
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 8
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 24
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3>
  ret <16 x i8> %out
}

define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vsrl.vi v9, v9, 2
; CHECK-NEXT:    vadd.vi v10, v9, -1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %out
}


define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) {
; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf8 v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %out
}

define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI65_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI65_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 undef, i32 4>
  ret <8 x i32> %out
}

; TODO: This should be a single vslideup.vi
define <8 x i8> @shuffle_decompress_singlesrc_e8(<8 x i8> %v) {
; CHECK-LABEL: shuffle_decompress_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI66_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI66_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i8> %out
}

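; Repeating each element twice is an interleave of the source with itself,
; lowered with the vwaddu.vv/vwmaccu.vx zip idiom.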
define <8 x i32> @shuffle_repeat2_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v8
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 7
; CHECK-NEXT:    vmv.v.i v11, 1
; CHECK-NEXT:    li a0, 192
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vmerge.vim v11, v11, 0, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vim v12, v11, 2, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsrl.vi v12, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: shuffle_zipeven_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslideup.vi v8, v10, 1, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i32> %out
}

define <8 x i32> @shuffle_zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: shuffle_zipodd_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i32> %out
}
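; The same zip patterns at LMUL=8, where the 16-bit mask constant no longer
; fits in an immediate and is materialized with lui/addi.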
define <16 x i64> @shuffle_zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; CHECK-LABEL: shuffle_zipeven_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 11
; CHECK-NEXT:    addi a0, a0, -1366
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslideup.vi v8, v16, 1, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i64> %out
}

define <16 x i64> @shuffle_zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; CHECK-LABEL: shuffle_zipodd_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <16 x i64> %out
}

define <16 x i32> @shuffle_disjoint_lanes(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI74_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI74_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, 11
; CHECK-NEXT:    addi a0, a0, -1366
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v18, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v18
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}

define <16 x i32> @shuffle_disjoint_lanes_one_identity(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI75_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI75_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    li a0, -272
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 26, i32 30, i32 22, i32 20, i32 8, i32 31, i32 29, i32 28, i32 27, i32 23, i32 25, i32 22>
  ret <16 x i32> %out
}

define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI76_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI76_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v20, (a0)
; CHECK-NEXT:    lui a0, 15
; CHECK-NEXT:    addi a0, a0, 240
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v16, v8, 7
; CHECK-NEXT:    vrgatherei16.vv v16, v12, v20, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 26, i32 30, i32 22, i32 18, i32 7, i32 7, i32 7, i32 7, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}
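; One operand is a scalar splat, so it is rematerialized with vmv.v.x and the
; lanes of %w are gathered in under the mask.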
define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI77_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI77_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    lui a1, 15
; CHECK-NEXT:    addi a1, a1, 240
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <16 x i32> poison, i32 %v, i32 0
  %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer
  %out = shufflevector <16 x i32> %splat, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}

define <4 x i128> @shuffle_i128(<4 x i128> %a) {
; RV32-LABEL: shuffle_i128:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    lw a2, 60(a1)
; RV32-NEXT:    sw a2, 60(sp)
; RV32-NEXT:    lw a2, 56(a1)
; RV32-NEXT:    sw a2, 56(sp)
; RV32-NEXT:    lw a2, 52(a1)
; RV32-NEXT:    sw a2, 52(sp)
; RV32-NEXT:    lw a2, 48(a1)
; RV32-NEXT:    sw a2, 48(sp)
; RV32-NEXT:    lw a2, 44(a1)
; RV32-NEXT:    sw a2, 44(sp)
; RV32-NEXT:    lw a2, 40(a1)
; RV32-NEXT:    sw a2, 40(sp)
; RV32-NEXT:    lw a2, 36(a1)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lw a2, 32(a1)
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    lw a2, 12(a1)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(sp)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    mv a2, sp
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    lui a1, %hi(.LCPI78_0)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI78_0)
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vle32.v v8, (a2)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle16.v v12, (a1)
; RV32-NEXT:    vrgatherei16.vv v16, v8, v12
; RV32-NEXT:    vse64.v v16, (a0)
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    .cfi_def_cfa sp, 128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i128:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    ld a2, 56(a1)
; RV64-NEXT:    sd a2, 56(sp)
; RV64-NEXT:    ld a2, 48(a1)
; RV64-NEXT:    sd a2, 48(sp)
; RV64-NEXT:    ld a2, 40(a1)
; RV64-NEXT:    sd a2, 40(sp)
; RV64-NEXT:    ld a2, 32(a1)
; RV64-NEXT:    sd a2, 32(sp)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(sp)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    sd a1, 0(sp)
; RV64-NEXT:    lui a1, %hi(.LCPI78_0)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI78_0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vle64.v v8, (a2)
; RV64-NEXT:    vle16.v v12, (a1)
; RV64-NEXT:    vrgatherei16.vv v16, v8, v12
; RV64-NEXT:    vse64.v v16, (a0)
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    .cfi_def_cfa sp, 128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  ret <4 x i128> %res
}
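; With the <4 x i128> value loaded and stored through memory, the shuffle can
; be done entirely with vector loads/stores and no stack round-trip.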
define void @shuffle_i128_ldst(ptr %p) {
; CHECK-LABEL: shuffle_i128_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI79_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI79_0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v12
; CHECK-NEXT:    vse64.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i128> %res, ptr %p
  ret void
}

define void @shuffle_i256_ldst(ptr %p) {
; CHECK-LABEL: shuffle_i256_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI80_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI80_0)
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v24, v16, v10
; CHECK-NEXT:    vse64.v v24, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i256> %res, ptr %p
  ret void
}

define void @shuffle_i64_splat(ptr %p) nounwind {
; RV32-LABEL: shuffle_i64_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i64_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %p
  %res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i64> %res, ptr %p
  ret void
}

define void @shuffle_i128_splat(ptr %p) nounwind {
; CHECK-LABEL: shuffle_i128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v12
; CHECK-NEXT:    vse64.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i128> %res, ptr %p
  ret void
}
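; An i256 splat gathers four consecutive e64 elements per lane; RV32 builds
; the {0,1,2,3} index pattern via vsext of a 32-bit constant, RV64 via a
; materialized 64-bit constant.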
define void @shuffle_i256_splat(ptr %p) nounwind {
; RV32-LABEL: shuffle_i256_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 12320
; RV32-NEXT:    addi a1, a1, 256
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v18, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v24, v8, v18
; RV32-NEXT:    vse64.v v24, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i256_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 98305
; RV64-NEXT:    slli a1, a1, 5
; RV64-NEXT:    addi a1, a1, 1
; RV64-NEXT:    slli a1, a1, 16
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v16, a1
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v24, v8, v16
; RV64-NEXT:    vse64.v v24, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i256> %res, ptr %p
  ret void
}