; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; These tests pin the exact vector length via vscale_range (with +v,
; vscale_range(2,2) means VLEN=128), allowing fixed-length shuffles over
; register groups to be lowered as operations on individual m1 registers.

define <4 x i64> @m2_splat_0(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %res
}

define <4 x i64> @m2_splat_in_chunks(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_in_chunks:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vrgather.vi v11, v9, 0
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x i64> %res
}

define <8 x i64> @m4_splat_in_chunks(<8 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m4_splat_in_chunks:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vrgather.vi v13, v9, 0
; CHECK-NEXT:    vrgather.vi v14, v10, 0
; CHECK-NEXT:    vrgather.vi v15, v11, 1
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %v1, <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 7, i32 7>
  ret <8 x i64> %res
}


define <4 x i64> @m2_splat_with_tail(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_with_tail:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x i64> %res
}

define <4 x i64> @m2_pair_swap_vl4(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_pair_swap_vl4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v11, v9, 1
; CHECK-NEXT:    vslideup.vi v11, v9, 1
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i64> %res
}

define <8 x i32> @m2_pair_swap_vl8(<8 x i32> %v1) vscale_range(2,2) {
; RV32-LABEL: m2_pair_swap_vl8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vwsubu.vx v12, v10, a0
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vsrl.vv v12, v8, v12
; RV32-NEXT:    vsll.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: m2_pair_swap_vl8:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
  %res = shufflevector <8 x i32> %v1, <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i32> %res
}

define <4 x i64> @m2_splat_into_identity(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x i64> %res
}

define <4 x i64> @m2_broadcast_i128(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_broadcast_i128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %res
}

define <8 x i64> @m4_broadcast_i128(<8 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m4_broadcast_i128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vmv1r.v v10, v8
; CHECK-NEXT:    vmv1r.v v11, v8
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %v1, <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i64> %res
}


define <4 x i64> @m2_splat_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_two_source:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vrgather.vi v13, v11, 1
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
  ret <4 x i64> %res
}

define <4 x i64> @m2_splat_into_identity_two_source_v2_hi(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_identity_two_source_v2_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  ret <4 x i64> %res
}

define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_slide_two_source_v2_lo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vmv1r.v v13, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
  ret <4 x i64> %res
}

define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_slide_two_source:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v13, v10, 1
; CHECK-NEXT:    vslideup.vi v13, v11, 1
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
  ret <4 x i64> %res
}

define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
; CHECK-LABEL: shuffle1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 252
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vsrl.vi v10, v10, 1
; CHECK-NEXT:    vadd.vi v10, v10, 1
; CHECK-NEXT:    vrgather.vv v9, v11, v10, v0.t
; CHECK-NEXT:    addi a0, a1, 672
; CHECK-NEXT:    vs2r.v v8, (a0)
; CHECK-NEXT:    ret
  %1 = getelementptr i32, ptr %explicit_0, i64 63
  %2 = load <3 x i32>, ptr %1, align 1
  %3 = shufflevector <3 x i32> %2, <3 x i32> undef, <2 x i32> <i32 1, i32 2>
  %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 9, i32 7>
  %6 = getelementptr inbounds <8 x i32>, ptr %explicit_1, i64 21
  store <8 x i32> %5, ptr %6, align 32
  ret void
}

define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
; CHECK-LABEL: shuffle2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v8
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vid.v v13
; CHECK-NEXT:    vadd.vv v13, v13, v13
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vrsub.vi v13, v13, 4
; CHECK-NEXT:    vrgather.vv v9, v12, v13, v0.t
; CHECK-NEXT:    ret
  %b = extractelement <4 x float> %a, i32 2
  %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
  %b1 = extractelement <4 x float> %a, i32 0
  %c1 = insertelement <16 x float> %c, float %b1, i32 6
  ret <16 x float> %c1
}

define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV32-NEXT:    vmv.v.i v0, 1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vrgather.vi v18, v15, 1, v0.t
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vx v8, v16, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extract_any_extend_vector_inreg_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s2, -24
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT:    vmv.v.i v0, 1
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vmv.v.i v16, 0
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT:    vrgather.vi v18, v15, 1, v0.t
; RV64-NEXT:    mv s2, sp
; RV64-NEXT:    vs8r.v v16, (s2)
; RV64-NEXT:    andi a0, a0, 15
; RV64-NEXT:    li a1, 8
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    add a0, s2, a0
; RV64-NEXT:    ld a0, 0(a0)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    .cfi_def_cfa sp, 256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    .cfi_restore s2
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %1 = extractelement <16 x i64> %a0, i32 15
  %2 = insertelement <16 x i64> zeroinitializer, i64 %1, i32 4
  %3 = extractelement <16 x i64> %2, i32 %a1
  ret i64 %3
}

define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
; CHECK-LABEL: shuffles_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vmv1r.v v13, v10
; CHECK-NEXT:    vslideup.vi v13, v11, 1
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    vmv.v.i v0, 1
; CHECK-NEXT:    vrgather.vi v12, v9, 0
; CHECK-NEXT:    vmv1r.v v9, v11
; CHECK-NEXT:    vrgather.vi v9, v10, 1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v12, v8
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %5 = fadd <4 x double> %3, %4
  ret <4 x double> %5
}

define <16 x i32> @m4_square_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_range(2,2) {
; CHECK-LABEL: m4_square_num_of_shuffles_in_chunks:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI17_0)
; CHECK-NEXT:    vl1r.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <16 x i32> <i32 0, i32 5, i32 8, i32 12, i32 1, i32 4, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i32> %1
}

define <16 x i32> @m4_linear_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_range(2,2) {
; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vi v12, v10, 0
; CHECK-NEXT:    vrgather.vi v12, v11, 0, v0.t
; CHECK-NEXT:    vrgather.vi v14, v8, 2
; CHECK-NEXT:    vrgather.vi v15, v10, 3
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison>
  ret <16 x i32> %1
}

define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) {
; RV32-LABEL: multi_chunks_shuffle:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vwsubu.vx v12, v10, a0
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vsrl.vv v12, v8, v12
; RV32-NEXT:    vsll.vv v8, v8, v10
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
; RV32-NEXT:    vrgather.vi v10, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    srai a1, a0, 31
; RV32-NEXT:    ret
;
; RV64-LABEL: multi_chunks_shuffle:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 16, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addi a0, a0, -241
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vmv.v.i v10, 0
; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
; RV64-NEXT:    vrgather.vi v10, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
entry:
  %1 = shufflevector <32 x i32> %0, <32 x i32> zeroinitializer, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 37, i32 36, i32 39, i32 38, i32 9, i32 8, i32 11, i32 10, i32 45, i32 44, i32 47, i32 46, i32 17, i32 16, i32 19, i32 18, i32 53, i32 52, i32 55, i32 54, i32 25, i32 24, i32 27, i32 26, i32 61, i32 60, i32 63, i32 62>
  %2 = shufflevector <32 x i32> zeroinitializer, <32 x i32> %1, <32 x i32> <i32 3, i32 34, i32 33, i32 0, i32 7, i32 38, i32 37, i32 4, i32 11, i32 42, i32 41, i32 8, i32 15, i32 46, i32 45, i32 12, i32 19, i32 50, i32 49, i32 16, i32 23, i32 54, i32 53, i32 20, i32 27, i32 58, i32 57, i32 24, i32 31, i32 62, i32 61, i32 28>
  %3 = or <32 x i32> %1, %2
  %4 = extractelement <32 x i32> %3, i64 1
  %conv199 = sext i32 %4 to i64
  ret i64 %conv199
}

define void @shuffle_i128_ldst(ptr %p) vscale_range(2,2) {
; CHECK-LABEL: shuffle_i128_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vmv4r.v v12, v8
; CHECK-NEXT:    vmv1r.v v14, v11
; CHECK-NEXT:    vmv1r.v v15, v10
; CHECK-NEXT:    vs4r.v v12, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i128> %res, ptr %p
  ret void
}

define void @shuffle_i256_ldst(ptr %p) vscale_range(2,2) {
; CHECK-LABEL: shuffle_i256_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v8
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    vmv1r.v v20, v14
; CHECK-NEXT:    vmv1r.v v21, v15
; CHECK-NEXT:    vmv1r.v v22, v12
; CHECK-NEXT:    vmv1r.v v23, v13
; CHECK-NEXT:    vs8r.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i256> %res, ptr %p
  ret void
}