; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s

; Codegen tests for shufflevector on fixed-length floating-point vectors
; (bfloat, half, float, double). All four RUN lines share the single CHECK
; prefix, so every configuration must produce the same output.
;
; Unused shuffle operands and don't-care mask lanes are written as `poison`
; rather than the deprecated `undef`.

; Each result lane comes from the same lane of one of the two sources, so the
; shuffle is a select under a constant mask (vmerge.vvm).

define <4 x bfloat> @shuffle_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; CHECK-LABEL: shuffle_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x bfloat> %s
}

define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: shuffle_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x half> %s
}

define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: shuffle_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, -20
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
  ret <8 x float> %s
}

; Lane-wise select where one source is a constant splat of 2.0: the splat is
; loaded as a scalar and merged with vfmerge.vfm. The two tests differ only in
; which operand position holds the splat.

define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
; CHECK-LABEL: shuffle_fv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 9
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfmerge.vfm v8, v8, fa5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %s
}

define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
; CHECK-LABEL: shuffle_vf_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfmerge.vfm v8, v8, fa5, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %s
}

; Single-source permutes of <4 x double>. The other operand is poison, in either
; position; both forms produce the same vrgatherei16.vv sequence.

define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x double> %s
}

define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
  ret <4 x double> %s
}

; Two-source <4 x double> shuffles that are not lane-wise selects: a gather from
; the first source followed by a masked gather from the second.

define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v14, (a0)
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v14
; CHECK-NEXT:    vrgather.vi v12, v10, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x double> %s
}

; Same shuffle mask with a constant splat of 2.0 as the first (xv) or second
; (vx) operand.

define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI8_0)(a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vi v12, v10, 4
; CHECK-NEXT:    vmv.v.i v0, 12
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfmv.v.f v10, fa5
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x double> %s
}

define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_shuffle_vx_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 9
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vcompress.vm v12, v8, v10
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 3
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.v.f v8, fa5
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
  ret <4 x double> %s
}

; Narrowing shuffles that extract a contiguous run of elements from a wider
; source: a single vslidedown.vi at the source vector's length.

define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_1(<8 x bfloat> %x) {
; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x bfloat> %s
}

define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_3(<8 x bfloat> %x) {
; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x bfloat> %s
}

define <4 x half> @shuffle_v8f16_to_vslidedown_1(<8 x half> %x) {
; CHECK-LABEL: shuffle_v8f16_to_vslidedown_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x half> %x, <8 x half> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x half> %s
}

define <4 x half> @shuffle_v8f16_to_vslidedown_3(<8 x half> %x) {
; CHECK-LABEL: shuffle_v8f16_to_vslidedown_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x half> %x, <8 x half> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x half> %s
}

define <2 x float> @shuffle_v4f32_to_vslidedown(<4 x float> %x) {
; CHECK-LABEL: shuffle_v4f32_to_vslidedown:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <4 x float> %x, <4 x float> poison, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %s
}

; Same-width single-source shuffles whose defined lanes form a slide; the
; remaining mask lanes are poison (don't-care).

define <4 x bfloat> @slidedown_v4bf16(<4 x bfloat> %x) {
; CHECK-LABEL: slidedown_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
  ret <4 x bfloat> %s
}

define <4 x half> @slidedown_v4f16(<4 x half> %x) {
; CHECK-LABEL: slidedown_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
  ret <4 x half> %s
}

define <8 x float> @slidedown_v8f32(<8 x float> %x) {
; CHECK-LABEL: slidedown_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 3, i32 poison, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <8 x float> %s
}

define <4 x half> @slideup_v4f16(<4 x half> %x) {
; CHECK-LABEL: slideup_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 1
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
  ret <4 x half> %s
}

define <8 x float> @slideup_v8f32(<8 x float> %x) {
; CHECK-LABEL: slideup_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v10, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 2, i32 3, i32 4>
  ret <8 x float> %s
}

; Rotations of one vector (unary) and concatenate-then-extract of two vectors
; (binary): a vslidedown.vi paired with a vslideup.vi.

define <8 x float> @splice_unary(<8 x float> %x) {
; CHECK-LABEL: splice_unary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 7
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
  ret <8 x float> %s
}

define <8 x double> @splice_unary2(<8 x double> %x) {
; CHECK-LABEL: splice_unary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vi v12, v8, 6
; CHECK-NEXT:    vslideup.vi v12, v8, 2
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x double> %x, <8 x double> poison, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
  ret <8 x double> %s
}

define <8 x float> @splice_binary(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: splice_binary:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    ret
  %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 9>
  ret <8 x float> %s
}

define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
; CHECK-LABEL: splice_binary2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vslidedown.vi v12, v12, 5
; CHECK-NEXT:    vslideup.vi v12, v8, 3
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
  ret <8 x double> %s
}

; 16-bit element (bfloat / half) variants of the gather tests. The *_load tests
; splat element 1 of a loaded vector; the checks expect a scalar lh from offset 2
; followed by vmv.v.x instead of a vector load plus gather.

define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x bfloat> %s
}

define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI25_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI25_0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x bfloat> %s
}

define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
; CHECK-LABEL: vrgather_shuffle_vx_v4bf16_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lh a0, 2(a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    ret
  %v = load <4 x bfloat>, ptr %p
  %s = shufflevector <4 x bfloat> %v, <4 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %s
}

define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 4096
; CHECK-NEXT:    addi a0, a0, 513
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v10, v9
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x half> %s
}

define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI28_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI28_0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vv v10, v8, v11
; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
  ret <4 x half> %s
}

define <4 x half> @vrgather_shuffle_vx_v4f16_load(ptr %p) {
; CHECK-LABEL: vrgather_shuffle_vx_v4f16_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lh a0, 2(a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    ret
  %v = load <4 x half>, ptr %p
  %s = shufflevector <4 x half> %v, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x half> %s
}

; Two-source <16 x float> shuffles where the elements read from %v and the
; elements read from %w occupy non-overlapping source lane positions. In the
; base case the checks expect the sources to be merged first (vmerge.vvm) and
; then permuted with a single vrgatherei16.vv.

define <16 x float> @shuffle_disjoint_lanes(<16 x float> %v, <16 x float> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI30_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, 11
; CHECK-NEXT:    addi a0, a0, -1366
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v18, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v18
; CHECK-NEXT:    ret
  %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x float> %out
}

; The lanes taken from %v are already in place (identity), so only a masked
; gather from %w into %v's register is expected.

define <16 x float> @shuffle_disjoint_lanes_one_identity(<16 x float> %v, <16 x float> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI31_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    li a0, -272
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 26, i32 30, i32 22, i32 20, i32 8, i32 31, i32 29, i32 28, i32 27, i32 23, i32 25, i32 22>
  ret <16 x float> %out
}

; Every lane taken from %v is element 7, so %v is broadcast with vrgather.vi and
; the %w lanes are filled in with a masked gather.

define <16 x float> @shuffle_disjoint_lanes_one_broadcast(<16 x float> %v, <16 x float> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI32_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI32_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v20, (a0)
; CHECK-NEXT:    lui a0, 15
; CHECK-NEXT:    addi a0, a0, 240
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v16, v8, 7
; CHECK-NEXT:    vrgatherei16.vv v16, v12, v20, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 26, i32 30, i32 22, i32 18, i32 7, i32 7, i32 7, i32 7, i32 24, i32 28, i32 20, i32 16>
  ret <16 x float> %out
}

; The first source is a splat of a scalar argument, so it is materialised with
; vfmv.v.f and the %w lanes are filled in with a masked gather.

define <16 x float> @shuffle_disjoint_lanes_one_splat(float %v, <16 x float> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI33_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI33_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    lui a0, 15
; CHECK-NEXT:    addi a0, a0, 240
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vfmv.v.f v12, fa0
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <16 x float> poison, float %v, i32 0
  %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer
  %out = shufflevector <16 x float> %splat, <16 x float> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x float> %out
}