; Test overview: each function below builds a splat of a scalar argument or of
; constant 0 (insertelement into lane 0 followed by shufflevector with a
; zeroinitializer mask) and stores the resulting fixed-length vector to %x.
; Every RUN line uses -mattr=+v with -riscv-v-vector-bits-min=128 and varies
; the triple (riscv32/riscv64) and -riscv-v-fixed-length-vector-lmul-max
; (8, 2, 1). Prefix CHECK holds assertions common to all six runs, LMULMAX<n>
; those common to both triples at one LMUL cap, and LMULMAX<n>-RV32/-RV64
; those specific to one run.
; What the assertions in this group pin down:
;  - 128-bit splats of i8/i16/i32 are a single vmv.v.x at m1 in every run.
;  - An i64 splat on riscv32 takes %y in a1/a2; it is checked either as two sw
;    to the stack plus a zero-stride vlse64.v, or (LMULMAX1, v4i64) as an e32
;    vmv.v.x/vmerge.vxm pair under mask value 5.
;  - 256-bit splats use m2 when LMUL max is 2 or 8, and two m1 stores at (a0)
;    and (a0)+16 when LMUL max is 1.
;  - Constant-zero splats use vmv.v.i with immediate 0.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32 3; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 4; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 5; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64 6; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 7; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 8 9define void @splat_v16i8(ptr %x, i8 %y) { 10; CHECK-LABEL: splat_v16i8: 11; CHECK: # %bb.0: 12; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma 13; CHECK-NEXT: vmv.v.x v8, a1 14; CHECK-NEXT: vse8.v v8, (a0) 15; CHECK-NEXT: ret 16 %a = insertelement <16 x i8> poison, i8 %y, i32 0 17 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer 18 store <16 x i8> %b, ptr %x 19 ret void 20} 21 22define void @splat_v8i16(ptr %x, i16 %y) { 23; CHECK-LABEL: splat_v8i16: 24; CHECK: # %bb.0: 25; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 26; CHECK-NEXT: vmv.v.x v8, a1 27; CHECK-NEXT: vse16.v v8, (a0) 28; CHECK-NEXT: ret 29 %a = insertelement <8 x i16> poison, i16 %y, i32 0 30 %b = shufflevector <8 x i16> %a, <8 x i16> 
poison, <8 x i32> zeroinitializer 31 store <8 x i16> %b, ptr %x 32 ret void 33} 34 35define void @splat_v4i32(ptr %x, i32 %y) { 36; CHECK-LABEL: splat_v4i32: 37; CHECK: # %bb.0: 38; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 39; CHECK-NEXT: vmv.v.x v8, a1 40; CHECK-NEXT: vse32.v v8, (a0) 41; CHECK-NEXT: ret 42 %a = insertelement <4 x i32> poison, i32 %y, i32 0 43 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer 44 store <4 x i32> %b, ptr %x 45 ret void 46} 47 48define void @splat_v2i64(ptr %x, i64 %y) { 49; LMULMAX8-RV32-LABEL: splat_v2i64: 50; LMULMAX8-RV32: # %bb.0: 51; LMULMAX8-RV32-NEXT: addi sp, sp, -16 52; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 53; LMULMAX8-RV32-NEXT: sw a2, 12(sp) 54; LMULMAX8-RV32-NEXT: sw a1, 8(sp) 55; LMULMAX8-RV32-NEXT: addi a1, sp, 8 56; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 57; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero 58; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) 59; LMULMAX8-RV32-NEXT: addi sp, sp, 16 60; LMULMAX8-RV32-NEXT: ret 61; 62; LMULMAX2-RV32-LABEL: splat_v2i64: 63; LMULMAX2-RV32: # %bb.0: 64; LMULMAX2-RV32-NEXT: addi sp, sp, -16 65; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 66; LMULMAX2-RV32-NEXT: sw a2, 12(sp) 67; LMULMAX2-RV32-NEXT: sw a1, 8(sp) 68; LMULMAX2-RV32-NEXT: addi a1, sp, 8 69; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 70; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero 71; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) 72; LMULMAX2-RV32-NEXT: addi sp, sp, 16 73; LMULMAX2-RV32-NEXT: ret 74; 75; LMULMAX1-RV32-LABEL: splat_v2i64: 76; LMULMAX1-RV32: # %bb.0: 77; LMULMAX1-RV32-NEXT: addi sp, sp, -16 78; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 79; LMULMAX1-RV32-NEXT: sw a2, 12(sp) 80; LMULMAX1-RV32-NEXT: sw a1, 8(sp) 81; LMULMAX1-RV32-NEXT: addi a1, sp, 8 82; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 83; LMULMAX1-RV32-NEXT: vlse64.v v8, (a1), zero 84; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) 85; LMULMAX1-RV32-NEXT: addi sp, sp, 16 86; LMULMAX1-RV32-NEXT: ret 
87; 88; LMULMAX8-RV64-LABEL: splat_v2i64: 89; LMULMAX8-RV64: # %bb.0: 90; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 91; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1 92; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) 93; LMULMAX8-RV64-NEXT: ret 94; 95; LMULMAX2-RV64-LABEL: splat_v2i64: 96; LMULMAX2-RV64: # %bb.0: 97; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 98; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 99; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) 100; LMULMAX2-RV64-NEXT: ret 101; 102; LMULMAX1-RV64-LABEL: splat_v2i64: 103; LMULMAX1-RV64: # %bb.0: 104; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 105; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1 106; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) 107; LMULMAX1-RV64-NEXT: ret 108 %a = insertelement <2 x i64> poison, i64 %y, i32 0 109 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer 110 store <2 x i64> %b, ptr %x 111 ret void 112} 113 114define void @splat_v32i8(ptr %x, i8 %y) { 115; LMULMAX8-LABEL: splat_v32i8: 116; LMULMAX8: # %bb.0: 117; LMULMAX8-NEXT: li a2, 32 118; LMULMAX8-NEXT: vsetvli zero, a2, e8, m2, ta, ma 119; LMULMAX8-NEXT: vmv.v.x v8, a1 120; LMULMAX8-NEXT: vse8.v v8, (a0) 121; LMULMAX8-NEXT: ret 122; 123; LMULMAX2-LABEL: splat_v32i8: 124; LMULMAX2: # %bb.0: 125; LMULMAX2-NEXT: li a2, 32 126; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma 127; LMULMAX2-NEXT: vmv.v.x v8, a1 128; LMULMAX2-NEXT: vse8.v v8, (a0) 129; LMULMAX2-NEXT: ret 130; 131; LMULMAX1-LABEL: splat_v32i8: 132; LMULMAX1: # %bb.0: 133; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma 134; LMULMAX1-NEXT: vmv.v.x v8, a1 135; LMULMAX1-NEXT: addi a1, a0, 16 136; LMULMAX1-NEXT: vse8.v v8, (a1) 137; LMULMAX1-NEXT: vse8.v v8, (a0) 138; LMULMAX1-NEXT: ret 139 %a = insertelement <32 x i8> poison, i8 %y, i32 0 140 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer 141 store <32 x i8> %b, ptr %x 142 ret void 143} 144 145define void @splat_v16i16(ptr %x, i16 %y) { 146; LMULMAX8-LABEL: splat_v16i16: 147; LMULMAX8: # 
%bb.0: 148; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma 149; LMULMAX8-NEXT: vmv.v.x v8, a1 150; LMULMAX8-NEXT: vse16.v v8, (a0) 151; LMULMAX8-NEXT: ret 152; 153; LMULMAX2-LABEL: splat_v16i16: 154; LMULMAX2: # %bb.0: 155; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma 156; LMULMAX2-NEXT: vmv.v.x v8, a1 157; LMULMAX2-NEXT: vse16.v v8, (a0) 158; LMULMAX2-NEXT: ret 159; 160; LMULMAX1-LABEL: splat_v16i16: 161; LMULMAX1: # %bb.0: 162; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma 163; LMULMAX1-NEXT: vmv.v.x v8, a1 164; LMULMAX1-NEXT: addi a1, a0, 16 165; LMULMAX1-NEXT: vse16.v v8, (a1) 166; LMULMAX1-NEXT: vse16.v v8, (a0) 167; LMULMAX1-NEXT: ret 168 %a = insertelement <16 x i16> poison, i16 %y, i32 0 169 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer 170 store <16 x i16> %b, ptr %x 171 ret void 172} 173 174define void @splat_v8i32(ptr %x, i32 %y) { 175; LMULMAX8-LABEL: splat_v8i32: 176; LMULMAX8: # %bb.0: 177; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma 178; LMULMAX8-NEXT: vmv.v.x v8, a1 179; LMULMAX8-NEXT: vse32.v v8, (a0) 180; LMULMAX8-NEXT: ret 181; 182; LMULMAX2-LABEL: splat_v8i32: 183; LMULMAX2: # %bb.0: 184; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma 185; LMULMAX2-NEXT: vmv.v.x v8, a1 186; LMULMAX2-NEXT: vse32.v v8, (a0) 187; LMULMAX2-NEXT: ret 188; 189; LMULMAX1-LABEL: splat_v8i32: 190; LMULMAX1: # %bb.0: 191; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma 192; LMULMAX1-NEXT: vmv.v.x v8, a1 193; LMULMAX1-NEXT: addi a1, a0, 16 194; LMULMAX1-NEXT: vse32.v v8, (a1) 195; LMULMAX1-NEXT: vse32.v v8, (a0) 196; LMULMAX1-NEXT: ret 197 %a = insertelement <8 x i32> poison, i32 %y, i32 0 198 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer 199 store <8 x i32> %b, ptr %x 200 ret void 201} 202 203define void @splat_v4i64(ptr %x, i64 %y) { 204; LMULMAX8-RV32-LABEL: splat_v4i64: 205; LMULMAX8-RV32: # %bb.0: 206; LMULMAX8-RV32-NEXT: addi sp, sp, -16 207; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 
208; LMULMAX8-RV32-NEXT: sw a2, 12(sp) 209; LMULMAX8-RV32-NEXT: sw a1, 8(sp) 210; LMULMAX8-RV32-NEXT: addi a1, sp, 8 211; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma 212; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero 213; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) 214; LMULMAX8-RV32-NEXT: addi sp, sp, 16 215; LMULMAX8-RV32-NEXT: ret 216; 217; LMULMAX2-RV32-LABEL: splat_v4i64: 218; LMULMAX2-RV32: # %bb.0: 219; LMULMAX2-RV32-NEXT: addi sp, sp, -16 220; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 221; LMULMAX2-RV32-NEXT: sw a2, 12(sp) 222; LMULMAX2-RV32-NEXT: sw a1, 8(sp) 223; LMULMAX2-RV32-NEXT: addi a1, sp, 8 224; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma 225; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero 226; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) 227; LMULMAX2-RV32-NEXT: addi sp, sp, 16 228; LMULMAX2-RV32-NEXT: ret 229; 230; LMULMAX1-RV32-LABEL: splat_v4i64: 231; LMULMAX1-RV32: # %bb.0: 232; LMULMAX1-RV32-NEXT: li a3, 5 233; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 234; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 235; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 236; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 237; LMULMAX1-RV32-NEXT: addi a1, a0, 16 238; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) 239; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 240; LMULMAX1-RV32-NEXT: ret 241; 242; LMULMAX8-RV64-LABEL: splat_v4i64: 243; LMULMAX8-RV64: # %bb.0: 244; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma 245; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1 246; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) 247; LMULMAX8-RV64-NEXT: ret 248; 249; LMULMAX2-RV64-LABEL: splat_v4i64: 250; LMULMAX2-RV64: # %bb.0: 251; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma 252; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 253; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) 254; LMULMAX2-RV64-NEXT: ret 255; 256; LMULMAX1-RV64-LABEL: splat_v4i64: 257; LMULMAX1-RV64: # %bb.0: 258; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 259; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1 260; LMULMAX1-RV64-NEXT: addi a1, a0, 16 261; 
LMULMAX1-RV64-NEXT: vse64.v v8, (a1) 262; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) 263; LMULMAX1-RV64-NEXT: ret 264 %a = insertelement <4 x i64> poison, i64 %y, i32 0 265 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer 266 store <4 x i64> %b, ptr %x 267 ret void 268} 269 270define void @splat_zero_v16i8(ptr %x) { 271; CHECK-LABEL: splat_zero_v16i8: 272; CHECK: # %bb.0: 273; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma 274; CHECK-NEXT: vmv.v.i v8, 0 275; CHECK-NEXT: vse8.v v8, (a0) 276; CHECK-NEXT: ret 277 %a = insertelement <16 x i8> poison, i8 0, i32 0 278 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer 279 store <16 x i8> %b, ptr %x 280 ret void 281} 282 283define void @splat_zero_v8i16(ptr %x) { 284; CHECK-LABEL: splat_zero_v8i16: 285; CHECK: # %bb.0: 286; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 287; CHECK-NEXT: vmv.v.i v8, 0 288; CHECK-NEXT: vse16.v v8, (a0) 289; CHECK-NEXT: ret 290 %a = insertelement <8 x i16> poison, i16 0, i32 0 291 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer 292 store <8 x i16> %b, ptr %x 293 ret void 294} 295 296define void @splat_zero_v4i32(ptr %x) { 297; CHECK-LABEL: splat_zero_v4i32: 298; CHECK: # %bb.0: 299; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 300; CHECK-NEXT: vmv.v.i v8, 0 301; CHECK-NEXT: vse32.v v8, (a0) 302; CHECK-NEXT: ret 303 %a = insertelement <4 x i32> poison, i32 0, i32 0 304 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer 305 store <4 x i32> %b, ptr %x 306 ret void 307} 308 309define void @splat_zero_v2i64(ptr %x) { 310; CHECK-LABEL: splat_zero_v2i64: 311; CHECK: # %bb.0: 312; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 313; CHECK-NEXT: vmv.v.i v8, 0 314; CHECK-NEXT: vse64.v v8, (a0) 315; CHECK-NEXT: ret 316 %a = insertelement <2 x i64> poison, i64 0, i32 0 317 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer 318 store <2 x i64> %b, ptr %x 319 ret void 320} 321 
; The tests from splat_zero_v32i8 through splat_zero_v2i32 cover two cases:
;  - Constant-zero splats of 256-bit vectors (v32i8, v16i16, v8i32, v4i64),
;    built with insertelement + shufflevector. With LMUL max 2 or 8 they are
;    checked as one m2 vmv.v.i/store; with LMUL max 1 as one m1 vmv.v.i stored
;    to (a0) and again to (a0)+16. The LMULMAX1-RV32 v4i64 case is checked
;    with e32 elements rather than e64.
;  - Direct stores of zeroinitializer to vectors narrower than 128 bits.
;    v2i16 is checked as a single `sw zero` in every run; the align-1 variant
;    is checked as a vector store with e8 elements instead. v4i16 and v2i32
;    are checked as `sd zero` on riscv64 and as a vector store on riscv32.
322define void @splat_zero_v32i8(ptr %x) { 323; LMULMAX8-LABEL: splat_zero_v32i8: 324; LMULMAX8: # %bb.0: 325; LMULMAX8-NEXT: li a1, 32 326; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma 327; LMULMAX8-NEXT: vmv.v.i v8, 0 328; LMULMAX8-NEXT: vse8.v v8, (a0) 329; LMULMAX8-NEXT: ret 330; 331; LMULMAX2-LABEL: splat_zero_v32i8: 332; LMULMAX2: # %bb.0: 333; LMULMAX2-NEXT: li a1, 32 334; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma 335; LMULMAX2-NEXT: vmv.v.i v8, 0 336; LMULMAX2-NEXT: vse8.v v8, (a0) 337; LMULMAX2-NEXT: ret 338; 339; LMULMAX1-LABEL: splat_zero_v32i8: 340; LMULMAX1: # %bb.0: 341; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma 342; LMULMAX1-NEXT: vmv.v.i v8, 0 343; LMULMAX1-NEXT: vse8.v v8, (a0) 344; LMULMAX1-NEXT: addi a0, a0, 16 345; LMULMAX1-NEXT: vse8.v v8, (a0) 346; LMULMAX1-NEXT: ret 347 %a = insertelement <32 x i8> poison, i8 0, i32 0 348 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer 349 store <32 x i8> %b, ptr %x 350 ret void 351} 352 353define void @splat_zero_v16i16(ptr %x) { 354; LMULMAX8-LABEL: splat_zero_v16i16: 355; LMULMAX8: # %bb.0: 356; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma 357; LMULMAX8-NEXT: vmv.v.i v8, 0 358; LMULMAX8-NEXT: vse16.v v8, (a0) 359; LMULMAX8-NEXT: ret 360; 361; LMULMAX2-LABEL: splat_zero_v16i16: 362; LMULMAX2: # %bb.0: 363; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma 364; LMULMAX2-NEXT: vmv.v.i v8, 0 365; LMULMAX2-NEXT: vse16.v v8, (a0) 366; LMULMAX2-NEXT: ret 367; 368; LMULMAX1-LABEL: splat_zero_v16i16: 369; LMULMAX1: # %bb.0: 370; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma 371; LMULMAX1-NEXT: vmv.v.i v8, 0 372; LMULMAX1-NEXT: vse16.v v8, (a0) 373; LMULMAX1-NEXT: addi a0, a0, 16 374; LMULMAX1-NEXT: vse16.v v8, (a0) 375; LMULMAX1-NEXT: ret 376 %a = insertelement <16 x i16> poison, i16 0, i32 0 377 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer 378 store <16 x i16> %b, ptr %x 379 ret void 380} 381 382define void 
@splat_zero_v8i32(ptr %x) { 383; LMULMAX8-LABEL: splat_zero_v8i32: 384; LMULMAX8: # %bb.0: 385; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma 386; LMULMAX8-NEXT: vmv.v.i v8, 0 387; LMULMAX8-NEXT: vse32.v v8, (a0) 388; LMULMAX8-NEXT: ret 389; 390; LMULMAX2-LABEL: splat_zero_v8i32: 391; LMULMAX2: # %bb.0: 392; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma 393; LMULMAX2-NEXT: vmv.v.i v8, 0 394; LMULMAX2-NEXT: vse32.v v8, (a0) 395; LMULMAX2-NEXT: ret 396; 397; LMULMAX1-LABEL: splat_zero_v8i32: 398; LMULMAX1: # %bb.0: 399; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma 400; LMULMAX1-NEXT: vmv.v.i v8, 0 401; LMULMAX1-NEXT: vse32.v v8, (a0) 402; LMULMAX1-NEXT: addi a0, a0, 16 403; LMULMAX1-NEXT: vse32.v v8, (a0) 404; LMULMAX1-NEXT: ret 405 %a = insertelement <8 x i32> poison, i32 0, i32 0 406 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer 407 store <8 x i32> %b, ptr %x 408 ret void 409} 410 411define void @splat_zero_v4i64(ptr %x) { 412; LMULMAX8-LABEL: splat_zero_v4i64: 413; LMULMAX8: # %bb.0: 414; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma 415; LMULMAX8-NEXT: vmv.v.i v8, 0 416; LMULMAX8-NEXT: vse64.v v8, (a0) 417; LMULMAX8-NEXT: ret 418; 419; LMULMAX2-LABEL: splat_zero_v4i64: 420; LMULMAX2: # %bb.0: 421; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma 422; LMULMAX2-NEXT: vmv.v.i v8, 0 423; LMULMAX2-NEXT: vse64.v v8, (a0) 424; LMULMAX2-NEXT: ret 425; 426; LMULMAX1-RV32-LABEL: splat_zero_v4i64: 427; LMULMAX1-RV32: # %bb.0: 428; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 429; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 430; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 431; LMULMAX1-RV32-NEXT: addi a0, a0, 16 432; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 433; LMULMAX1-RV32-NEXT: ret 434; 435; LMULMAX1-RV64-LABEL: splat_zero_v4i64: 436; LMULMAX1-RV64: # %bb.0: 437; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 438; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 439; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) 440; LMULMAX1-RV64-NEXT: addi a0, a0, 16 
441; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) 442; LMULMAX1-RV64-NEXT: ret 443 %a = insertelement <4 x i64> poison, i64 0, i32 0 444 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer 445 store <4 x i64> %b, ptr %x 446 ret void 447} 448 449define void @splat_zero_v2i16(ptr %p) { 450; CHECK-LABEL: splat_zero_v2i16: 451; CHECK: # %bb.0: 452; CHECK-NEXT: sw zero, 0(a0) 453; CHECK-NEXT: ret 454 store <2 x i16> zeroinitializer, ptr %p 455 ret void 456} 457 458define void @splat_zero_v2i16_unaligned(ptr %p) { 459; CHECK-LABEL: splat_zero_v2i16_unaligned: 460; CHECK: # %bb.0: 461; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma 462; CHECK-NEXT: vmv.v.i v8, 0 463; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma 464; CHECK-NEXT: vse8.v v8, (a0) 465; CHECK-NEXT: ret 466 store <2 x i16> zeroinitializer, ptr %p, align 1 467 ret void 468} 469 470define void @splat_zero_v4i16(ptr %p) { 471; LMULMAX8-RV32-LABEL: splat_zero_v4i16: 472; LMULMAX8-RV32: # %bb.0: 473; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 474; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 475; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) 476; LMULMAX8-RV32-NEXT: ret 477; 478; LMULMAX2-RV32-LABEL: splat_zero_v4i16: 479; LMULMAX2-RV32: # %bb.0: 480; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 481; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 482; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) 483; LMULMAX2-RV32-NEXT: ret 484; 485; LMULMAX1-RV32-LABEL: splat_zero_v4i16: 486; LMULMAX1-RV32: # %bb.0: 487; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 488; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 489; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) 490; LMULMAX1-RV32-NEXT: ret 491; 492; LMULMAX8-RV64-LABEL: splat_zero_v4i16: 493; LMULMAX8-RV64: # %bb.0: 494; LMULMAX8-RV64-NEXT: sd zero, 0(a0) 495; LMULMAX8-RV64-NEXT: ret 496; 497; LMULMAX2-RV64-LABEL: splat_zero_v4i16: 498; LMULMAX2-RV64: # %bb.0: 499; LMULMAX2-RV64-NEXT: sd zero, 0(a0) 500; LMULMAX2-RV64-NEXT: ret 501; 502; LMULMAX1-RV64-LABEL: splat_zero_v4i16: 503; 
LMULMAX1-RV64: # %bb.0: 504; LMULMAX1-RV64-NEXT: sd zero, 0(a0) 505; LMULMAX1-RV64-NEXT: ret 506 store <4 x i16> zeroinitializer, ptr %p 507 ret void 508} 509 510define void @splat_zero_v2i32(ptr %p) { 511; LMULMAX8-RV32-LABEL: splat_zero_v2i32: 512; LMULMAX8-RV32: # %bb.0: 513; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 514; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 515; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) 516; LMULMAX8-RV32-NEXT: ret 517; 518; LMULMAX2-RV32-LABEL: splat_zero_v2i32: 519; LMULMAX2-RV32: # %bb.0: 520; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 521; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 522; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) 523; LMULMAX2-RV32-NEXT: ret 524; 525; LMULMAX1-RV32-LABEL: splat_zero_v2i32: 526; LMULMAX1-RV32: # %bb.0: 527; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 528; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 529; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 530; LMULMAX1-RV32-NEXT: ret 531; 532; LMULMAX8-RV64-LABEL: splat_zero_v2i32: 533; LMULMAX8-RV64: # %bb.0: 534; LMULMAX8-RV64-NEXT: sd zero, 0(a0) 535; LMULMAX8-RV64-NEXT: ret 536; 537; LMULMAX2-RV64-LABEL: splat_zero_v2i32: 538; LMULMAX2-RV64: # %bb.0: 539; LMULMAX2-RV64-NEXT: sd zero, 0(a0) 540; LMULMAX2-RV64-NEXT: ret 541; 542; LMULMAX1-RV64-LABEL: splat_zero_v2i32: 543; LMULMAX1-RV64: # %bb.0: 544; LMULMAX1-RV64-NEXT: sd zero, 0(a0) 545; LMULMAX1-RV64-NEXT: ret 546 store <2 x i32> zeroinitializer, ptr %p 547 ret void 548} 549 550; Not a power of two and requires more than two scalar stores. 
551define void @splat_zero_v7i16(ptr %p) { 552; LMULMAX8-RV32-LABEL: splat_zero_v7i16: 553; LMULMAX8-RV32: # %bb.0: 554; LMULMAX8-RV32-NEXT: sh zero, 12(a0) 555; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 556; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 557; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) 558; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 559; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 560; LMULMAX8-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma 561; LMULMAX8-RV32-NEXT: vslidedown.vi v8, v8, 2 562; LMULMAX8-RV32-NEXT: addi a0, a0, 8 563; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) 564; LMULMAX8-RV32-NEXT: ret 565; 566; LMULMAX2-RV32-LABEL: splat_zero_v7i16: 567; LMULMAX2-RV32: # %bb.0: 568; LMULMAX2-RV32-NEXT: sh zero, 12(a0) 569; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 570; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 571; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) 572; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 573; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 574; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma 575; LMULMAX2-RV32-NEXT: vslidedown.vi v8, v8, 2 576; LMULMAX2-RV32-NEXT: addi a0, a0, 8 577; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) 578; LMULMAX2-RV32-NEXT: ret 579; 580; LMULMAX1-RV32-LABEL: splat_zero_v7i16: 581; LMULMAX1-RV32: # %bb.0: 582; LMULMAX1-RV32-NEXT: sh zero, 12(a0) 583; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 584; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 585; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) 586; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 587; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 588; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma 589; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v8, 2 590; LMULMAX1-RV32-NEXT: addi a0, a0, 8 591; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 592; LMULMAX1-RV32-NEXT: ret 593; 594; LMULMAX8-RV64-LABEL: splat_zero_v7i16: 595; LMULMAX8-RV64: # %bb.0: 596; LMULMAX8-RV64-NEXT: sh zero, 12(a0) 597; LMULMAX8-RV64-NEXT: sw zero, 8(a0) 598; LMULMAX8-RV64-NEXT: sd zero, 0(a0) 599; LMULMAX8-RV64-NEXT: ret 
600; 601; LMULMAX2-RV64-LABEL: splat_zero_v7i16: 602; LMULMAX2-RV64: # %bb.0: 603; LMULMAX2-RV64-NEXT: sh zero, 12(a0) 604; LMULMAX2-RV64-NEXT: sw zero, 8(a0) 605; LMULMAX2-RV64-NEXT: sd zero, 0(a0) 606; LMULMAX2-RV64-NEXT: ret 607; 608; LMULMAX1-RV64-LABEL: splat_zero_v7i16: 609; LMULMAX1-RV64: # %bb.0: 610; LMULMAX1-RV64-NEXT: sh zero, 12(a0) 611; LMULMAX1-RV64-NEXT: sw zero, 8(a0) 612; LMULMAX1-RV64-NEXT: sd zero, 0(a0) 613; LMULMAX1-RV64-NEXT: ret 614 store <7 x i16> zeroinitializer, ptr %p 615 ret void 616} 617 618define void @splat_allones_v16i8(ptr %x) { 619; CHECK-LABEL: splat_allones_v16i8: 620; CHECK: # %bb.0: 621; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma 622; CHECK-NEXT: vmv.v.i v8, -1 623; CHECK-NEXT: vse8.v v8, (a0) 624; CHECK-NEXT: ret 625 %a = insertelement <16 x i8> poison, i8 -1, i32 0 626 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer 627 store <16 x i8> %b, ptr %x 628 ret void 629} 630 631define void @splat_allones_v8i16(ptr %x) { 632; CHECK-LABEL: splat_allones_v8i16: 633; CHECK: # %bb.0: 634; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 635; CHECK-NEXT: vmv.v.i v8, -1 636; CHECK-NEXT: vse16.v v8, (a0) 637; CHECK-NEXT: ret 638 %a = insertelement <8 x i16> poison, i16 -1, i32 0 639 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer 640 store <8 x i16> %b, ptr %x 641 ret void 642} 643 644define void @splat_allones_v4i32(ptr %x) { 645; CHECK-LABEL: splat_allones_v4i32: 646; CHECK: # %bb.0: 647; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 648; CHECK-NEXT: vmv.v.i v8, -1 649; CHECK-NEXT: vse32.v v8, (a0) 650; CHECK-NEXT: ret 651 %a = insertelement <4 x i32> poison, i32 -1, i32 0 652 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer 653 store <4 x i32> %b, ptr %x 654 ret void 655} 656 657define void @splat_allones_v2i64(ptr %x) { 658; CHECK-LABEL: splat_allones_v2i64: 659; CHECK: # %bb.0: 660; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 661; CHECK-NEXT: 
vmv.v.i v8, -1 662; CHECK-NEXT: vse64.v v8, (a0) 663; CHECK-NEXT: ret 664 %a = insertelement <2 x i64> poison, i64 -1, i32 0 665 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer 666 store <2 x i64> %b, ptr %x 667 ret void 668} 669 670define void @splat_allones_v32i8(ptr %x) { 671; LMULMAX8-LABEL: splat_allones_v32i8: 672; LMULMAX8: # %bb.0: 673; LMULMAX8-NEXT: li a1, 32 674; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma 675; LMULMAX8-NEXT: vmv.v.i v8, -1 676; LMULMAX8-NEXT: vse8.v v8, (a0) 677; LMULMAX8-NEXT: ret 678; 679; LMULMAX2-LABEL: splat_allones_v32i8: 680; LMULMAX2: # %bb.0: 681; LMULMAX2-NEXT: li a1, 32 682; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma 683; LMULMAX2-NEXT: vmv.v.i v8, -1 684; LMULMAX2-NEXT: vse8.v v8, (a0) 685; LMULMAX2-NEXT: ret 686; 687; LMULMAX1-LABEL: splat_allones_v32i8: 688; LMULMAX1: # %bb.0: 689; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma 690; LMULMAX1-NEXT: vmv.v.i v8, -1 691; LMULMAX1-NEXT: vse8.v v8, (a0) 692; LMULMAX1-NEXT: addi a0, a0, 16 693; LMULMAX1-NEXT: vse8.v v8, (a0) 694; LMULMAX1-NEXT: ret 695 %a = insertelement <32 x i8> poison, i8 -1, i32 0 696 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer 697 store <32 x i8> %b, ptr %x 698 ret void 699} 700 701define void @splat_allones_v16i16(ptr %x) { 702; LMULMAX8-LABEL: splat_allones_v16i16: 703; LMULMAX8: # %bb.0: 704; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma 705; LMULMAX8-NEXT: vmv.v.i v8, -1 706; LMULMAX8-NEXT: vse16.v v8, (a0) 707; LMULMAX8-NEXT: ret 708; 709; LMULMAX2-LABEL: splat_allones_v16i16: 710; LMULMAX2: # %bb.0: 711; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma 712; LMULMAX2-NEXT: vmv.v.i v8, -1 713; LMULMAX2-NEXT: vse16.v v8, (a0) 714; LMULMAX2-NEXT: ret 715; 716; LMULMAX1-LABEL: splat_allones_v16i16: 717; LMULMAX1: # %bb.0: 718; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma 719; LMULMAX1-NEXT: vmv.v.i v8, -1 720; LMULMAX1-NEXT: vse16.v v8, (a0) 721; LMULMAX1-NEXT: addi a0, a0, 
16 722; LMULMAX1-NEXT: vse16.v v8, (a0) 723; LMULMAX1-NEXT: ret 724 %a = insertelement <16 x i16> poison, i16 -1, i32 0 725 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer 726 store <16 x i16> %b, ptr %x 727 ret void 728} 729 730define void @splat_allones_v8i32(ptr %x) { 731; LMULMAX8-LABEL: splat_allones_v8i32: 732; LMULMAX8: # %bb.0: 733; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma 734; LMULMAX8-NEXT: vmv.v.i v8, -1 735; LMULMAX8-NEXT: vse32.v v8, (a0) 736; LMULMAX8-NEXT: ret 737; 738; LMULMAX2-LABEL: splat_allones_v8i32: 739; LMULMAX2: # %bb.0: 740; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma 741; LMULMAX2-NEXT: vmv.v.i v8, -1 742; LMULMAX2-NEXT: vse32.v v8, (a0) 743; LMULMAX2-NEXT: ret 744; 745; LMULMAX1-LABEL: splat_allones_v8i32: 746; LMULMAX1: # %bb.0: 747; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma 748; LMULMAX1-NEXT: vmv.v.i v8, -1 749; LMULMAX1-NEXT: vse32.v v8, (a0) 750; LMULMAX1-NEXT: addi a0, a0, 16 751; LMULMAX1-NEXT: vse32.v v8, (a0) 752; LMULMAX1-NEXT: ret 753 %a = insertelement <8 x i32> poison, i32 -1, i32 0 754 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer 755 store <8 x i32> %b, ptr %x 756 ret void 757} 758 759define void @splat_allones_v4i64(ptr %x) { 760; LMULMAX8-LABEL: splat_allones_v4i64: 761; LMULMAX8: # %bb.0: 762; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma 763; LMULMAX8-NEXT: vmv.v.i v8, -1 764; LMULMAX8-NEXT: vse64.v v8, (a0) 765; LMULMAX8-NEXT: ret 766; 767; LMULMAX2-LABEL: splat_allones_v4i64: 768; LMULMAX2: # %bb.0: 769; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma 770; LMULMAX2-NEXT: vmv.v.i v8, -1 771; LMULMAX2-NEXT: vse64.v v8, (a0) 772; LMULMAX2-NEXT: ret 773; 774; LMULMAX1-RV32-LABEL: splat_allones_v4i64: 775; LMULMAX1-RV32: # %bb.0: 776; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 777; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1 778; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) 779; LMULMAX1-RV32-NEXT: addi a0, a0, 16 780; LMULMAX1-RV32-NEXT: 
vse32.v v8, (a0) 781; LMULMAX1-RV32-NEXT: ret 782; 783; LMULMAX1-RV64-LABEL: splat_allones_v4i64: 784; LMULMAX1-RV64: # %bb.0: 785; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 786; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1 787; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) 788; LMULMAX1-RV64-NEXT: addi a0, a0, 16 789; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) 790; LMULMAX1-RV64-NEXT: ret 791 %a = insertelement <4 x i64> poison, i64 -1, i32 0 792 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer 793 store <4 x i64> %b, ptr %x 794 ret void 795} 796 797; This requires a bitcast on RV32 due to type legalization rewriting the 798; build_vector to v8i32. 799; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x 800; with SEW=64 on RV32. 801define void @splat_allones_with_use_v4i64(ptr %x) { 802; LMULMAX8-LABEL: splat_allones_with_use_v4i64: 803; LMULMAX8: # %bb.0: 804; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma 805; LMULMAX8-NEXT: vle64.v v8, (a0) 806; LMULMAX8-NEXT: vadd.vi v8, v8, -1 807; LMULMAX8-NEXT: vse64.v v8, (a0) 808; LMULMAX8-NEXT: ret 809; 810; LMULMAX2-LABEL: splat_allones_with_use_v4i64: 811; LMULMAX2: # %bb.0: 812; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma 813; LMULMAX2-NEXT: vle64.v v8, (a0) 814; LMULMAX2-NEXT: vadd.vi v8, v8, -1 815; LMULMAX2-NEXT: vse64.v v8, (a0) 816; LMULMAX2-NEXT: ret 817; 818; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64: 819; LMULMAX1-RV32: # %bb.0: 820; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 821; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) 822; LMULMAX1-RV32-NEXT: addi a1, a0, 16 823; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) 824; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 825; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 826; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 827; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 828; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 829; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) 830; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) 831; 
LMULMAX1-RV32-NEXT: ret 832; 833; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64: 834; LMULMAX1-RV64: # %bb.0: 835; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 836; LMULMAX1-RV64-NEXT: addi a1, a0, 16 837; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) 838; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) 839; LMULMAX1-RV64-NEXT: vadd.vi v8, v8, -1 840; LMULMAX1-RV64-NEXT: vadd.vi v9, v9, -1 841; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) 842; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) 843; LMULMAX1-RV64-NEXT: ret 844 %a = load <4 x i64>, ptr %x 845 %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1> 846 store <4 x i64> %b, ptr %x 847 ret void 848} 849 850; Regression test: adds a splat of the i64 scalar %b to the <16 x i64> vector loaded from %a and stores the sum to %c. 851; It used to crash at LMUL=8 when inserting a v16i64 subvector into nxv8i64 at index 0: the v16i64 type was used to get the LMUL, and its size 852; exceeded the maximum expected size of 512. The scalable container type 853; nxv8i64 should have been used instead. The CHECK lines below are autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first line of this file), so regenerate them with that script rather than editing them by hand. 854define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) { 855; LMULMAX8-RV32-LABEL: vadd_vx_v16i64: 856; LMULMAX8-RV32: # %bb.0: 857; LMULMAX8-RV32-NEXT: addi sp, sp, -16 858; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 859; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 860; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) 861; LMULMAX8-RV32-NEXT: sw a2, 12(sp) 862; LMULMAX8-RV32-NEXT: sw a1, 8(sp) 863; LMULMAX8-RV32-NEXT: addi a0, sp, 8 864; LMULMAX8-RV32-NEXT: vlse64.v v16, (a0), zero 865; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16 866; LMULMAX8-RV32-NEXT: vse64.v v8, (a3) 867; LMULMAX8-RV32-NEXT: addi sp, sp, 16 868; LMULMAX8-RV32-NEXT: ret 869; 870; LMULMAX2-RV32-LABEL: vadd_vx_v16i64: 871; LMULMAX2-RV32: # %bb.0: 872; LMULMAX2-RV32-NEXT: addi a4, a0, 64 873; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma 874; LMULMAX2-RV32-NEXT: vle64.v v8, (a4) 875; LMULMAX2-RV32-NEXT: addi a4, a0, 96 876; LMULMAX2-RV32-NEXT: vle64.v v10, (a4) 877; LMULMAX2-RV32-NEXT: vle64.v v12, (a0) 878; LMULMAX2-RV32-NEXT: addi a0, a0, 32 879; LMULMAX2-RV32-NEXT: vle64.v v14, (a0) 
880; LMULMAX2-RV32-NEXT: li a0, 85 881; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0 882; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 883; LMULMAX2-RV32-NEXT: vmv.v.x v16, a2 884; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 885; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma 886; LMULMAX2-RV32-NEXT: vadd.vv v14, v14, v16 887; LMULMAX2-RV32-NEXT: vadd.vv v12, v12, v16 888; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v16 889; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v16 890; LMULMAX2-RV32-NEXT: addi a0, a3, 64 891; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) 892; LMULMAX2-RV32-NEXT: addi a0, a3, 96 893; LMULMAX2-RV32-NEXT: vse64.v v10, (a0) 894; LMULMAX2-RV32-NEXT: vse64.v v12, (a3) 895; LMULMAX2-RV32-NEXT: addi a0, a3, 32 896; LMULMAX2-RV32-NEXT: vse64.v v14, (a0) 897; LMULMAX2-RV32-NEXT: ret 898; 899; LMULMAX1-RV32-LABEL: vadd_vx_v16i64: 900; LMULMAX1-RV32: # %bb.0: 901; LMULMAX1-RV32-NEXT: addi a4, a0, 96 902; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 903; LMULMAX1-RV32-NEXT: vle64.v v8, (a4) 904; LMULMAX1-RV32-NEXT: addi a4, a0, 112 905; LMULMAX1-RV32-NEXT: vle64.v v9, (a4) 906; LMULMAX1-RV32-NEXT: addi a4, a0, 64 907; LMULMAX1-RV32-NEXT: vle64.v v10, (a4) 908; LMULMAX1-RV32-NEXT: addi a4, a0, 80 909; LMULMAX1-RV32-NEXT: vle64.v v11, (a4) 910; LMULMAX1-RV32-NEXT: addi a4, a0, 32 911; LMULMAX1-RV32-NEXT: vle64.v v12, (a4) 912; LMULMAX1-RV32-NEXT: addi a4, a0, 48 913; LMULMAX1-RV32-NEXT: vle64.v v13, (a4) 914; LMULMAX1-RV32-NEXT: vle64.v v14, (a0) 915; LMULMAX1-RV32-NEXT: addi a0, a0, 16 916; LMULMAX1-RV32-NEXT: vle64.v v15, (a0) 917; LMULMAX1-RV32-NEXT: li a0, 5 918; LMULMAX1-RV32-NEXT: vmv.s.x v0, a0 919; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 920; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2 921; LMULMAX1-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 922; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma 923; LMULMAX1-RV32-NEXT: vadd.vv v15, v15, v16 924; LMULMAX1-RV32-NEXT: vadd.vv v14, v14, v16 925; LMULMAX1-RV32-NEXT: vadd.vv v13, v13, v16 926; 
LMULMAX1-RV32-NEXT: vadd.vv v12, v12, v16 927; LMULMAX1-RV32-NEXT: vadd.vv v11, v11, v16 928; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v16 929; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v16 930; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v16 931; LMULMAX1-RV32-NEXT: addi a0, a3, 96 932; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) 933; LMULMAX1-RV32-NEXT: addi a0, a3, 112 934; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) 935; LMULMAX1-RV32-NEXT: addi a0, a3, 64 936; LMULMAX1-RV32-NEXT: vse64.v v10, (a0) 937; LMULMAX1-RV32-NEXT: addi a0, a3, 80 938; LMULMAX1-RV32-NEXT: vse64.v v11, (a0) 939; LMULMAX1-RV32-NEXT: addi a0, a3, 32 940; LMULMAX1-RV32-NEXT: vse64.v v12, (a0) 941; LMULMAX1-RV32-NEXT: addi a0, a3, 48 942; LMULMAX1-RV32-NEXT: vse64.v v13, (a0) 943; LMULMAX1-RV32-NEXT: vse64.v v14, (a3) 944; LMULMAX1-RV32-NEXT: addi a3, a3, 16 945; LMULMAX1-RV32-NEXT: vse64.v v15, (a3) 946; LMULMAX1-RV32-NEXT: ret 947; 948; LMULMAX8-RV64-LABEL: vadd_vx_v16i64: 949; LMULMAX8-RV64: # %bb.0: 950; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 951; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) 952; LMULMAX8-RV64-NEXT: vadd.vx v8, v8, a1 953; LMULMAX8-RV64-NEXT: vse64.v v8, (a2) 954; LMULMAX8-RV64-NEXT: ret 955; 956; LMULMAX2-RV64-LABEL: vadd_vx_v16i64: 957; LMULMAX2-RV64: # %bb.0: 958; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma 959; LMULMAX2-RV64-NEXT: addi a3, a0, 96 960; LMULMAX2-RV64-NEXT: vle64.v v8, (a3) 961; LMULMAX2-RV64-NEXT: addi a3, a0, 32 962; LMULMAX2-RV64-NEXT: vle64.v v10, (a3) 963; LMULMAX2-RV64-NEXT: addi a3, a0, 64 964; LMULMAX2-RV64-NEXT: vle64.v v12, (a3) 965; LMULMAX2-RV64-NEXT: vle64.v v14, (a0) 966; LMULMAX2-RV64-NEXT: vadd.vx v10, v10, a1 967; LMULMAX2-RV64-NEXT: vadd.vx v8, v8, a1 968; LMULMAX2-RV64-NEXT: vadd.vx v12, v12, a1 969; LMULMAX2-RV64-NEXT: vadd.vx v14, v14, a1 970; LMULMAX2-RV64-NEXT: vse64.v v14, (a2) 971; LMULMAX2-RV64-NEXT: addi a0, a2, 64 972; LMULMAX2-RV64-NEXT: vse64.v v12, (a0) 973; LMULMAX2-RV64-NEXT: addi a0, a2, 96 974; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) 
975; LMULMAX2-RV64-NEXT: addi a0, a2, 32 976; LMULMAX2-RV64-NEXT: vse64.v v10, (a0) 977; LMULMAX2-RV64-NEXT: ret 978; 979; LMULMAX1-RV64-LABEL: vadd_vx_v16i64: 980; LMULMAX1-RV64: # %bb.0: 981; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 982; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) 983; LMULMAX1-RV64-NEXT: addi a3, a0, 96 984; LMULMAX1-RV64-NEXT: vle64.v v9, (a3) 985; LMULMAX1-RV64-NEXT: addi a3, a0, 112 986; LMULMAX1-RV64-NEXT: vle64.v v10, (a3) 987; LMULMAX1-RV64-NEXT: addi a3, a0, 64 988; LMULMAX1-RV64-NEXT: vle64.v v11, (a3) 989; LMULMAX1-RV64-NEXT: addi a3, a0, 48 990; LMULMAX1-RV64-NEXT: vle64.v v12, (a3) 991; LMULMAX1-RV64-NEXT: addi a3, a0, 16 992; LMULMAX1-RV64-NEXT: vle64.v v13, (a3) 993; LMULMAX1-RV64-NEXT: addi a3, a0, 80 994; LMULMAX1-RV64-NEXT: addi a0, a0, 32 995; LMULMAX1-RV64-NEXT: vle64.v v14, (a0) 996; LMULMAX1-RV64-NEXT: vle64.v v15, (a3) 997; LMULMAX1-RV64-NEXT: vadd.vx v13, v13, a1 998; LMULMAX1-RV64-NEXT: vadd.vx v12, v12, a1 999; LMULMAX1-RV64-NEXT: vadd.vx v14, v14, a1 1000; LMULMAX1-RV64-NEXT: vadd.vx v15, v15, a1 1001; LMULMAX1-RV64-NEXT: vadd.vx v11, v11, a1 1002; LMULMAX1-RV64-NEXT: vadd.vx v10, v10, a1 1003; LMULMAX1-RV64-NEXT: vadd.vx v9, v9, a1 1004; LMULMAX1-RV64-NEXT: vadd.vx v8, v8, a1 1005; LMULMAX1-RV64-NEXT: vse64.v v8, (a2) 1006; LMULMAX1-RV64-NEXT: addi a0, a2, 96 1007; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) 1008; LMULMAX1-RV64-NEXT: addi a0, a2, 112 1009; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) 1010; LMULMAX1-RV64-NEXT: addi a0, a2, 64 1011; LMULMAX1-RV64-NEXT: vse64.v v11, (a0) 1012; LMULMAX1-RV64-NEXT: addi a0, a2, 80 1013; LMULMAX1-RV64-NEXT: vse64.v v15, (a0) 1014; LMULMAX1-RV64-NEXT: addi a0, a2, 32 1015; LMULMAX1-RV64-NEXT: vse64.v v14, (a0) 1016; LMULMAX1-RV64-NEXT: addi a0, a2, 48 1017; LMULMAX1-RV64-NEXT: vse64.v v12, (a0) 1018; LMULMAX1-RV64-NEXT: addi a2, a2, 16 1019; LMULMAX1-RV64-NEXT: vse64.v v13, (a2) 1020; LMULMAX1-RV64-NEXT: ret 1021 %va = load <16 x i64>, ptr %a 1022 %head = insertelement <16 x i64> 
poison, i64 %b, i32 0 1023 %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer 1024 %vc = add <16 x i64> %va, %splat 1025 store <16 x i64> %vc, ptr %c 1026 ret void 1027} 1028