; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64

; Broadcast the i8 argument %y into every lane of a <16 x i8> and store it
; through %x.
define void @splat_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %splat, ptr %x
  ret void
}

; Broadcast the i16 argument %y into every lane of a <8 x i16> and store it
; through %x.
define void @splat_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %splat, ptr %x
  ret void
}

; Broadcast the i32 argument %y into every lane of a <4 x i32> and store it
; through %x.
define void @splat_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %splat, ptr %x
  ret void
}

; Broadcast the i64 argument %y into both lanes of a <2 x i64>. The riscv32
; expectations write a1/a2 to the stack and reload them with a zero-stride
; vlse64.v, while the riscv64 expectations use vmv.v.x directly.
define void @splat_v2i64(ptr %x, i64 %y) {
; LMULMAX8-RV32-LABEL: splat_v2i64:
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: addi sp, sp, -16
; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX8-RV32-NEXT: sw a2, 12(sp)
; LMULMAX8-RV32-NEXT: sw a1, 8(sp)
; LMULMAX8-RV32-NEXT: addi a1, sp, 8
; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero
; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV32-NEXT: addi sp, sp, 16
; LMULMAX8-RV32-NEXT: ret
;
; LMULMAX2-RV32-LABEL: splat_v2i64:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: addi sp, sp, -16
; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX2-RV32-NEXT: sw a2, 12(sp)
; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
; LMULMAX2-RV32-NEXT: addi a1, sp, 8
; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT: addi sp, sp, 16
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_v2i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi sp, sp, -16
; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX1-RV32-NEXT: sw a2, 12(sp)
; LMULMAX1-RV32-NEXT: sw a1, 8(sp)
; LMULMAX1-RV32-NEXT: addi a1, sp, 8
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vlse64.v v8, (a1), zero
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi sp, sp, 16
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX8-RV64-LABEL: splat_v2i64:
; LMULMAX8-RV64: # %bb.0:
; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1
; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT: ret
;
; LMULMAX2-RV64-LABEL: splat_v2i64:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1
; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_v2i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
  %ins = insertelement <2 x i64> poison, i64 %y, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat, ptr %x
  ret void
}

; Broadcast the i8 argument %y into a 256-bit <32 x i8>. With lmul-max=1 the
; expected code issues two 16-byte stores (at %x and %x+16).
define void @splat_v32i8(ptr %x, i8 %y) {
; LMULMAX8-LABEL: splat_v32i8:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: li a2, 32
; LMULMAX8-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.x v8, a1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.x v8, a1
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.x v8, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v8, (a1)
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <32 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <32 x i8> %ins, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %x
  ret void
}

; Broadcast the i16 argument %y into a 256-bit <16 x i16>. With lmul-max=1 the
; expected code issues two 16-byte stores.
define void @splat_v16i16(ptr %x, i16 %y) {
; LMULMAX8-LABEL: splat_v16i16:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.x v8, a1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.x v8, a1
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.x v8, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v8, (a1)
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <16 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <16 x i16> %ins, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %x
  ret void
}

; Broadcast the i32 argument %y into a 256-bit <8 x i32>. With lmul-max=1 the
; expected code issues two 16-byte stores.
define void @splat_v8i32(ptr %x, i32 %y) {
; LMULMAX8-LABEL: splat_v8i32:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.x v8, a1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.x v8, a1
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.x v8, a1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v8, (a1)
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <8 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <8 x i32> %ins, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %x
  ret void
}

; Broadcast the i64 argument %y into a 256-bit <4 x i64>. On riscv32 with
; lmul-max=1 the expected code builds the value as 32-bit elements using
; vmv.v.x + vmerge.vxm under a mask of 5 and stores it with vse32.v.
define void @splat_v4i64(ptr %x, i64 %y) {
; LMULMAX8-RV32-LABEL: splat_v4i64:
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: addi sp, sp, -16
; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX8-RV32-NEXT: sw a2, 12(sp)
; LMULMAX8-RV32-NEXT: sw a1, 8(sp)
; LMULMAX8-RV32-NEXT: addi a1, sp, 8
; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero
; LMULMAX8-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV32-NEXT: addi sp, sp, 16
; LMULMAX8-RV32-NEXT: ret
;
; LMULMAX2-RV32-LABEL: splat_v4i64:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: addi sp, sp, -16
; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX2-RV32-NEXT: sw a2, 12(sp)
; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
; LMULMAX2-RV32-NEXT: addi a1, sp, 8
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT: addi sp, sp, 16
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: li a3, 5
; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2
; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX8-RV64-LABEL: splat_v4i64:
; LMULMAX8-RV64: # %bb.0:
; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1
; LMULMAX8-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX8-RV64-NEXT: ret
;
; LMULMAX2-RV64-LABEL: splat_v4i64:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1
; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
  %ins = insertelement <4 x i64> poison, i64 %y, i32 0
  %splat = shufflevector <4 x i64> %ins, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into <16 x i8>; expected to select vmv.v.i.
define void @splat_zero_v16i8(ptr %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <16 x i8> poison, i8 0, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into <8 x i16>; expected to select vmv.v.i.
define void @splat_zero_v8i16(ptr %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <8 x i16> poison, i16 0, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into <4 x i32>; expected to select vmv.v.i.
define void @splat_zero_v4i32(ptr %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <4 x i32> poison, i32 0, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into <2 x i64>; expected to select vmv.v.i on both
; riscv32 and riscv64.
define void @splat_zero_v2i64(ptr %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <2 x i64> poison, i64 0, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into a 256-bit <32 x i8>. With lmul-max=1 the
; expected code stores the same m1 register twice, 16 bytes apart.
define void @splat_zero_v32i8(ptr %x) {
; LMULMAX8-LABEL: splat_zero_v32i8:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: li a1, 32
; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, 0
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_zero_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a1, 32
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, 0
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v8, 0
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <32 x i8> poison, i8 0, i32 0
  %splat = shufflevector <32 x i8> %ins, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into a 256-bit <16 x i16>.
define void @splat_zero_v16i16(ptr %x) {
; LMULMAX8-LABEL: splat_zero_v16i16:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, 0
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_zero_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, 0
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v8, 0
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <16 x i16> poison, i16 0, i32 0
  %splat = shufflevector <16 x i16> %ins, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into a 256-bit <8 x i32>.
define void @splat_zero_v8i32(ptr %x) {
; LMULMAX8-LABEL: splat_zero_v8i32:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, 0
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_zero_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, 0
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_zero_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v8, 0
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <8 x i32> poison, i32 0, i32 0
  %splat = shufflevector <8 x i32> %ins, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %x
  ret void
}

; Splat of the constant 0 into a 256-bit <4 x i64>. With lmul-max=1 the
; riscv32 expectations use e32 elements while riscv64 keeps e64.
define void @splat_zero_v4i64(ptr %x) {
; LMULMAX8-LABEL: splat_zero_v4i64:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, 0
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_zero_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, 0
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_zero_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_zero_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a0, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
  %ins = insertelement <4 x i64> poison, i64 0, i32 0
  %splat = shufflevector <4 x i64> %ins, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %x
  ret void
}

; Store of a zero <2 x i16> with default alignment; the expected code is a
; single scalar sw.
define void @splat_zero_v2i16(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sw zero, 0(a0)
; CHECK-NEXT: ret
  store <2 x i16> zeroinitializer, ptr %p
  ret void
}

; Same zero <2 x i16> store but with align 1; the expected code stays in the
; vector unit and stores four bytes with vse8.v.
define void @splat_zero_v2i16_unaligned(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  store <2 x i16> zeroinitializer, ptr %p, align 1
  ret void
}

; Store of a zero <4 x i16> (64 bits): riscv64 expects one scalar sd, riscv32
; expects a vector store.
define void @splat_zero_v4i16(ptr %p) {
; LMULMAX8-RV32-LABEL: splat_zero_v4i16:
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX8-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX8-RV32-NEXT: ret
;
; LMULMAX2-RV32-LABEL: splat_zero_v4i16:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_zero_v4i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX8-RV64-LABEL: splat_zero_v4i16:
; LMULMAX8-RV64: # %bb.0:
; LMULMAX8-RV64-NEXT: sd zero, 0(a0)
; LMULMAX8-RV64-NEXT: ret
;
; LMULMAX2-RV64-LABEL: splat_zero_v4i16:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: sd zero, 0(a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_zero_v4i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: sd zero, 0(a0)
; LMULMAX1-RV64-NEXT: ret
  store <4 x i16> zeroinitializer, ptr %p
  ret void
}

; Store of a zero <2 x i32> (64 bits): riscv64 expects one scalar sd, riscv32
; expects a vector store.
define void @splat_zero_v2i32(ptr %p) {
; LMULMAX8-RV32-LABEL: splat_zero_v2i32:
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX8-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX8-RV32-NEXT: ret
;
; LMULMAX2-RV32-LABEL: splat_zero_v2i32:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_zero_v2i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX8-RV64-LABEL: splat_zero_v2i32:
; LMULMAX8-RV64: # %bb.0:
; LMULMAX8-RV64-NEXT: sd zero, 0(a0)
; LMULMAX8-RV64-NEXT: ret
;
; LMULMAX2-RV64-LABEL: splat_zero_v2i32:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: sd zero, 0(a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_zero_v2i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: sd zero, 0(a0)
; LMULMAX1-RV64-NEXT: ret
  store <2 x i32> zeroinitializer, ptr %p
  ret void
}

; Not a power of two and requires more than two scalar stores.
; Store of an all-zero <7 x i16> (14 bytes) through %p.
define void @splat_zero_v7i16(ptr %p) {
; LMULMAX8-RV32-LABEL: splat_zero_v7i16:
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: sh zero, 12(a0)
; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX8-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX8-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8-RV32-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX8-RV32-NEXT: addi a0, a0, 8
; LMULMAX8-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX8-RV32-NEXT: ret
;
; LMULMAX2-RV32-LABEL: splat_zero_v7i16:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: sh zero, 12(a0)
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX2-RV32-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX2-RV32-NEXT: addi a0, a0, 8
; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_zero_v7i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: sh zero, 12(a0)
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV32-NEXT: addi a0, a0, 8
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX8-RV64-LABEL: splat_zero_v7i16:
; LMULMAX8-RV64: # %bb.0:
; LMULMAX8-RV64-NEXT: sh zero, 12(a0)
; LMULMAX8-RV64-NEXT: sw zero, 8(a0)
; LMULMAX8-RV64-NEXT: sd zero, 0(a0)
; LMULMAX8-RV64-NEXT: ret
;
; LMULMAX2-RV64-LABEL: splat_zero_v7i16:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: sh zero, 12(a0)
; LMULMAX2-RV64-NEXT: sw zero, 8(a0)
; LMULMAX2-RV64-NEXT: sd zero, 0(a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_zero_v7i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: sh zero, 12(a0)
; LMULMAX1-RV64-NEXT: sw zero, 8(a0)
; LMULMAX1-RV64-NEXT: sd zero, 0(a0)
; LMULMAX1-RV64-NEXT: ret
  store <7 x i16> zeroinitializer, ptr %p
  ret void
}

; Splat of the constant -1 into <16 x i8>; expected to select vmv.v.i with -1.
define void @splat_allones_v16i8(ptr %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <16 x i8> poison, i8 -1, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into <8 x i16>; expected to select vmv.v.i with -1.
define void @splat_allones_v8i16(ptr %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <8 x i16> poison, i16 -1, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into <4 x i32>; expected to select vmv.v.i with -1.
define void @splat_allones_v4i32(ptr %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <4 x i32> poison, i32 -1, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into <2 x i64>; expected to select vmv.v.i with -1
; on both riscv32 and riscv64.
define void @splat_allones_v2i64(ptr %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %ins = insertelement <2 x i64> poison, i64 -1, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into a 256-bit <32 x i8>. With lmul-max=1 the
; expected code stores the same m1 register twice, 16 bytes apart.
define void @splat_allones_v32i8(ptr %x) {
; LMULMAX8-LABEL: splat_allones_v32i8:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: li a1, 32
; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, -1
; LMULMAX8-NEXT: vse8.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_allones_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a1, 32
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, -1
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v8, -1
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <32 x i8> poison, i8 -1, i32 0
  %splat = shufflevector <32 x i8> %ins, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into a 256-bit <16 x i16>.
define void @splat_allones_v16i16(ptr %x) {
; LMULMAX8-LABEL: splat_allones_v16i16:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, -1
; LMULMAX8-NEXT: vse16.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_allones_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, -1
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v8, -1
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <16 x i16> poison, i16 -1, i32 0
  %splat = shufflevector <16 x i16> %ins, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into a 256-bit <8 x i32>.
define void @splat_allones_v8i32(ptr %x) {
; LMULMAX8-LABEL: splat_allones_v8i32:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, -1
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_allones_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, -1
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: splat_allones_v8i32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v8, -1
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
  %ins = insertelement <8 x i32> poison, i32 -1, i32 0
  %splat = shufflevector <8 x i32> %ins, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %x
  ret void
}

; Splat of the constant -1 into a 256-bit <4 x i64>. With lmul-max=1 the
; riscv32 expectations use e32 elements while riscv64 keeps e64.
define void @splat_allones_v4i64(ptr %x) {
; LMULMAX8-LABEL: splat_allones_v4i64:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX8-NEXT: vmv.v.i v8, -1
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_allones_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vmv.v.i v8, -1
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_allones_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_allones_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a0, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
  %ins = insertelement <4 x i64> poison, i64 -1, i32 0
  %splat = shufflevector <4 x i64> %ins, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %x
  ret void
}

; This requires a bitcast on RV32 due to type legalization rewriting the
; build_vector to v8i32.
; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
; with SEW=64 on RV32.
define void @splat_allones_with_use_v4i64(ptr %x) {
; LMULMAX8-LABEL: splat_allones_with_use_v4i64:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX8-NEXT: vle64.v v8, (a0)
; LMULMAX8-NEXT: vadd.vi v8, v8, -1
; LMULMAX8-NEXT: vse64.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX2-LABEL: splat_allones_with_use_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vadd.vi v8, v8, -1
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v8, (a1)
; LMULMAX1-RV64-NEXT: vle64.v v9, (a0)
; LMULMAX1-RV64-NEXT: vadd.vi v8, v8, -1
; LMULMAX1-RV64-NEXT: vadd.vi v9, v9, -1
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1-RV64-NEXT: ret
  %vec = load <4 x i64>, ptr %x
  %dec = add <4 x i64> %vec, <i64 -1, i64 -1, i64 -1, i64 -1>
  store <4 x i64> %dec, ptr %x
  ret void
}

; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
; nxv8i64 at index 0: the v16i64 type was used to get the LMUL, the size of
; which exceeded maximum-expected size of 512. The scalable container type of
; nxv8i64 should have been used instead.
define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
; LMULMAX8-RV32-LABEL: vadd_vx_v16i64:
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: addi sp, sp, -16
; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16
; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX8-RV32-NEXT: sw a2, 12(sp)
; LMULMAX8-RV32-NEXT: sw a1, 8(sp)
; LMULMAX8-RV32-NEXT: addi a0, sp, 8
; LMULMAX8-RV32-NEXT: vlse64.v v16, (a0), zero
; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16
; LMULMAX8-RV32-NEXT: vse64.v v8, (a3)
; LMULMAX8-RV32-NEXT: addi sp, sp, 16
; LMULMAX8-RV32-NEXT: ret
;
; LMULMAX2-RV32-LABEL: vadd_vx_v16i64:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: addi a4, a0, 64
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle64.v v8, (a4)
; LMULMAX2-RV32-NEXT: addi a4, a0, 96
; LMULMAX2-RV32-NEXT: vle64.v v10, (a4)
; LMULMAX2-RV32-NEXT: vle64.v v12, (a0)
; LMULMAX2-RV32-NEXT: addi a0, a0, 32
; LMULMAX2-RV32-NEXT: vle64.v v14, (a0)
; LMULMAX2-RV32-NEXT: li a0, 85
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.x v16, a2
; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v16, a1, v0
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vadd.vv v14, v14, v16
; LMULMAX2-RV32-NEXT: vadd.vv v12, v12, v16
; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v16
; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v16
; LMULMAX2-RV32-NEXT: addi a0, a3, 64
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT: addi a0, a3, 96
; LMULMAX2-RV32-NEXT: vse64.v v10, (a0)
; LMULMAX2-RV32-NEXT: vse64.v v12, (a3)
; LMULMAX2-RV32-NEXT: addi a0, a3, 32
; LMULMAX2-RV32-NEXT: vse64.v v14, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX1-RV32-LABEL: vadd_vx_v16i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi a4, a0, 96
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a4)
; LMULMAX1-RV32-NEXT: addi a4, a0, 112
; LMULMAX1-RV32-NEXT: vle64.v v9, (a4)
; LMULMAX1-RV32-NEXT: addi a4, a0, 64
; LMULMAX1-RV32-NEXT: vle64.v v10, (a4)
; LMULMAX1-RV32-NEXT: addi a4, a0, 80
; LMULMAX1-RV32-NEXT: vle64.v v11, (a4)
; LMULMAX1-RV32-NEXT: addi a4, a0, 32
; LMULMAX1-RV32-NEXT: vle64.v v12, (a4)
; LMULMAX1-RV32-NEXT: addi a4, a0, 48
; LMULMAX1-RV32-NEXT: vle64.v v13, (a4)
; LMULMAX1-RV32-NEXT: vle64.v v14, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v15, (a0)
; LMULMAX1-RV32-NEXT: li a0, 5
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a0
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2
; LMULMAX1-RV32-NEXT: vmerge.vxm v16, v16, a1, v0
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vadd.vv v15, v15, v16
; LMULMAX1-RV32-NEXT: vadd.vv v14, v14, v16
; LMULMAX1-RV32-NEXT: vadd.vv v13, v13, v16
; LMULMAX1-RV32-NEXT: vadd.vv v12, v12, v16
; LMULMAX1-RV32-NEXT: vadd.vv v11, v11, v16
; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v16
; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v16
; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v16
; LMULMAX1-RV32-NEXT: addi a0, a3, 96
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a3, 112
; LMULMAX1-RV32-NEXT: vse64.v v9, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a3, 64
; LMULMAX1-RV32-NEXT: vse64.v v10, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a3, 80
; LMULMAX1-RV32-NEXT: vse64.v v11, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a3, 32
; LMULMAX1-RV32-NEXT: vse64.v v12, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a3, 48
; LMULMAX1-RV32-NEXT: vse64.v v13, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v14, (a3)
; LMULMAX1-RV32-NEXT: addi a3, a3, 16
; LMULMAX1-RV32-NEXT: vse64.v v15, (a3)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX8-RV64-LABEL: vadd_vx_v16i64:
; LMULMAX8-RV64: # %bb.0:
; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; LMULMAX8-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX8-RV64-NEXT: vadd.vx v8, v8, a1
; LMULMAX8-RV64-NEXT: vse64.v v8, (a2)
; LMULMAX8-RV64-NEXT: ret
;
; LMULMAX2-RV64-LABEL: vadd_vx_v16i64:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: addi a3, a0, 96
; LMULMAX2-RV64-NEXT: vle64.v v8, (a3)
; LMULMAX2-RV64-NEXT: addi a3, a0, 32
; LMULMAX2-RV64-NEXT: vle64.v v10, (a3)
; LMULMAX2-RV64-NEXT: addi a3, a0, 64
; LMULMAX2-RV64-NEXT: vle64.v v12, (a3)
; LMULMAX2-RV64-NEXT: vle64.v v14, (a0)
; LMULMAX2-RV64-NEXT: vadd.vx v10, v10, a1
; LMULMAX2-RV64-NEXT: vadd.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vadd.vx v12, v12, a1
; LMULMAX2-RV64-NEXT: vadd.vx v14, v14, a1
; LMULMAX2-RV64-NEXT: vse64.v v14, (a2)
; LMULMAX2-RV64-NEXT: addi a0, a2, 64
; LMULMAX2-RV64-NEXT: vse64.v v12, (a0)
; LMULMAX2-RV64-NEXT: addi a0, a2, 96
; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
976; LMULMAX2-RV64-NEXT: addi a0, a2, 32 977; LMULMAX2-RV64-NEXT: vse64.v v10, (a0) 978; LMULMAX2-RV64-NEXT: ret 979; 980; LMULMAX1-RV64-LABEL: vadd_vx_v16i64: 981; LMULMAX1-RV64: # %bb.0: 982; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma 983; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) 984; LMULMAX1-RV64-NEXT: addi a3, a0, 96 985; LMULMAX1-RV64-NEXT: vle64.v v9, (a3) 986; LMULMAX1-RV64-NEXT: addi a3, a0, 112 987; LMULMAX1-RV64-NEXT: vle64.v v10, (a3) 988; LMULMAX1-RV64-NEXT: addi a3, a0, 64 989; LMULMAX1-RV64-NEXT: vle64.v v11, (a3) 990; LMULMAX1-RV64-NEXT: addi a3, a0, 48 991; LMULMAX1-RV64-NEXT: vle64.v v12, (a3) 992; LMULMAX1-RV64-NEXT: addi a3, a0, 16 993; LMULMAX1-RV64-NEXT: vle64.v v13, (a3) 994; LMULMAX1-RV64-NEXT: addi a3, a0, 80 995; LMULMAX1-RV64-NEXT: addi a0, a0, 32 996; LMULMAX1-RV64-NEXT: vle64.v v14, (a0) 997; LMULMAX1-RV64-NEXT: vle64.v v15, (a3) 998; LMULMAX1-RV64-NEXT: vadd.vx v13, v13, a1 999; LMULMAX1-RV64-NEXT: vadd.vx v12, v12, a1 1000; LMULMAX1-RV64-NEXT: vadd.vx v14, v14, a1 1001; LMULMAX1-RV64-NEXT: vadd.vx v15, v15, a1 1002; LMULMAX1-RV64-NEXT: vadd.vx v11, v11, a1 1003; LMULMAX1-RV64-NEXT: vadd.vx v10, v10, a1 1004; LMULMAX1-RV64-NEXT: vadd.vx v9, v9, a1 1005; LMULMAX1-RV64-NEXT: vadd.vx v8, v8, a1 1006; LMULMAX1-RV64-NEXT: vse64.v v8, (a2) 1007; LMULMAX1-RV64-NEXT: addi a0, a2, 96 1008; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) 1009; LMULMAX1-RV64-NEXT: addi a0, a2, 112 1010; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) 1011; LMULMAX1-RV64-NEXT: addi a0, a2, 64 1012; LMULMAX1-RV64-NEXT: vse64.v v11, (a0) 1013; LMULMAX1-RV64-NEXT: addi a0, a2, 80 1014; LMULMAX1-RV64-NEXT: vse64.v v15, (a0) 1015; LMULMAX1-RV64-NEXT: addi a0, a2, 32 1016; LMULMAX1-RV64-NEXT: vse64.v v14, (a0) 1017; LMULMAX1-RV64-NEXT: addi a0, a2, 48 1018; LMULMAX1-RV64-NEXT: vse64.v v12, (a0) 1019; LMULMAX1-RV64-NEXT: addi a2, a2, 16 1020; LMULMAX1-RV64-NEXT: vse64.v v13, (a2) 1021; LMULMAX1-RV64-NEXT: ret 1022 %va = load <16 x i64>, ptr %a 1023 %head = insertelement <16 x i64> 
poison, i64 %b, i32 0 1024 %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer 1025 %vc = add <16 x i64> %va, %splat 1026 store <16 x i64> %vc, ptr %c 1027 ret void 1028} 1029