; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define void @splat_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> poison, i8 %y, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %b, ptr %x
  ret void
}

define void @splat_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> poison, i16 %y, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %b, ptr %x
  ret void
}

define void @splat_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> poison, i32 %y, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %b, ptr %x
  ret void
}

define void @splat_v2i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <2 x i64> poison, i64 %y, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %b, ptr %x
  ret void
}

define void @splat_v32i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <32 x i8> poison, i8 %y, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %b, ptr %x
  ret void
}

define void @splat_v16i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i16> poison, i16 %y, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %b, ptr %x
  ret void
}

define void @splat_v8i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i32> poison, i32 %y, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %b, ptr %x
  ret void
}

define void @splat_v4i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <4 x i64> poison, i64 %y, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %b, ptr %x
  ret void
}

define void @splat_zero_v16i8(ptr %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v8i16(ptr %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v4i32(ptr %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v2i64(ptr %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 0), ptr %x
  ret void
}

define void @splat_zero_v32i8(ptr %x) {
; CHECK-LABEL: splat_zero_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v16i16(ptr %x) {
; CHECK-LABEL: splat_zero_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v8i32(ptr %x) {
; CHECK-LABEL: splat_zero_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v4i64(ptr %x) {
; CHECK-LABEL: splat_zero_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 0), ptr %x
  ret void
}

define void @splat_zero_v2i16(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sw zero, 0(a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i16_unaligned(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p, align 1
  ret void
}

define void @splat_zero_v4i16(ptr %p) {
; RV32-LABEL: splat_zero_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse16.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <4 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i32(ptr %p) {
; RV32-LABEL: splat_zero_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <2 x i32> zeroinitializer, ptr %p
  ret void
}

; Not a power of two and requires more than two scalar stores.
define void @splat_zero_v7i16(ptr %p) {
; CHECK-LABEL: splat_zero_v7i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <7 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_allones_v16i8(ptr %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v8i16(ptr %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v4i32(ptr %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v2i64(ptr %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 -1), ptr %x
  ret void
}

define void @splat_allones_v32i8(ptr %x) {
; CHECK-LABEL: splat_allones_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v16i16(ptr %x) {
; CHECK-LABEL: splat_allones_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v8i32(ptr %x) {
; CHECK-LABEL: splat_allones_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 -1), ptr %x
  ret void
}

; This requires a bitcast on RV32 due to type legalization rewriting the
; build_vector to v8i32.
; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
; with SEW=64 on RV32.
define void @splat_allones_with_use_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_with_use_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  store <4 x i64> %b, ptr %x
  ret void
}

; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
; nxv8i64 at index 0: the v16i64 type was used to get the LMUL, the size of
; which exceeded maximum-expected size of 512. The scalable container type of
; nxv8i64 should have been used instead.
define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
; RV32-LABEL: vadd_vx_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    vse64.v v8, (a3)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vadd_vx_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vx v8, v8, a1
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %va = load <16 x i64>, ptr %a
  %head = insertelement <16 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer
  %vc = add <16 x i64> %va, %splat
  store <16 x i64> %vc, ptr %c
  ret void
}