; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define void @splat_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i8> poison, i8 %y, i32 0
  %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %b, ptr %x
  ret void
}

define void @splat_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i16> poison, i16 %y, i32 0
  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %b, ptr %x
  ret void
}

define void @splat_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <4 x i32> poison, i32 %y, i32 0
  %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %b, ptr %x
  ret void
}

define void @splat_v2i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <2 x i64> poison, i64 %y, i32 0
  %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %b, ptr %x
  ret void
}

define void @splat_v32i8(ptr %x, i8 %y) {
; CHECK-LABEL: splat_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <32 x i8> poison, i8 %y, i32 0
  %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %b, ptr %x
  ret void
}

define void @splat_v16i16(ptr %x, i16 %y) {
; CHECK-LABEL: splat_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <16 x i16> poison, i16 %y, i32 0
  %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %b, ptr %x
  ret void
}

define void @splat_v8i32(ptr %x, i32 %y) {
; CHECK-LABEL: splat_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = insertelement <8 x i32> poison, i32 %y, i32 0
  %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %b, ptr %x
  ret void
}

define void @splat_v4i64(ptr %x, i64 %y) {
; RV32-LABEL: splat_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = insertelement <4 x i64> poison, i64 %y, i32 0
  %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %b, ptr %x
  ret void
}

define void @splat_zero_v16i8(ptr %x) {
; CHECK-LABEL: splat_zero_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v8i16(ptr %x) {
; CHECK-LABEL: splat_zero_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v4i32(ptr %x) {
; CHECK-LABEL: splat_zero_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v2i64(ptr %x) {
; CHECK-LABEL: splat_zero_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 0), ptr %x
  ret void
}

define void @splat_zero_v32i8(ptr %x) {
; CHECK-LABEL: splat_zero_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 0), ptr %x
  ret void
}

define void @splat_zero_v16i16(ptr %x) {
; CHECK-LABEL: splat_zero_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 0), ptr %x
  ret void
}

define void @splat_zero_v8i32(ptr %x) {
; CHECK-LABEL: splat_zero_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 0), ptr %x
  ret void
}

define void @splat_zero_v4i64(ptr %x) {
; CHECK-LABEL: splat_zero_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 0), ptr %x
  ret void
}

define void @splat_zero_v2i16(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sw zero, 0(a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i16_unaligned(ptr %p) {
; CHECK-LABEL: splat_zero_v2i16_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i16> zeroinitializer, ptr %p, align 1
  ret void
}

define void @splat_zero_v4i16(ptr %p) {
; RV32-LABEL: splat_zero_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse16.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <4 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_zero_v2i32(ptr %p) {
; RV32-LABEL: splat_zero_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: splat_zero_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sd zero, 0(a0)
; RV64-NEXT:    ret
  store <2 x i32> zeroinitializer, ptr %p
  ret void
}

; Not a power of two and requires more than two scalar stores.
define void @splat_zero_v7i16(ptr %p) {
; CHECK-LABEL: splat_zero_v7i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <7 x i16> zeroinitializer, ptr %p
  ret void
}

define void @splat_allones_v16i8(ptr %x) {
; CHECK-LABEL: splat_allones_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v8i16(ptr %x) {
; CHECK-LABEL: splat_allones_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v4i32(ptr %x) {
; CHECK-LABEL: splat_allones_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v2i64(ptr %x) {
; CHECK-LABEL: splat_allones_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <2 x i64> splat (i64 -1), ptr %x
  ret void
}

define void @splat_allones_v32i8(ptr %x) {
; CHECK-LABEL: splat_allones_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <32 x i8> splat (i8 -1), ptr %x
  ret void
}

define void @splat_allones_v16i16(ptr %x) {
; CHECK-LABEL: splat_allones_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i16> splat (i16 -1), ptr %x
  ret void
}

define void @splat_allones_v8i32(ptr %x) {
; CHECK-LABEL: splat_allones_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  store <8 x i32> splat (i32 -1), ptr %x
  ret void
}

define void @splat_allones_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  store <4 x i64> splat (i64 -1), ptr %x
  ret void
}

; This requires a bitcast on RV32 due to type legalization rewriting the
; build_vector to v8i32.
; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
; with SEW=64 on RV32.
define void @splat_allones_with_use_v4i64(ptr %x) {
; CHECK-LABEL: splat_allones_with_use_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  store <4 x i64> %b, ptr %x
  ret void
}

; This test used to crash at LMUL=8 when inserting a v16i64 subvector into
; nxv8i64 at index 0: the v16i64 type was used to get the LMUL, the size of
; which exceeded maximum-expected size of 512. The scalable container type of
; nxv8i64 should have been used instead.
define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
; RV32-LABEL: vadd_vx_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    vse64.v v8, (a3)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vadd_vx_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vx v8, v8, a1
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %va = load <16 x i64>, ptr %a
  %head = insertelement <16 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer
  %vc = add <16 x i64> %va, %splat
  store <16 x i64> %vc, ptr %c
  ret void
}