; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFH
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFH
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFHMIN,ZVFHMINRV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFHMIN,ZVFHMINRV64

define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <4 x i32> %a, i32 %y, i32 0
  ret <4 x i32> %b
}

define <4 x i32> @insertelt_v4i32_3(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    ret
  %b = insertelement <4 x i32> %a, i32 %y, i32 3
  ret <4 x i32> %b
}

define <4 x i32> @insertelt_v4i32_idx(<4 x i32> %a, i32 %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v4i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a2, a1, 1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a1
; CHECK-NEXT:    ret
  %b = insertelement <4 x i32> %a, i32 %y, i32 %idx
  ret <4 x i32> %b
}

define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 0
  ret <32 x i32> %b
}

define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vslideup.vi v8, v16, 4
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 4
  ret <32 x i32> %b
}

define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_31:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vslideup.vi v8, v16, 31
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 31
  ret <32 x i32> %b
}

define <32 x i32> @insertelt_v32i32_idx(<32 x i32> %a, i32 %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v32i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    addi a0, a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT:    vslideup.vx v8, v16, a1
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 %idx
  ret <32 x i32> %b
}

define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 0
  ret <64 x i32> %b
}

define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_63:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmv.s.x v24, a0
; CHECK-NEXT:    vslideup.vi v16, v24, 31
; CHECK-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 63
  ret <64 x i32> %b
}

define <64 x i32> @insertelt_v64i32_idx(<64 x i32> %a, i32 %y, i32 zeroext %idx) {
; RV32-LABEL: insertelt_v64i32_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -384
; RV32-NEXT:    .cfi_def_cfa_offset 384
; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 384
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    andi a1, a1, 63
; RV32-NEXT:    mv a2, sp
; RV32-NEXT:    addi a3, sp, 128
; RV32-NEXT:    li a4, 32
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v16, (a3)
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    sw a0, 0(a1)
; RV32-NEXT:    vle32.v v8, (a2)
; RV32-NEXT:    vle32.v v16, (a3)
; RV32-NEXT:    addi sp, s0, -384
; RV32-NEXT:    .cfi_def_cfa sp, 384
; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 384
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v64i32_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -384
; RV64-NEXT:    .cfi_def_cfa_offset 384
; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 384
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    andi a1, a1, 63
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    addi a3, sp, 128
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v16, (a3)
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    add a1, a2, a1
; RV64-NEXT:    sw a0, 0(a1)
; RV64-NEXT:    vle32.v v8, (a2)
; RV64-NEXT:    vle32.v v16, (a3)
; RV64-NEXT:    addi sp, s0, -384
; RV64-NEXT:    .cfi_def_cfa sp, 384
; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 384
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 %idx
  ret <64 x i32> %b
}

; FIXME: This codegen needs to be improved. These tests previously asserted
; while type legalizing the i64 type on RV32.

define <4 x i64> @insertelt_v4i64(<4 x i64> %a, i64 %y) {
; RV32-LABEL: insertelt_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v8, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 3
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, a0
; RV64-NEXT:    vslideup.vi v8, v10, 3
; RV64-NEXT:    ret
  %b = insertelement <4 x i64> %a, i64 %y, i32 3
  ret <4 x i64> %b
}

define void @insertelt_v4i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v4i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    sw a1, 24(a0)
; RV32-NEXT:    sw a2, 28(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v4i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    sd a1, 24(a0)
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = insertelement <4 x i64> %a, i64 %y, i32 3
  store <4 x i64> %b, ptr %x
  ret void
}

; This uses a non-power of 2 type so that it isn't an MVT.
; The align keeps the type legalizer from using a 256 bit load so we must split
; it. This tests some operations that weren't supported for scalable vectors
; when this test was written.
define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) {
; RV32-LABEL: insertelt_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vmv.x.s a3, v9
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vmv.x.s a5, v8
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a5
; RV32-NEXT:    vslide1down.vx v8, v8, a4
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    ret
  %b = insertelement <3 x i64> %a, i64 %y, i32 2
  ret <3 x i64> %b
}

define void @insertelt_v3i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v3i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    sw a1, 16(a0)
; RV32-NEXT:    sw a2, 20(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v3i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    sd a1, 16(a0)
; RV64-NEXT:    ret
  %a = load <3 x i64>, ptr %x, align 8
  %b = insertelement <3 x i64> %a, i64 %y, i32 2
  store <3 x i64> %b, ptr %x
  ret void
}

define <16 x i8> @insertelt_v16i8(<16 x i8> %a, i8 %y) {
; CHECK-LABEL: insertelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 15, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 14
; CHECK-NEXT:    ret
  %b = insertelement <16 x i8> %a, i8 %y, i32 14
  ret <16 x i8> %b
}

define void @insertelt_v16i8_store(ptr %x, i8 %y) {
; CHECK-LABEL: insertelt_v16i8_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sb a1, 14(a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> %a, i8 %y, i32 14
  store <16 x i8> %b, ptr %x
  ret void
}

define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) {
; RV32-LABEL: insertelt_v32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; RV32-NEXT:    vmv.s.x v12, a0
; RV32-NEXT:    addi a0, a1, 1
; RV32-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; RV32-NEXT:    vslideup.vx v8, v12, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    slli a1, a1, 32
; RV64-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; RV64-NEXT:    vmv.s.x v12, a0
; RV64-NEXT:    srli a1, a1, 32
; RV64-NEXT:    addi a0, a1, 1
; RV64-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; RV64-NEXT:    vslideup.vx v8, v12, a1
; RV64-NEXT:    ret
  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
  ret <32 x i16> %b
}

define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) {
; CHECK-LABEL: insertelt_v32i16_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a2, a2, 31
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    sh a1, 0(a0)
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
  store <32 x i16> %b, ptr %x
  ret void
}

define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) {
; RV32-LABEL: insertelt_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; RV32-NEXT:    vfmv.s.f v10, fa0
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; RV32-NEXT:    vslideup.vx v8, v10, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
; RV64-NEXT:    vfmv.s.f v10, fa0
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addi a1, a0, 1
; RV64-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; RV64-NEXT:    vslideup.vx v8, v10, a0
; RV64-NEXT:    ret
  %b = insertelement <8 x float> %a, float %y, i32 %idx
  ret <8 x float> %b
}

define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) {
; CHECK-LABEL: insertelt_v8f32_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a1, a1, 7
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    fsw fa0, 0(a0)
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> %a, float %y, i32 %idx
  store <8 x float> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_v8i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 -1, i32 0
  ret <8 x i64> %b
}

define void @insertelt_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_v8i64_0_store:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a1, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8i64_0_store:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 -1, i32 0
  store <8 x i64> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT:    vmv.v.i v12, -1
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vx v8, v12, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.i v12, -1
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addi a1, a0, 1
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vslideup.vx v8, v12, a0
; RV64-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 -1, i32 %idx
  ret <8 x i64> %b
}

define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
; RV32-LABEL: insertelt_v8i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a1, a1, 7
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a1, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_v8i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a1, a1, 7
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 -1, i32 %idx
  store <8 x i64> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_c6_v8i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 6
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  ret <8 x i64> %b
}

define void @insertelt_c6_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_c6_v8i64_0_store:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 6
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw zero, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c6_v8i64_0_store:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 6
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  store <8 x i64> %b, ptr %x
  ret void
}

define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV32-NEXT:    vmv.v.i v12, 6
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vx v8, v12, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c6_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT:    vmv.v.i v12, 6
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addi a1, a0, 1
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vslideup.vx v8, v12, a0
; RV64-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 6, i32 %idx
  ret <8 x i64> %b
}

define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64_store:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a1, a1, 7
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    li a1, 6
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw zero, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c6_v8i64_store:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a1, a1, 7
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    li a1, 6
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 %idx
  store <8 x i64> %b, ptr %x
  ret void
}

; Test that using an insertelement at element 0 in a later operation doesn't
; crash the compiler.
define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-LABEL: insertelt_c6_v8i64_0_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v12, (a1)
; CHECK-NEXT:    li a1, 6
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i64>, ptr %x
  %b = insertelement <8 x i64> %a, i64 6, i32 0
  %c = load <8 x i64>, ptr %y
  %d = add <8 x i64> %b, %c
  store <8 x i64> %d, ptr %x
  ret void
}

; The next batch of tests covers inserts into high LMUL vectors when the
; exact VLEN is known. FIXME: These can directly access the sub-registers.

define <16 x i32> @insertelt_c0_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c0_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 0
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c1_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c1_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v8, v12, 1
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 1
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c2_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c2_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v8, v12, 2
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 2
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c3_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c3_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v8, v12, 3
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 3
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c12_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c12_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v11, a0
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 12
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c13_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c13_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v11, v12, 1
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 13
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c14_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c14_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v11, v12, 2
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 14
  ret <16 x i32> %v
}

define <16 x i32> @insertelt_c15_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c15_v16xi32_exact:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vslideup.vi v11, v12, 3
; CHECK-NEXT:    ret
  %v = insertelement <16 x i32> %vin, i32 %a, i32 15
  ret <16 x i32> %v
}

define <8 x i64> @insertelt_c4_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
; RV32-LABEL: insertelt_c4_v8xi64_exact:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c4_v8xi64_exact:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v10, a0
; RV64-NEXT:    ret
  %v = insertelement <8 x i64> %vin, i64 %a, i32 4
  ret <8 x i64> %v
}

define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
; RV32-LABEL: insertelt_c5_v8xi64_exact:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a0
; RV32-NEXT:    vslide1down.vx v12, v12, a1
; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT:    vslideup.vi v10, v12, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: insertelt_c5_v8xi64_exact:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v12, a0
; RV64-NEXT:    vslideup.vi v10, v12, 1
; RV64-NEXT:    ret
  %v = insertelement <8 x i64> %vin, i64 %a, i32 5
  ret <8 x i64> %v
}

define <4 x bfloat> @insertelt_v4bf16_0(<4 x bfloat> %a, bfloat %y) {
; CHECK-LABEL: insertelt_v4bf16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 0
  ret <4 x bfloat> %b
}

define <4 x bfloat> @insertelt_v4bf16_3(<4 x bfloat> %a, bfloat %y) {
; CHECK-LABEL: insertelt_v4bf16_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    ret
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 3
  ret <4 x bfloat> %b
}

define <4 x bfloat> @insertelt_v4bf16_idx(<4 x bfloat> %a, bfloat %y, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_v4bf16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, 1
; CHECK-NEXT:    fmv.x.h a2, fa0
; CHECK-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
; CHECK-NEXT:    vslideup.vx v8, v9, a0
; CHECK-NEXT:    ret
  %b = insertelement <4 x bfloat> %a, bfloat %y, i32 %idx
  ret <4 x bfloat> %b
}

define <4 x half> @insertelt_v4f16_0(<4 x half> %a, half %y) {
; ZVFH-LABEL: insertelt_v4f16_0:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; ZVFH-NEXT:    vfmv.s.f v8, fa0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: insertelt_v4f16_0:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; ZVFHMIN-NEXT:    vmv.s.x v8, a0
; ZVFHMIN-NEXT:    ret
  %b = insertelement <4 x half> %a, half %y, i32 0
  ret <4 x half> %b
}

define <4 x half> @insertelt_v4f16_3(<4 x half> %a, half %y) {
; ZVFH-LABEL: insertelt_v4f16_3:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmv.s.f v9, fa0
; ZVFH-NEXT:    vslideup.vi v8, v9, 3
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: insertelt_v4f16_3:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.s.x v9, a0
; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
; ZVFHMIN-NEXT:    ret
  %b = insertelement <4 x half> %a, half %y, i32 3
  ret <4 x half> %b
}

define <4 x half> @insertelt_v4f16_idx(<4 x half> %a, half %y, i32 zeroext %idx) {
; ZVFH-LABEL: insertelt_v4f16_idx:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    addi a1, a0, 1
; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; ZVFH-NEXT:    vfmv.s.f v9, fa0
; ZVFH-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
; ZVFH-NEXT:    vslideup.vx v8, v9, a0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: insertelt_v4f16_idx:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi a1, a0, 1
; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.s.x v9, a2
; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
; ZVFHMIN-NEXT:    ret
  %b = insertelement <4 x half> %a, half %y, i32 %idx
  ret <4 x half> %b
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ZVFHMINRV32: {{.*}}
; ZVFHMINRV64: {{.*}}