; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-ONLY
; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB
; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB-PACK
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ONLY
; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64
; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64-PACK
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32

define void @buildvec_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> , ptr %x
  ret void
}

define void @buildvec_vid_undefelts_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_undefelts_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> , ptr %x
  ret void
}

; TODO: Could do VID then insertelement on missing elements
define void @buildvec_notquite_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_notquite_vid_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI2_0)
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> , ptr %x
  ret void
}

define void @buildvec_vid_plus_imm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_plus_imm_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vi v8, v8, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> , ptr %x
  ret void
}

define void @buildvec_vid_plus_nonimm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_plus_nonimm_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI4_0)
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> , ptr %x
  ret void
}

define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_mpy_imm_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a1, 3
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  store <16 x i8> , ptr %x
  ret void
}

define <4 x i8> @buildvec_vid_step2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add1_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn1_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vrsub.vi v8, v8, 0
; CHECK-NEXT:    ret
  ret <4 x i8>
}

define <4 x i8>
@buildvec_vid_stepn2_add0_v4i8_undef2() { ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v8, -6 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() { ; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vrsub.vi v8, v8, 3 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() { ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: li a0, -3 ; CHECK-NEXT: vmadd.vx v8, a0, v9 ; CHECK-NEXT: ret ret <4 x i8> } define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) { ; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, -3 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a4, -3 ; CHECK-NEXT: vmadd.vx v9, a4, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: vse32.v v9, (a1) ; CHECK-NEXT: vse32.v v9, (a2) ; CHECK-NEXT: vse32.v v9, (a3) ; CHECK-NEXT: ret store <4 x i32> , ptr %z0 store <4 x i32> , ptr %z1 store <4 x i32> , ptr %z2 store <4 x i32> , ptr %z3 ret void } ; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization. define <4 x i64> @buildvec_vid_step1_add0_v4i64() { ; RV32-LABEL: buildvec_vid_step1_add0_v4i64: ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI25_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI25_0) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vsext.vf4 v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_vid_step1_add0_v4i64: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64V-NEXT: vid.v v8 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: li a1, 3 ; RV64ZVE32-NEXT: li a2, 2 ; RV64ZVE32-NEXT: li a3, 1 ; RV64ZVE32-NEXT: sd zero, 0(a0) ; RV64ZVE32-NEXT: sd a3, 8(a0) ; RV64ZVE32-NEXT: sd a2, 16(a0) ; RV64ZVE32-NEXT: sd a1, 24(a0) ; RV64ZVE32-NEXT: ret ret <4 x i64> } define <4 x i64> @buildvec_vid_step2_add0_v4i64() { ; RV32-LABEL: buildvec_vid_step2_add0_v4i64: ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI26_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI26_0) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vsext.vf4 v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_vid_step2_add0_v4i64: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64V-NEXT: vid.v v8 ; RV64V-NEXT: vadd.vv v8, v8, v8 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: li a1, 6 ; RV64ZVE32-NEXT: li a2, 4 ; RV64ZVE32-NEXT: li a3, 2 ; RV64ZVE32-NEXT: sd zero, 0(a0) ; RV64ZVE32-NEXT: sd a3, 8(a0) ; RV64ZVE32-NEXT: sd a2, 16(a0) ; RV64ZVE32-NEXT: sd a1, 24(a0) ; RV64ZVE32-NEXT: ret ret <4 x i64> } define <4 x i8> @buildvec_no_vid_v4i8_0() { ; CHECK-LABEL: buildvec_no_vid_v4i8_0: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 28768 ; CHECK-NEXT: addi a0, a0, 769 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_no_vid_v4i8_1() { ; CHECK-LABEL: buildvec_no_vid_v4i8_1: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 28752 ; CHECK-NEXT: addi a0, a0, 512 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; 
CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_no_vid_v4i8_2() { ; CHECK-LABEL: buildvec_no_vid_v4i8_2: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 32768 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_no_vid_v4i8_3() { ; CHECK-LABEL: buildvec_no_vid_v4i8_3: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 28672 ; CHECK-NEXT: addi a0, a0, 255 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_no_vid_v4i8_4() { ; CHECK-LABEL: buildvec_no_vid_v4i8_4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v8, -2 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_no_vid_v4i8_5() { ; CHECK-LABEL: buildvec_no_vid_v4i8_5: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1032144 ; CHECK-NEXT: addi a0, a0, -257 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } define void @buildvec_dominant0_v8i16(ptr %x) { ; CHECK-LABEL: buildvec_dominant0_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: vmv.v.i v9, 8 ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v8, 3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v9, (a0) ; CHECK-NEXT: ret store <8 x i16> , ptr %x ret void } define void @buildvec_dominant0_v8i16_with_end_element(ptr %x) { ; CHECK-LABEL: buildvec_dominant0_v8i16_with_end_element: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 8 ; CHECK-NEXT: li a1, 3 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret store <8 x i16> , ptr %x ret void } define void @buildvec_dominant0_v8i16_with_tail(ptr %x) { ; CHECK-LABEL: buildvec_dominant0_v8i16_with_tail: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI35_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI35_0) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a1) ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret store <8 x i16> , ptr %x ret void } define void @buildvec_dominant1_v8i16(ptr %x) { ; CHECK-LABEL: buildvec_dominant1_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 8 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret store <8 x i16> , ptr %x ret void } define <2 x i8> @buildvec_dominant0_v2i8() { ; CHECK-LABEL: buildvec_dominant0_v2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: ret ret <2 x i8> } define <2 x i8> @buildvec_dominant1_v2i8() { ; RV32-LABEL: buildvec_dominant1_v2i8: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_dominant1_v2i8: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV64V-NEXT: vmv.v.i v8, -1 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_dominant1_v2i8: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v8, -1 ; RV64ZVE32-NEXT: ret ret <2 x i8> } define <2 x i8> @buildvec_dominant2_v2i8() { ; RV32-LABEL: buildvec_dominant2_v2i8: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vrsub.vi v8, v8, 0 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_dominant2_v2i8: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV64V-NEXT: vid.v v8 ; RV64V-NEXT: vrsub.vi v8, v8, 0 ; 
RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_dominant2_v2i8: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32-NEXT: vid.v v8 ; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0 ; RV64ZVE32-NEXT: ret ret <2 x i8> } define void @buildvec_dominant0_v2i32(ptr %x) { ; RV32-LABEL: buildvec_dominant0_v2i32: ; RV32: # %bb.0: ; RV32-NEXT: lui a1, %hi(.LCPI40_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI40_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_dominant0_v2i32: ; RV64V: # %bb.0: ; RV64V-NEXT: lui a1, %hi(.LCPI40_0) ; RV64V-NEXT: ld a1, %lo(.LCPI40_0)(a1) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.i v8, -1 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64V-NEXT: vmv.s.x v8, a1 ; RV64V-NEXT: vse64.v v8, (a0) ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_dominant0_v2i32: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI40_0) ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI40_0)(a1) ; RV64ZVE32-NEXT: li a2, -1 ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: ret store <2 x i64> , ptr %x ret void } define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize { ; RV32-LABEL: buildvec_dominant1_optsize_v2i32: ; RV32: # %bb.0: ; RV32-NEXT: lui a1, %hi(.LCPI41_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI41_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_dominant1_optsize_v2i32: ; RV64V: # %bb.0: ; RV64V-NEXT: lui a1, %hi(.LCPI41_0) ; RV64V-NEXT: addi a1, a1, %lo(.LCPI41_0) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vle64.v v8, (a1) ; RV64V-NEXT: vse64.v v8, (a0) ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI41_0) ; RV64ZVE32-NEXT: ld a1, %lo(.LCPI41_0)(a1) ; RV64ZVE32-NEXT: li a2, -1 ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: ret store <2 x i64> , ptr %x ret void } define void @buildvec_seq_v8i8_v4i16(ptr %x) { ; CHECK-LABEL: buildvec_seq_v8i8_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 513 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <8 x i8> , ptr %x ret void } define void @buildvec_seq_v8i8_v2i32(ptr %x) { ; RV32-LABEL: buildvec_seq_v8i8_v2i32: ; RV32: # %bb.0: ; RV32-NEXT: lui a1, 48 ; RV32-NEXT: addi a1, a1, 513 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_seq_v8i8_v2i32: ; RV64V: # %bb.0: ; RV64V-NEXT: lui a1, 48 ; RV64V-NEXT: addi a1, a1, 513 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64V-NEXT: vse8.v v8, (a0) ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lui a1, 48 ; RV64ZVE32-NEXT: addi a1, a1, 513 ; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32-NEXT: vse8.v v8, (a0) ; RV64ZVE32-NEXT: ret store <8 x i8> , ptr %x ret void } define void @buildvec_seq_v16i8_v2i64(ptr %x) { ; RV32-LABEL: buildvec_seq_v16i8_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: lui a1, %hi(.LCPI44_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0) ; 
RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a1) ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_seq_v16i8_v2i64: ; RV64V: # %bb.0: ; RV64V-NEXT: lui a1, %hi(.LCPI44_0) ; RV64V-NEXT: ld a1, %lo(.LCPI44_0)(a1) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-NEXT: vse8.v v8, (a0) ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI44_0) ; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI44_0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vle8.v v8, (a1) ; RV64ZVE32-NEXT: vse8.v v8, (a0) ; RV64ZVE32-NEXT: ret store <16 x i8> , ptr %x ret void } define void @buildvec_seq2_v16i8_v2i64(ptr %x) { ; RV32-LABEL: buildvec_seq2_v16i8_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: lui a1, 528432 ; RV32-NEXT: addi a1, a1, 513 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_seq2_v16i8_v2i64: ; RV64V: # %bb.0: ; RV64V-NEXT: lui a1, 528432 ; RV64V-NEXT: addiw a1, a1, 513 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-NEXT: vse8.v v8, (a0) ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lui a1, %hi(.LCPI45_0) ; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI45_0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vle8.v v8, (a1) ; RV64ZVE32-NEXT: vse8.v v8, (a0) ; RV64ZVE32-NEXT: ret store <16 x i8> , ptr %x ret void } define void @buildvec_seq_v9i8(ptr %x) { ; CHECK-LABEL: buildvec_seq_v9i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 73 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: li a1, 146 ; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 2, v0 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <9 x i8> , ptr %x ret void } define void @buildvec_seq_v4i16_v2i32(ptr %x) { ; CHECK-LABEL: buildvec_seq_v4i16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, -127 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret store <4 x i16> , ptr %x ret void } define void @buildvec_vid_step1o2_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) { ; CHECK-LABEL: buildvec_vid_step1o2_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 1 ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vsrl.vi v9, v9, 1 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: vse32.v v9, (a1) ; CHECK-NEXT: vse32.v v9, (a2) ; CHECK-NEXT: vse32.v v9, (a3) ; CHECK-NEXT: vse32.v v9, (a4) ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vse32.v v8, (a5) ; CHECK-NEXT: vse32.v v9, (a6) ; CHECK-NEXT: ret store <4 x i32> , ptr %z0 store <4 x i32> , ptr %z1 store <4 x i32> , ptr %z2 store <4 x i32> , ptr %z3 store <4 x i32> , ptr %z4 ; We don't catch this one store <4 x i32> , ptr 
%z5 ; We catch this one but as VID/3 rather than VID/2 store <4 x i32> , ptr %z6 ret void } define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) { ; CHECK-LABEL: buildvec_vid_step1o2_add3_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vsrl.vi v8, v8, 1 ; CHECK-NEXT: vadd.vi v8, v8, 3 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: vse16.v v8, (a4) ; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: li a0, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vse16.v v8, (a5) ; CHECK-NEXT: vse16.v v9, (a6) ; CHECK-NEXT: ret store <4 x i16> , ptr %z0 store <4 x i16> , ptr %z1 store <4 x i16> , ptr %z2 store <4 x i16> , ptr %z3 store <4 x i16> , ptr %z4 ; We don't catch this one store <4 x i16> , ptr %z5 ; We catch this one but as VID/3 rather than VID/2 store <4 x i16> , ptr %z6 ret void } define void @buildvec_vid_stepn1o4_addn5_v8i8(ptr %z0) { ; CHECK-LABEL: buildvec_vid_stepn1o4_addn5_v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vsrl.vi v8, v8, 2 ; CHECK-NEXT: vrsub.vi v8, v8, -5 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <8 x i8> , ptr %z0 ret void } define void @buildvec_vid_mpy_imm_v8i16(ptr %x) { ; CHECK-LABEL: buildvec_vid_mpy_imm_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: li a1, 17 ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret store <8 x i16> , ptr %x ret void } define void @buildvec_vid_shl_imm_v8i16(ptr %x) { ; CHECK-LABEL: buildvec_vid_shl_imm_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vsll.vi v8, v8, 9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret store <8 x i16> , ptr %x ret void } define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) { ; CHECK-LABEL: splat_c3_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vrgather.vi v9, v8, 3 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <4 x i32> %v, i32 3 %ins = insertelement <4 x i32> poison, i32 %x, i32 0 %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer ret <4 x i32> %splat } define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vrgather.vx v9, v8, a0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <4 x i32> %v, i64 %idx %ins = insertelement <4 x i32> poison, i32 %x, i32 0 %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer ret <4 x i32> %splat } define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) { ; CHECK-LABEL: splat_c4_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vrgather.vi v9, v8, 4 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <8 x i16> %v, i32 4 %ins = insertelement <8 x i16> poison, i16 %x, i32 0 %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer ret <8 x i16> %splat } define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vrgather.vx v9, v8, a0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <8 x i16> %v, i64 %idx %ins = insertelement <8 x i16> poison, i16 %x, i32 0 %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer ret <8 x i16> %splat } define <4 x i8> @buildvec_not_vid_v4i8_1() { ; CHECK-LABEL: buildvec_not_vid_v4i8_1: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 12320 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_not_vid_v4i8_2() { ; CHECK-LABEL: buildvec_not_vid_v4i8_2: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 16 ; CHECK-NEXT: addi a0, a0, 771 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret ret <4 x i8> } ; We match this as a VID sequence (-3 / 8) + 5 but choose not to introduce ; division to compute it. define <16 x i8> @buildvec_not_vid_v16i8() { ; CHECK-LABEL: buildvec_not_vid_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 6 ; CHECK-NEXT: ret ret <16 x i8> } define <512 x i8> @buildvec_vid_v512i8_indices_overflow() vscale_range(16, 1024) { ; CHECK-LABEL: buildvec_vid_v512i8_indices_overflow: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 512 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: ret ret <512 x i8> } define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, 1024) { ; RV32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1: ; RV32: # %bb.0: ; RV32-NEXT: li a0, 512 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vsrl.vi v8, v8, 3 ; RV32-NEXT: vadd.vi v0, v8, -1 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmv.v.i v8, 1 ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_1: ; RV64V: # %bb.0: ; RV64V-NEXT: li a0, 512 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vid.v v8 ; RV64V-NEXT: vsrl.vi v8, v8, 2 ; RV64V-NEXT: vadd.vi v0, v8, -1 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmv.v.i v8, 1 ; RV64V-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: li a0, 512 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vid.v v8 ; RV64ZVE32-NEXT: vsrl.vi v8, v8, 3 ; RV64ZVE32-NEXT: vadd.vi v0, v8, -1 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v8, 1 ; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64ZVE32-NEXT: ret ret <512 x i8> } define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, 1024) { ; RV32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: li a0, 512 ; RV32-NEXT: li a1, 240 ; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: li a1, 15 ; RV32-NEXT: vmerge.vim v10, v9, -1, v0 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmv.v.i v12, 3 ; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmerge.vim v12, v12, 0, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vmerge.vim v10, v9, -1, v0 ; RV32-NEXT: 
vmv.s.x v8, a1 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v12, v12, 1, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vmerge.vim v8, v9, -1, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v8, v12, 2, v0 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_2: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.i v0, 3 ; RV64V-NEXT: vmv.v.i v9, 0 ; RV64V-NEXT: li a0, 512 ; RV64V-NEXT: vmv.v.i v8, 12 ; RV64V-NEXT: li a1, 48 ; RV64V-NEXT: vmerge.vim v10, v9, -1, v0 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmv.v.i v12, 3 ; RV64V-NEXT: vmv1r.v v0, v10 ; RV64V-NEXT: vmerge.vim v12, v12, 0, v0 ; RV64V-NEXT: vmv1r.v v0, v8 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vmerge.vim v10, v9, -1, v0 ; RV64V-NEXT: vmv.s.x v8, a1 ; RV64V-NEXT: vmv.v.v v0, v10 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v12, v12, 1, v0 ; RV64V-NEXT: vmv1r.v v0, v8 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vmerge.vim v8, v9, -1, v0 ; RV64V-NEXT: vmv.v.v v0, v8 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v8, v12, 2, v0 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v0, 15 ; RV64ZVE32-NEXT: vmv.v.i v9, 0 ; RV64ZVE32-NEXT: li a0, 512 ; RV64ZVE32-NEXT: li a1, 240 ; RV64ZVE32-NEXT: vmv.s.x v8, a1 ; RV64ZVE32-NEXT: li a1, 15 ; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v12, 3 ; RV64ZVE32-NEXT: slli a1, a1, 8 ; RV64ZVE32-NEXT: vmv1r.v v0, v10 ; RV64ZVE32-NEXT: vmerge.vim v12, v12, 0, v0 ; RV64ZVE32-NEXT: vmv1r.v v0, v8 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0 ; RV64ZVE32-NEXT: vmv.s.x v8, a1 ; RV64ZVE32-NEXT: vmv.v.v v0, v10 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v12, v12, 1, v0 ; RV64ZVE32-NEXT: vmv1r.v v0, v8 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v8, v9, -1, v0 ; RV64ZVE32-NEXT: vmv.v.v v0, v8 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v8, v12, 2, v0 ; RV64ZVE32-NEXT: ret ret <512 x i8> } define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: prefix_overwrite: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: vmv.s.x v10, a3 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %v0 = insertelement <8 x i32> %vin, i32 %a, i32 0 %v1 = insertelement <8 x i32> %v0, i32 %b, i32 1 %v2 = insertelement <8 x i32> %v1, i32 %c, i32 2 %v3 = insertelement <8 x i32> %v2, i32 %d, i32 3 ret <8 x i32> %v3 } define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: suffix_overwrite: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: 
vmv.s.x v10, a1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vmv.s.x v10, a3 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 7 ; CHECK-NEXT: ret %v0 = insertelement <8 x i32> %vin, i32 %a, i32 4 %v1 = insertelement <8 x i32> %v0, i32 %b, i32 5 %v2 = insertelement <8 x i32> %v1, i32 %c, i32 6 %v3 = insertelement <8 x i32> %v2, i32 %d, i32 7 ret <8 x i32> %v3 } define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) { ; RV32-LABEL: v4xi64_exact: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v10, v9, a1 ; RV32-NEXT: vslide1down.vx v8, v8, a6 ; RV32-NEXT: vslide1down.vx v9, v8, a7 ; RV32-NEXT: vslide1down.vx v8, v10, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: ret ; ; RV64V-LABEL: v4xi64_exact: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a2 ; RV64V-NEXT: vslide1down.vx v9, v8, a3 ; RV64V-NEXT: vmv.v.x v8, a0 ; RV64V-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: v4xi64_exact: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a3, 16(a0) ; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <4 x i64> poison, i64 %a, i32 0 %v2 = insertelement <4 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <4 x i64> %v2, i64 %c, i32 2 %v4 = insertelement <4 x i64> %v3, i64 %d, i32 3 ret <4 x i64> %v4 } define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) vscale_range(2,2) { ; RV32-LABEL: v8xi64_exact: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 ; RV32-NEXT: lw t0, 44(sp) ; RV32-NEXT: lw t1, 40(sp) ; RV32-NEXT: lw t2, 36(sp) ; RV32-NEXT: lw t3, 32(sp) ; RV32-NEXT: lw t4, 28(sp) ; RV32-NEXT: lw t5, 24(sp) ; RV32-NEXT: lw t6, 20(sp) ; RV32-NEXT: lw s0, 16(sp) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v9, v9, a1 ; RV32-NEXT: vslide1down.vx v8, v8, a6 ; RV32-NEXT: vslide1down.vx v10, v9, a2 ; RV32-NEXT: vslide1down.vx v9, v8, a7 ; RV32-NEXT: vslide1down.vx v8, v10, a3 ; RV32-NEXT: vmv.v.x v10, s0 ; RV32-NEXT: vslide1down.vx v10, v10, t6 ; RV32-NEXT: vslide1down.vx v10, v10, t5 ; RV32-NEXT: vslide1down.vx v10, v10, t4 ; RV32-NEXT: vmv.v.x v11, t3 ; RV32-NEXT: vslide1down.vx v11, v11, t2 ; RV32-NEXT: vslide1down.vx v11, v11, t1 ; RV32-NEXT: vslide1down.vx v11, v11, t0 ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore s0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64V-LABEL: v8xi64_exact: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a2 ; RV64V-NEXT: vmv.v.x v10, a0 ; RV64V-NEXT: vslide1down.vx v9, v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v10, a1 ; RV64V-NEXT: vmv.v.x v10, a4 ; RV64V-NEXT: vslide1down.vx v10, v10, a5 ; RV64V-NEXT: vmv.v.x v11, a6 ; RV64V-NEXT: vslide1down.vx v11, v11, a7 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: v8xi64_exact: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: ld t0, 0(sp) ; 
RV64ZVE32-NEXT: sd a5, 32(a0) ; RV64ZVE32-NEXT: sd a6, 40(a0) ; RV64ZVE32-NEXT: sd a7, 48(a0) ; RV64ZVE32-NEXT: sd t0, 56(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a3, 16(a0) ; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3 %v5 = insertelement <8 x i64> %v4, i64 %e, i32 4 %v6 = insertelement <8 x i64> %v5, i64 %f, i32 5 %v7 = insertelement <8 x i64> %v6, i64 %g, i32 6 %v8 = insertelement <8 x i64> %v7, i64 %h, i32 7 ret <8 x i64> %v8 } define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) { ; RV32-LABEL: v8xi64_exact_equal_halves: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v10, v9, a1 ; RV32-NEXT: vslide1down.vx v8, v8, a6 ; RV32-NEXT: vslide1down.vx v9, v8, a7 ; RV32-NEXT: vslide1down.vx v8, v10, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: vmv.v.v v10, v8 ; RV32-NEXT: vmv.v.v v11, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: v8xi64_exact_equal_halves: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a2 ; RV64V-NEXT: vslide1down.vx v9, v8, a3 ; RV64V-NEXT: vmv.v.x v8, a0 ; RV64V-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-NEXT: vmv.v.v v10, v8 ; RV64V-NEXT: vmv.v.v v11, v9 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: v8xi64_exact_equal_halves: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: sd a1, 32(a0) ; RV64ZVE32-NEXT: sd a2, 40(a0) ; RV64ZVE32-NEXT: sd a3, 48(a0) ; RV64ZVE32-NEXT: sd a4, 56(a0) ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a3, 16(a0) ; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3 %v5 = insertelement <8 x i64> %v4, i64 %a, i32 4 %v6 = insertelement <8 x i64> %v5, i64 %b, i32 5 %v7 = insertelement <8 x i64> %v6, i64 %c, i32 6 %v8 = insertelement <8 x i64> %v7, i64 %d, i32 7 ret <8 x i64> %v8 } define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) { ; RV32-LABEL: v8xi64_exact_undef_suffix: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v10, v9, a1 ; RV32-NEXT: vslide1down.vx v8, v8, a6 ; RV32-NEXT: vslide1down.vx v9, v8, a7 ; RV32-NEXT: vslide1down.vx v8, v10, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: ret ; ; RV64V-LABEL: v8xi64_exact_undef_suffix: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a2 ; RV64V-NEXT: vslide1down.vx v9, v8, a3 ; RV64V-NEXT: vmv.v.x v8, a0 ; RV64V-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) ; RV64ZVE32-NEXT: sd a3, 16(a0) ; RV64ZVE32-NEXT: sd a4, 24(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 0 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 1 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 2 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 3 ret <8 x i64> %v4 
} define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) { ; RV32-LABEL: v8xi64_exact_undef_prefix: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v9, v9, a1 ; RV32-NEXT: vslide1down.vx v8, v8, a6 ; RV32-NEXT: vslide1down.vx v11, v8, a7 ; RV32-NEXT: vslide1down.vx v8, v9, a2 ; RV32-NEXT: vslide1down.vx v10, v8, a3 ; RV32-NEXT: ret ; ; RV64V-LABEL: v8xi64_exact_undef_prefix: ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a2 ; RV64V-NEXT: vslide1down.vx v11, v8, a3 ; RV64V-NEXT: vmv.v.x v8, a0 ; RV64V-NEXT: vslide1down.vx v10, v8, a1 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: sd a1, 32(a0) ; RV64ZVE32-NEXT: sd a2, 40(a0) ; RV64ZVE32-NEXT: sd a3, 48(a0) ; RV64ZVE32-NEXT: sd a4, 56(a0) ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i64> poison, i64 %a, i32 4 %v2 = insertelement <8 x i64> %v1, i64 %b, i32 5 %v3 = insertelement <8 x i64> %v2, i64 %c, i32 6 %v4 = insertelement <8 x i64> %v3, i64 %d, i32 7 ret <8 x i64> %v4 } define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_loads_contigous: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: addi sp, sp, -16 ; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16 ; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32-ONLY-NEXT: .cfi_offset s0, -4 ; RV32-ONLY-NEXT: lbu a1, 0(a0) ; RV32-ONLY-NEXT: lbu a2, 1(a0) ; RV32-ONLY-NEXT: lbu a3, 2(a0) ; RV32-ONLY-NEXT: lbu a4, 3(a0) ; RV32-ONLY-NEXT: lbu a5, 4(a0) ; RV32-ONLY-NEXT: lbu a6, 5(a0) ; RV32-ONLY-NEXT: lbu a7, 6(a0) ; RV32-ONLY-NEXT: lbu t0, 7(a0) ; RV32-ONLY-NEXT: lbu t1, 8(a0) ; RV32-ONLY-NEXT: lbu t2, 9(a0) ; RV32-ONLY-NEXT: lbu t3, 10(a0) ; RV32-ONLY-NEXT: lbu t4, 11(a0) ; RV32-ONLY-NEXT: li t5, 255 ; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-ONLY-NEXT: vmv.s.x v0, t5 ; RV32-ONLY-NEXT: lbu t5, 12(a0) ; RV32-ONLY-NEXT: lbu t6, 13(a0) ; RV32-ONLY-NEXT: lbu s0, 14(a0) ; RV32-ONLY-NEXT: lbu a0, 15(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vmv.v.x v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t4 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: .cfi_restore s0 ; RV32-ONLY-NEXT: addi sp, sp, 16 ; RV32-ONLY-NEXT: .cfi_def_cfa_offset 0 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_loads_contigous: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 0(a0) ; RV32VB-NEXT: lbu a2, 1(a0) ; RV32VB-NEXT: lbu a3, 2(a0) ; RV32VB-NEXT: lbu a4, 3(a0) ; RV32VB-NEXT: lbu a5, 4(a0) ; RV32VB-NEXT: lbu a6, 5(a0) ; RV32VB-NEXT: lbu a7, 6(a0) ; RV32VB-NEXT: lbu t0, 7(a0) ; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: slli a6, a6, 8 ; 
RV32VB-NEXT: or a1, a1, a2 ; RV32VB-NEXT: or a3, a4, a3 ; RV32VB-NEXT: or a2, a5, a6 ; RV32VB-NEXT: lbu a4, 8(a0) ; RV32VB-NEXT: lbu a5, 9(a0) ; RV32VB-NEXT: lbu a6, 10(a0) ; RV32VB-NEXT: lbu t1, 11(a0) ; RV32VB-NEXT: slli a7, a7, 16 ; RV32VB-NEXT: slli t0, t0, 24 ; RV32VB-NEXT: slli a5, a5, 8 ; RV32VB-NEXT: slli a6, a6, 16 ; RV32VB-NEXT: slli t1, t1, 24 ; RV32VB-NEXT: or a7, t0, a7 ; RV32VB-NEXT: or a4, a4, a5 ; RV32VB-NEXT: lbu a5, 12(a0) ; RV32VB-NEXT: lbu t0, 13(a0) ; RV32VB-NEXT: or a6, t1, a6 ; RV32VB-NEXT: lbu t1, 14(a0) ; RV32VB-NEXT: lbu a0, 15(a0) ; RV32VB-NEXT: slli t0, t0, 8 ; RV32VB-NEXT: or a5, a5, t0 ; RV32VB-NEXT: slli t1, t1, 16 ; RV32VB-NEXT: slli a0, a0, 24 ; RV32VB-NEXT: or a0, a0, t1 ; RV32VB-NEXT: or a1, a1, a3 ; RV32VB-NEXT: or a2, a2, a7 ; RV32VB-NEXT: or a3, a4, a6 ; RV32VB-NEXT: or a0, a5, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_loads_contigous: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 0(a0) ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 2(a0) ; RV32VB-PACK-NEXT: lbu a4, 3(a0) ; RV32VB-PACK-NEXT: lbu a5, 4(a0) ; RV32VB-PACK-NEXT: lbu a6, 5(a0) ; RV32VB-PACK-NEXT: lbu a7, 6(a0) ; RV32VB-PACK-NEXT: lbu t0, 7(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: lbu a2, 8(a0) ; RV32VB-PACK-NEXT: lbu t1, 9(a0) ; RV32VB-PACK-NEXT: lbu t2, 10(a0) ; RV32VB-PACK-NEXT: lbu t3, 11(a0) ; RV32VB-PACK-NEXT: packh a3, a3, a4 ; RV32VB-PACK-NEXT: packh a4, a5, a6 ; RV32VB-PACK-NEXT: packh a5, a7, t0 ; RV32VB-PACK-NEXT: lbu a6, 12(a0) ; RV32VB-PACK-NEXT: lbu a7, 13(a0) ; RV32VB-PACK-NEXT: lbu t0, 14(a0) ; RV32VB-PACK-NEXT: lbu a0, 15(a0) ; RV32VB-PACK-NEXT: packh a2, a2, t1 ; RV32VB-PACK-NEXT: packh t1, t2, t3 ; RV32VB-PACK-NEXT: packh a6, a6, a7 ; RV32VB-PACK-NEXT: packh a0, t0, a0 ; RV32VB-PACK-NEXT: pack a1, a1, a3 ; RV32VB-PACK-NEXT: pack a3, a4, a5 ; RV32VB-PACK-NEXT: pack a2, a2, t1 ; RV32VB-PACK-NEXT: pack a0, a6, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_contigous: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: addi sp, sp, -16 ; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16 ; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64V-ONLY-NEXT: .cfi_offset s0, -8 ; RV64V-ONLY-NEXT: lbu a1, 0(a0) ; RV64V-ONLY-NEXT: lbu a2, 1(a0) ; RV64V-ONLY-NEXT: lbu a3, 2(a0) ; RV64V-ONLY-NEXT: lbu a4, 3(a0) ; RV64V-ONLY-NEXT: lbu a5, 4(a0) ; RV64V-ONLY-NEXT: lbu a6, 5(a0) ; RV64V-ONLY-NEXT: lbu a7, 6(a0) ; RV64V-ONLY-NEXT: lbu t0, 7(a0) ; RV64V-ONLY-NEXT: lbu t1, 8(a0) ; RV64V-ONLY-NEXT: lbu t2, 9(a0) ; RV64V-ONLY-NEXT: lbu t3, 10(a0) ; RV64V-ONLY-NEXT: lbu t4, 11(a0) ; RV64V-ONLY-NEXT: li t5, 255 ; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.s.x v0, t5 ; RV64V-ONLY-NEXT: lbu t5, 12(a0) ; RV64V-ONLY-NEXT: lbu t6, 13(a0) ; RV64V-ONLY-NEXT: lbu s0, 14(a0) ; RV64V-ONLY-NEXT: lbu a0, 15(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vmv.v.x v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: 
vslide1down.vx v9, v9, t3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t4 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: .cfi_restore s0 ; RV64V-ONLY-NEXT: addi sp, sp, 16 ; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 0 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_loads_contigous: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a6, 0(a0) ; RVA22U64-NEXT: lbu a2, 1(a0) ; RVA22U64-NEXT: lbu a3, 2(a0) ; RVA22U64-NEXT: lbu a4, 3(a0) ; RVA22U64-NEXT: lbu a5, 4(a0) ; RVA22U64-NEXT: lbu a1, 5(a0) ; RVA22U64-NEXT: lbu a7, 6(a0) ; RVA22U64-NEXT: lbu t0, 7(a0) ; RVA22U64-NEXT: slli a2, a2, 8 ; RVA22U64-NEXT: slli a3, a3, 16 ; RVA22U64-NEXT: slli a4, a4, 24 ; RVA22U64-NEXT: slli a5, a5, 32 ; RVA22U64-NEXT: slli a1, a1, 40 ; RVA22U64-NEXT: or a6, a6, a2 ; RVA22U64-NEXT: or t2, a4, a3 ; RVA22U64-NEXT: or t1, a1, a5 ; RVA22U64-NEXT: lbu a4, 8(a0) ; RVA22U64-NEXT: lbu a5, 9(a0) ; RVA22U64-NEXT: lbu a2, 10(a0) ; RVA22U64-NEXT: lbu a1, 11(a0) ; RVA22U64-NEXT: slli a7, a7, 48 ; RVA22U64-NEXT: slli t0, t0, 56 ; RVA22U64-NEXT: slli a5, a5, 8 ; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a1, a1, 24 ; RVA22U64-NEXT: or a7, t0, a7 ; RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: or a1, a1, a2 ; RVA22U64-NEXT: lbu a2, 12(a0) ; RVA22U64-NEXT: lbu a5, 13(a0) ; RVA22U64-NEXT: lbu a3, 14(a0) ; RVA22U64-NEXT: lbu a0, 15(a0) ; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: slli a5, a5, 40 ; RVA22U64-NEXT: or a2, a2, a5 ; RVA22U64-NEXT: slli a3, a3, 48 ; RVA22U64-NEXT: slli a0, a0, 56 ; RVA22U64-NEXT: or a0, a0, a3 ; RVA22U64-NEXT: or a3, a6, t2 ; RVA22U64-NEXT: or a5, a7, t1 ; RVA22U64-NEXT: or a1, a1, a4 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a3, a3, a5 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a3 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_contigous: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a1, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) ; RVA22U64-PACK-NEXT: lbu a6, 2(a0) ; RVA22U64-PACK-NEXT: lbu a7, 3(a0) ; RVA22U64-PACK-NEXT: lbu t0, 4(a0) ; RVA22U64-PACK-NEXT: lbu a3, 5(a0) ; RVA22U64-PACK-NEXT: lbu a4, 6(a0) ; RVA22U64-PACK-NEXT: lbu a5, 7(a0) ; RVA22U64-PACK-NEXT: packh t1, a1, a2 ; RVA22U64-PACK-NEXT: lbu t2, 8(a0) ; RVA22U64-PACK-NEXT: lbu t3, 9(a0) ; RVA22U64-PACK-NEXT: lbu t4, 10(a0) ; RVA22U64-PACK-NEXT: lbu a1, 11(a0) ; RVA22U64-PACK-NEXT: packh a6, a6, a7 ; RVA22U64-PACK-NEXT: packh a7, t0, a3 ; RVA22U64-PACK-NEXT: packh t0, a4, a5 ; RVA22U64-PACK-NEXT: lbu a5, 12(a0) ; RVA22U64-PACK-NEXT: lbu a3, 13(a0) ; RVA22U64-PACK-NEXT: lbu a2, 14(a0) ; RVA22U64-PACK-NEXT: lbu a0, 15(a0) ; RVA22U64-PACK-NEXT: packh a4, t2, t3 ; RVA22U64-PACK-NEXT: packh a1, t4, a1 ; RVA22U64-PACK-NEXT: packh a3, a5, a3 ; RVA22U64-PACK-NEXT: packh a0, a2, a0 ; RVA22U64-PACK-NEXT: packw a2, t1, a6 ; RVA22U64-PACK-NEXT: packw a5, a7, t0 ; RVA22U64-PACK-NEXT: packw a1, a4, a1 ; RVA22U64-PACK-NEXT: packw a0, a3, a0 ; RVA22U64-PACK-NEXT: pack a2, a2, a5 ; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; 
RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a2 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_contigous: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: addi sp, sp, -16 ; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16 ; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64ZVE32-NEXT: .cfi_offset s0, -8 ; RV64ZVE32-NEXT: lbu a1, 0(a0) ; RV64ZVE32-NEXT: lbu a2, 1(a0) ; RV64ZVE32-NEXT: lbu a3, 2(a0) ; RV64ZVE32-NEXT: lbu a4, 3(a0) ; RV64ZVE32-NEXT: lbu a5, 4(a0) ; RV64ZVE32-NEXT: lbu a6, 5(a0) ; RV64ZVE32-NEXT: lbu a7, 6(a0) ; RV64ZVE32-NEXT: lbu t0, 7(a0) ; RV64ZVE32-NEXT: lbu t1, 8(a0) ; RV64ZVE32-NEXT: lbu t2, 9(a0) ; RV64ZVE32-NEXT: lbu t3, 10(a0) ; RV64ZVE32-NEXT: lbu t4, 11(a0) ; RV64ZVE32-NEXT: li t5, 255 ; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v0, t5 ; RV64ZVE32-NEXT: lbu t5, 12(a0) ; RV64ZVE32-NEXT: lbu t6, 13(a0) ; RV64ZVE32-NEXT: lbu s0, 14(a0) ; RV64ZVE32-NEXT: lbu a0, 15(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vmv.v.x v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t4 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, s0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: .cfi_restore s0 ; RV64ZVE32-NEXT: addi sp, sp, 16 ; RV64ZVE32-NEXT: .cfi_def_cfa_offset 0 ; RV64ZVE32-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 2 %p4 = getelementptr i8, ptr %p, i32 3 %p5 = getelementptr i8, ptr %p, i32 4 %p6 = getelementptr i8, ptr %p, i32 5 %p7 = getelementptr i8, ptr %p, i32 6 %p8 = getelementptr i8, ptr %p, i32 7 %p9 = getelementptr i8, ptr %p, i32 8 %p10 = getelementptr i8, ptr %p, i32 9 %p11 = getelementptr i8, ptr %p, i32 10 %p12 = getelementptr i8, ptr %p, i32 11 %p13 = getelementptr i8, ptr %p, i32 12 %p14 = getelementptr i8, ptr %p, i32 13 %p15 = getelementptr i8, ptr %p, i32 14 %p16 = getelementptr i8, ptr %p, i32 15 %ld1 = load i8, ptr %p %ld2 = load i8, ptr %p2 %ld3 = load i8, ptr %p3 %ld4 = load i8, ptr %p4 %ld5 = load i8, ptr %p5 %ld6 = load i8, ptr %p6 %ld7 = load i8, ptr %p7 %ld8 = load i8, ptr %p8 %ld9 = load i8, ptr %p9 %ld10 = load i8, ptr %p10 %ld11 = load i8, ptr %p11 %ld12 = load i8, ptr %p12 %ld13 = load i8, ptr %p13 %ld14 = load i8, ptr %p14 %ld15 = load i8, ptr %p15 %ld16 = load i8, ptr %p16 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1 %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2 %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10 %v12 = 
insertelement <16 x i8> %v11, i8 %ld12, i32 11 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12 %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15 ret <16 x i8> %v16 } define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_loads_gather: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: addi sp, sp, -16 ; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16 ; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32-ONLY-NEXT: .cfi_offset s0, -4 ; RV32-ONLY-NEXT: lbu a1, 0(a0) ; RV32-ONLY-NEXT: lbu a2, 1(a0) ; RV32-ONLY-NEXT: lbu a3, 22(a0) ; RV32-ONLY-NEXT: lbu a4, 31(a0) ; RV32-ONLY-NEXT: lbu a5, 623(a0) ; RV32-ONLY-NEXT: lbu a6, 44(a0) ; RV32-ONLY-NEXT: lbu a7, 55(a0) ; RV32-ONLY-NEXT: lbu t0, 75(a0) ; RV32-ONLY-NEXT: lbu t1, 82(a0) ; RV32-ONLY-NEXT: lbu t2, 154(a0) ; RV32-ONLY-NEXT: lbu t3, 161(a0) ; RV32-ONLY-NEXT: lbu t4, 163(a0) ; RV32-ONLY-NEXT: li t5, 255 ; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-ONLY-NEXT: vmv.s.x v0, t5 ; RV32-ONLY-NEXT: lbu t5, 93(a0) ; RV32-ONLY-NEXT: lbu t6, 105(a0) ; RV32-ONLY-NEXT: lbu s0, 124(a0) ; RV32-ONLY-NEXT: lbu a0, 144(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vmv.v.x v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t4 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, t2 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: .cfi_restore s0 ; RV32-ONLY-NEXT: addi sp, sp, 16 ; RV32-ONLY-NEXT: .cfi_def_cfa_offset 0 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_loads_gather: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 0(a0) ; RV32VB-NEXT: lbu a2, 1(a0) ; RV32VB-NEXT: lbu a3, 22(a0) ; RV32VB-NEXT: lbu a4, 31(a0) ; RV32VB-NEXT: lbu a5, 623(a0) ; RV32VB-NEXT: lbu a6, 44(a0) ; RV32VB-NEXT: lbu a7, 55(a0) ; RV32VB-NEXT: lbu t0, 75(a0) ; RV32VB-NEXT: lbu t1, 82(a0) ; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: or a1, a1, a2 ; RV32VB-NEXT: or a3, a4, a3 ; RV32VB-NEXT: lbu a2, 93(a0) ; RV32VB-NEXT: lbu a4, 105(a0) ; RV32VB-NEXT: lbu t2, 124(a0) ; RV32VB-NEXT: lbu t3, 144(a0) ; RV32VB-NEXT: slli a7, a7, 8 ; RV32VB-NEXT: slli a5, a5, 16 ; RV32VB-NEXT: slli t0, t0, 24 ; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: or a6, a6, a7 ; RV32VB-NEXT: or a5, t0, a5 ; RV32VB-NEXT: lbu a7, 154(a0) ; RV32VB-NEXT: lbu t0, 161(a0) ; RV32VB-NEXT: or a2, t1, a2 ; RV32VB-NEXT: lbu a0, 163(a0) ; RV32VB-NEXT: slli a4, a4, 16 ; RV32VB-NEXT: slli t0, t0, 24 ; RV32VB-NEXT: or a4, t0, a4 ; RV32VB-NEXT: slli a0, a0, 8 ; RV32VB-NEXT: or a0, t2, a0 ; RV32VB-NEXT: slli t3, t3, 16 ; RV32VB-NEXT: slli a7, a7, 24 ; RV32VB-NEXT: or a7, a7, t3 ; RV32VB-NEXT: or a1, a1, a3 ; RV32VB-NEXT: or a3, a6, a5 ; RV32VB-NEXT: or a2, a2, a4 ; RV32VB-NEXT: or a0, a0, a7 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: 
vslide1down.vx v8, v8, a3 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_loads_gather: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 0(a0) ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 22(a0) ; RV32VB-PACK-NEXT: lbu a4, 31(a0) ; RV32VB-PACK-NEXT: lbu a5, 623(a0) ; RV32VB-PACK-NEXT: lbu a6, 44(a0) ; RV32VB-PACK-NEXT: lbu a7, 55(a0) ; RV32VB-PACK-NEXT: lbu t0, 75(a0) ; RV32VB-PACK-NEXT: lbu t1, 82(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: lbu a2, 154(a0) ; RV32VB-PACK-NEXT: lbu t2, 161(a0) ; RV32VB-PACK-NEXT: lbu t3, 163(a0) ; RV32VB-PACK-NEXT: packh a3, a3, a4 ; RV32VB-PACK-NEXT: packh a4, a6, a7 ; RV32VB-PACK-NEXT: packh a5, a5, t0 ; RV32VB-PACK-NEXT: lbu a6, 93(a0) ; RV32VB-PACK-NEXT: lbu a7, 105(a0) ; RV32VB-PACK-NEXT: lbu t0, 124(a0) ; RV32VB-PACK-NEXT: lbu a0, 144(a0) ; RV32VB-PACK-NEXT: packh a6, t1, a6 ; RV32VB-PACK-NEXT: packh a7, a7, t2 ; RV32VB-PACK-NEXT: packh t0, t0, t3 ; RV32VB-PACK-NEXT: packh a0, a0, a2 ; RV32VB-PACK-NEXT: pack a1, a1, a3 ; RV32VB-PACK-NEXT: pack a2, a4, a5 ; RV32VB-PACK-NEXT: pack a3, a6, a7 ; RV32VB-PACK-NEXT: pack a0, t0, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_gather: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: addi sp, sp, -16 ; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16 ; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64V-ONLY-NEXT: .cfi_offset s0, -8 ; RV64V-ONLY-NEXT: lbu a1, 0(a0) ; RV64V-ONLY-NEXT: lbu a2, 1(a0) ; RV64V-ONLY-NEXT: lbu a3, 22(a0) ; RV64V-ONLY-NEXT: lbu a4, 31(a0) ; RV64V-ONLY-NEXT: lbu a5, 623(a0) ; RV64V-ONLY-NEXT: lbu a6, 44(a0) ; RV64V-ONLY-NEXT: lbu a7, 55(a0) ; RV64V-ONLY-NEXT: lbu t0, 75(a0) ; RV64V-ONLY-NEXT: lbu t1, 82(a0) ; RV64V-ONLY-NEXT: lbu t2, 154(a0) ; RV64V-ONLY-NEXT: lbu t3, 161(a0) ; RV64V-ONLY-NEXT: lbu t4, 163(a0) ; RV64V-ONLY-NEXT: li t5, 255 ; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.s.x v0, t5 ; RV64V-ONLY-NEXT: lbu t5, 93(a0) ; RV64V-ONLY-NEXT: lbu t6, 105(a0) ; RV64V-ONLY-NEXT: lbu s0, 124(a0) ; RV64V-ONLY-NEXT: lbu a0, 144(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vmv.v.x v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t4 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, t2 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: .cfi_restore s0 ; RV64V-ONLY-NEXT: addi sp, sp, 16 ; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 0 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_loads_gather: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a1, 0(a0) ; RVA22U64-NEXT: lbu a2, 1(a0) ; RVA22U64-NEXT: lbu a3, 22(a0) ; RVA22U64-NEXT: lbu a4, 
31(a0) ; RVA22U64-NEXT: lbu a6, 623(a0) ; RVA22U64-NEXT: lbu t0, 44(a0) ; RVA22U64-NEXT: lbu a7, 55(a0) ; RVA22U64-NEXT: lbu a5, 75(a0) ; RVA22U64-NEXT: lbu t1, 82(a0) ; RVA22U64-NEXT: slli a2, a2, 8 ; RVA22U64-NEXT: slli a3, a3, 16 ; RVA22U64-NEXT: slli a4, a4, 24 ; RVA22U64-NEXT: or t2, a1, a2 ; RVA22U64-NEXT: or t3, a4, a3 ; RVA22U64-NEXT: lbu a2, 93(a0) ; RVA22U64-NEXT: lbu t4, 105(a0) ; RVA22U64-NEXT: lbu t6, 124(a0) ; RVA22U64-NEXT: lbu t5, 144(a0) ; RVA22U64-NEXT: slli t0, t0, 32 ; RVA22U64-NEXT: slli a7, a7, 40 ; RVA22U64-NEXT: slli a6, a6, 48 ; RVA22U64-NEXT: slli a5, a5, 56 ; RVA22U64-NEXT: slli a2, a2, 8 ; RVA22U64-NEXT: or a7, a7, t0 ; RVA22U64-NEXT: or a5, a5, a6 ; RVA22U64-NEXT: lbu a3, 154(a0) ; RVA22U64-NEXT: lbu a1, 161(a0) ; RVA22U64-NEXT: or a2, t1, a2 ; RVA22U64-NEXT: lbu a0, 163(a0) ; RVA22U64-NEXT: slli t4, t4, 16 ; RVA22U64-NEXT: slli a1, a1, 24 ; RVA22U64-NEXT: or a1, a1, t4 ; RVA22U64-NEXT: slli t6, t6, 32 ; RVA22U64-NEXT: slli a0, a0, 40 ; RVA22U64-NEXT: or a0, a0, t6 ; RVA22U64-NEXT: slli t5, t5, 48 ; RVA22U64-NEXT: slli a3, a3, 56 ; RVA22U64-NEXT: or a3, a3, t5 ; RVA22U64-NEXT: or a4, t2, t3 ; RVA22U64-NEXT: or a5, a5, a7 ; RVA22U64-NEXT: or a1, a1, a2 ; RVA22U64-NEXT: or a0, a0, a3 ; RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a4 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_gather: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: addi sp, sp, -16 ; RVA22U64-PACK-NEXT: .cfi_def_cfa_offset 16 ; RVA22U64-PACK-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RVA22U64-PACK-NEXT: .cfi_offset s0, -8 ; RVA22U64-PACK-NEXT: lbu a1, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) ; RVA22U64-PACK-NEXT: lbu a6, 22(a0) ; RVA22U64-PACK-NEXT: lbu a7, 31(a0) ; RVA22U64-PACK-NEXT: lbu t0, 623(a0) ; RVA22U64-PACK-NEXT: lbu t3, 44(a0) ; RVA22U64-PACK-NEXT: lbu t4, 55(a0) ; RVA22U64-PACK-NEXT: lbu t5, 75(a0) ; RVA22U64-PACK-NEXT: lbu t1, 82(a0) ; RVA22U64-PACK-NEXT: packh t2, a1, a2 ; RVA22U64-PACK-NEXT: lbu t6, 154(a0) ; RVA22U64-PACK-NEXT: lbu s0, 161(a0) ; RVA22U64-PACK-NEXT: lbu a3, 163(a0) ; RVA22U64-PACK-NEXT: packh a6, a6, a7 ; RVA22U64-PACK-NEXT: packh a7, t3, t4 ; RVA22U64-PACK-NEXT: packh a2, t0, t5 ; RVA22U64-PACK-NEXT: lbu a4, 93(a0) ; RVA22U64-PACK-NEXT: lbu a5, 105(a0) ; RVA22U64-PACK-NEXT: lbu a1, 124(a0) ; RVA22U64-PACK-NEXT: lbu a0, 144(a0) ; RVA22U64-PACK-NEXT: packh a4, t1, a4 ; RVA22U64-PACK-NEXT: packh a5, a5, s0 ; RVA22U64-PACK-NEXT: packh a1, a1, a3 ; RVA22U64-PACK-NEXT: packh a0, a0, t6 ; RVA22U64-PACK-NEXT: packw a3, t2, a6 ; RVA22U64-PACK-NEXT: packw a2, a7, a2 ; RVA22U64-PACK-NEXT: packw a4, a4, a5 ; RVA22U64-PACK-NEXT: packw a0, a1, a0 ; RVA22U64-PACK-NEXT: pack a1, a3, a2 ; RVA22U64-PACK-NEXT: pack a0, a4, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RVA22U64-PACK-NEXT: .cfi_restore s0 ; RVA22U64-PACK-NEXT: addi sp, sp, 16 ; RVA22U64-PACK-NEXT: .cfi_def_cfa_offset 0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: addi sp, sp, -16 ; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16 ; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64ZVE32-NEXT: .cfi_offset s0, -8 ; RV64ZVE32-NEXT: lbu a1, 0(a0) ; RV64ZVE32-NEXT: lbu a2, 1(a0) ; RV64ZVE32-NEXT: lbu a3, 22(a0) ; RV64ZVE32-NEXT: lbu a4, 31(a0) 
; RV64ZVE32-NEXT: lbu a5, 623(a0) ; RV64ZVE32-NEXT: lbu a6, 44(a0) ; RV64ZVE32-NEXT: lbu a7, 55(a0) ; RV64ZVE32-NEXT: lbu t0, 75(a0) ; RV64ZVE32-NEXT: lbu t1, 82(a0) ; RV64ZVE32-NEXT: lbu t2, 154(a0) ; RV64ZVE32-NEXT: lbu t3, 161(a0) ; RV64ZVE32-NEXT: lbu t4, 163(a0) ; RV64ZVE32-NEXT: li t5, 255 ; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v0, t5 ; RV64ZVE32-NEXT: lbu t5, 93(a0) ; RV64ZVE32-NEXT: lbu t6, 105(a0) ; RV64ZVE32-NEXT: lbu s0, 124(a0) ; RV64ZVE32-NEXT: lbu a0, 144(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vmv.v.x v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, s0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t4 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, t2 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: .cfi_restore s0 ; RV64ZVE32-NEXT: addi sp, sp, 16 ; RV64ZVE32-NEXT: .cfi_def_cfa_offset 0 ; RV64ZVE32-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 22 %p4 = getelementptr i8, ptr %p, i32 31 %p5 = getelementptr i8, ptr %p, i32 44 %p6 = getelementptr i8, ptr %p, i32 55 %p7 = getelementptr i8, ptr %p, i32 623 %p8 = getelementptr i8, ptr %p, i32 75 %p9 = getelementptr i8, ptr %p, i32 82 %p10 = getelementptr i8, ptr %p, i32 93 %p11 = getelementptr i8, ptr %p, i32 105 %p12 = getelementptr i8, ptr %p, i32 161 %p13 = getelementptr i8, ptr %p, i32 124 %p14 = getelementptr i8, ptr %p, i32 163 %p15 = getelementptr i8, ptr %p, i32 144 %p16 = getelementptr i8, ptr %p, i32 154 %ld1 = load i8, ptr %p %ld2 = load i8, ptr %p2 %ld3 = load i8, ptr %p3 %ld4 = load i8, ptr %p4 %ld5 = load i8, ptr %p5 %ld6 = load i8, ptr %p6 %ld7 = load i8, ptr %p7 %ld8 = load i8, ptr %p8 %ld9 = load i8, ptr %p9 %ld10 = load i8, ptr %p10 %ld11 = load i8, ptr %p11 %ld12 = load i8, ptr %p12 %ld13 = load i8, ptr %p13 %ld14 = load i8, ptr %p14 %ld15 = load i8, ptr %p15 %ld16 = load i8, ptr %p16 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1 %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2 %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12 %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15 ret <16 x i8> %v16 } define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_undef_low_half: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: lbu a1, 82(a0) ; RV32-ONLY-NEXT: lbu a2, 93(a0) ; RV32-ONLY-NEXT: lbu a3, 
105(a0) ; RV32-ONLY-NEXT: lbu a4, 124(a0) ; RV32-ONLY-NEXT: lbu a5, 144(a0) ; RV32-ONLY-NEXT: lbu a6, 154(a0) ; RV32-ONLY-NEXT: lbu a7, 161(a0) ; RV32-ONLY-NEXT: lbu a0, 163(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_undef_low_half: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 93(a0) ; RV32VB-NEXT: lbu a2, 82(a0) ; RV32VB-NEXT: lbu a3, 105(a0) ; RV32VB-NEXT: lbu a4, 124(a0) ; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: lbu a5, 144(a0) ; RV32VB-NEXT: lbu a6, 154(a0) ; RV32VB-NEXT: lbu a7, 161(a0) ; RV32VB-NEXT: or a1, a2, a1 ; RV32VB-NEXT: lbu a0, 163(a0) ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a7, a7, 24 ; RV32VB-NEXT: or a2, a7, a3 ; RV32VB-NEXT: slli a0, a0, 8 ; RV32VB-NEXT: or a0, a4, a0 ; RV32VB-NEXT: slli a5, a5, 16 ; RV32VB-NEXT: slli a6, a6, 24 ; RV32VB-NEXT: or a3, a6, a5 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.i v8, 0 ; RV32VB-NEXT: or a1, a1, a2 ; RV32VB-NEXT: or a0, a0, a3 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero ; RV32VB-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_low_half: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 82(a0) ; RV32VB-PACK-NEXT: lbu a2, 93(a0) ; RV32VB-PACK-NEXT: lbu a3, 105(a0) ; RV32VB-PACK-NEXT: lbu a4, 124(a0) ; RV32VB-PACK-NEXT: lbu a5, 161(a0) ; RV32VB-PACK-NEXT: lbu a6, 163(a0) ; RV32VB-PACK-NEXT: lbu a7, 144(a0) ; RV32VB-PACK-NEXT: lbu a0, 154(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a3, a5 ; RV32VB-PACK-NEXT: packh a3, a4, a6 ; RV32VB-PACK-NEXT: packh a0, a7, a0 ; RV32VB-PACK-NEXT: pack a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a0, a0 ; RV32VB-PACK-NEXT: pack a2, a2, a2 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a2 ; RV32VB-PACK-NEXT: pack a0, a3, a0 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_low_half: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: lbu a1, 82(a0) ; RV64V-ONLY-NEXT: lbu a2, 93(a0) ; RV64V-ONLY-NEXT: lbu a3, 105(a0) ; RV64V-ONLY-NEXT: lbu a4, 124(a0) ; RV64V-ONLY-NEXT: lbu a5, 144(a0) ; RV64V-ONLY-NEXT: lbu a6, 154(a0) ; RV64V-ONLY-NEXT: lbu a7, 161(a0) ; RV64V-ONLY-NEXT: lbu a0, 163(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a1, 93(a0) ; RVA22U64-NEXT: lbu a6, 82(a0) ; RVA22U64-NEXT: lbu a7, 105(a0) ; RVA22U64-NEXT: lbu a4, 124(a0) ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: lbu a5, 144(a0) ; RVA22U64-NEXT: lbu a2, 154(a0) ; RVA22U64-NEXT: lbu a3, 161(a0) ; RVA22U64-NEXT: or a1, a6, a1 ; RVA22U64-NEXT: 
lbu a0, 163(a0) ; RVA22U64-NEXT: slli a7, a7, 16 ; RVA22U64-NEXT: slli a3, a3, 24 ; RVA22U64-NEXT: or a3, a3, a7 ; RVA22U64-NEXT: slli a4, a4, 32 ; RVA22U64-NEXT: slli a0, a0, 40 ; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: slli a5, a5, 48 ; RVA22U64-NEXT: slli a2, a2, 56 ; RVA22U64-NEXT: or a2, a2, a5 ; RVA22U64-NEXT: or a1, a1, a3 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.i v8, 0 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a6, 82(a0) ; RVA22U64-PACK-NEXT: lbu a7, 93(a0) ; RVA22U64-PACK-NEXT: lbu t0, 105(a0) ; RVA22U64-PACK-NEXT: lbu a4, 124(a0) ; RVA22U64-PACK-NEXT: lbu a5, 161(a0) ; RVA22U64-PACK-NEXT: lbu a1, 163(a0) ; RVA22U64-PACK-NEXT: lbu a2, 144(a0) ; RVA22U64-PACK-NEXT: lbu a0, 154(a0) ; RVA22U64-PACK-NEXT: packh a3, a6, a7 ; RVA22U64-PACK-NEXT: packh a5, t0, a5 ; RVA22U64-PACK-NEXT: packh a1, a4, a1 ; RVA22U64-PACK-NEXT: packh a0, a2, a0 ; RVA22U64-PACK-NEXT: packw a2, a3, a5 ; RVA22U64-PACK-NEXT: packh a3, a0, a0 ; RVA22U64-PACK-NEXT: packw a3, a3, a3 ; RVA22U64-PACK-NEXT: pack a3, a3, a3 ; RVA22U64-PACK-NEXT: packw a0, a1, a0 ; RVA22U64-PACK-NEXT: pack a0, a2, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a3 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_undef_low_half: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lbu a1, 82(a0) ; RV64ZVE32-NEXT: lbu a2, 93(a0) ; RV64ZVE32-NEXT: lbu a3, 105(a0) ; RV64ZVE32-NEXT: lbu a4, 124(a0) ; RV64ZVE32-NEXT: lbu a5, 144(a0) ; RV64ZVE32-NEXT: lbu a6, 154(a0) ; RV64ZVE32-NEXT: lbu a7, 161(a0) ; RV64ZVE32-NEXT: lbu a0, 163(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32-NEXT: ret %p9 = getelementptr i8, ptr %p, i32 82 %p10 = getelementptr i8, ptr %p, i32 93 %p11 = getelementptr i8, ptr %p, i32 105 %p12 = getelementptr i8, ptr %p, i32 161 %p13 = getelementptr i8, ptr %p, i32 124 %p14 = getelementptr i8, ptr %p, i32 163 %p15 = getelementptr i8, ptr %p, i32 144 %p16 = getelementptr i8, ptr %p, i32 154 %ld9 = load i8, ptr %p9 %ld10 = load i8, ptr %p10 %ld11 = load i8, ptr %p11 %ld12 = load i8, ptr %p12 %ld13 = load i8, ptr %p13 %ld14 = load i8, ptr %p14 %ld15 = load i8, ptr %p15 %ld16 = load i8, ptr %p16 %v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12 %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15 ret <16 x i8> %v16 } define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_undef_high_half: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: lbu a1, 0(a0) ; RV32-ONLY-NEXT: lbu a2, 1(a0) ; RV32-ONLY-NEXT: lbu a3, 22(a0) ; RV32-ONLY-NEXT: lbu a4, 31(a0) ; RV32-ONLY-NEXT: lbu a5, 623(a0) ; RV32-ONLY-NEXT: lbu a6, 44(a0) 
; RV32-ONLY-NEXT: lbu a7, 55(a0) ; RV32-ONLY-NEXT: lbu a0, 75(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_undef_high_half: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 1(a0) ; RV32VB-NEXT: lbu a2, 22(a0) ; RV32VB-NEXT: lbu a3, 31(a0) ; RV32VB-NEXT: lbu a4, 0(a0) ; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: slli a2, a2, 16 ; RV32VB-NEXT: slli a3, a3, 24 ; RV32VB-NEXT: or a1, a4, a1 ; RV32VB-NEXT: lbu a4, 44(a0) ; RV32VB-NEXT: lbu a5, 55(a0) ; RV32VB-NEXT: or a2, a3, a2 ; RV32VB-NEXT: lbu a3, 623(a0) ; RV32VB-NEXT: lbu a0, 75(a0) ; RV32VB-NEXT: slli a5, a5, 8 ; RV32VB-NEXT: or a4, a4, a5 ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a0, a0, 24 ; RV32VB-NEXT: or a0, a0, a3 ; RV32VB-NEXT: or a1, a1, a2 ; RV32VB-NEXT: or a0, a4, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero ; RV32VB-NEXT: vslide1down.vx v8, v8, zero ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_high_half: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 0(a0) ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 22(a0) ; RV32VB-PACK-NEXT: lbu a4, 31(a0) ; RV32VB-PACK-NEXT: lbu a5, 623(a0) ; RV32VB-PACK-NEXT: lbu a6, 44(a0) ; RV32VB-PACK-NEXT: lbu a7, 55(a0) ; RV32VB-PACK-NEXT: lbu a0, 75(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a3, a4 ; RV32VB-PACK-NEXT: packh a3, a6, a7 ; RV32VB-PACK-NEXT: packh a0, a5, a0 ; RV32VB-PACK-NEXT: pack a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a0, a0 ; RV32VB-PACK-NEXT: pack a0, a3, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: pack a0, a2, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_high_half: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: lbu a1, 0(a0) ; RV64V-ONLY-NEXT: lbu a2, 1(a0) ; RV64V-ONLY-NEXT: lbu a3, 22(a0) ; RV64V-ONLY-NEXT: lbu a4, 31(a0) ; RV64V-ONLY-NEXT: lbu a5, 623(a0) ; RV64V-ONLY-NEXT: lbu a6, 44(a0) ; RV64V-ONLY-NEXT: lbu a7, 55(a0) ; RV64V-ONLY-NEXT: lbu a0, 75(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_undef_high_half: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a1, 1(a0) ; RVA22U64-NEXT: lbu a2, 22(a0) ; RVA22U64-NEXT: lbu a3, 31(a0) ; RVA22U64-NEXT: lbu a4, 0(a0) ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a3, a3, 24 ; RVA22U64-NEXT: or a1, a1, a4 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: lbu a3, 44(a0) ; RVA22U64-NEXT: lbu a4, 55(a0) 
; RVA22U64-NEXT: lbu a5, 623(a0) ; RVA22U64-NEXT: lbu a0, 75(a0) ; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: slli a4, a4, 40 ; RVA22U64-NEXT: or a3, a3, a4 ; RVA22U64-NEXT: slli a5, a5, 48 ; RVA22U64-NEXT: slli a0, a0, 56 ; RVA22U64-NEXT: or a0, a0, a5 ; RVA22U64-NEXT: or a1, a1, a2 ; RVA22U64-NEXT: or a0, a0, a3 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a0 ; RVA22U64-NEXT: vslide1down.vx v8, v8, zero ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_high_half: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a6, 0(a0) ; RVA22U64-PACK-NEXT: lbu a7, 1(a0) ; RVA22U64-PACK-NEXT: lbu t0, 22(a0) ; RVA22U64-PACK-NEXT: lbu a4, 31(a0) ; RVA22U64-PACK-NEXT: lbu a5, 623(a0) ; RVA22U64-PACK-NEXT: lbu a1, 44(a0) ; RVA22U64-PACK-NEXT: lbu a2, 55(a0) ; RVA22U64-PACK-NEXT: lbu a0, 75(a0) ; RVA22U64-PACK-NEXT: packh a3, a6, a7 ; RVA22U64-PACK-NEXT: packh a4, t0, a4 ; RVA22U64-PACK-NEXT: packh a1, a1, a2 ; RVA22U64-PACK-NEXT: packh a0, a5, a0 ; RVA22U64-PACK-NEXT: packw a2, a3, a4 ; RVA22U64-PACK-NEXT: packh a3, a0, a0 ; RVA22U64-PACK-NEXT: packw a3, a3, a3 ; RVA22U64-PACK-NEXT: packw a0, a1, a0 ; RVA22U64-PACK-NEXT: pack a0, a2, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a0 ; RVA22U64-PACK-NEXT: pack a0, a3, a3 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_undef_high_half: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lbu a1, 0(a0) ; RV64ZVE32-NEXT: lbu a2, 1(a0) ; RV64ZVE32-NEXT: lbu a3, 22(a0) ; RV64ZVE32-NEXT: lbu a4, 31(a0) ; RV64ZVE32-NEXT: lbu a5, 623(a0) ; RV64ZVE32-NEXT: lbu a6, 44(a0) ; RV64ZVE32-NEXT: lbu a7, 55(a0) ; RV64ZVE32-NEXT: lbu a0, 75(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 22 %p4 = getelementptr i8, ptr %p, i32 31 %p5 = getelementptr i8, ptr %p, i32 44 %p6 = getelementptr i8, ptr %p, i32 55 %p7 = getelementptr i8, ptr %p, i32 623 %p8 = getelementptr i8, ptr %p, i32 75 %ld1 = load i8, ptr %p %ld2 = load i8, ptr %p2 %ld3 = load i8, ptr %p3 %ld4 = load i8, ptr %p4 %ld5 = load i8, ptr %p5 %ld6 = load i8, ptr %p6 %ld7 = load i8, ptr %p7 %ld8 = load i8, ptr %p8 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1 %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2 %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7 ret <16 x i8> %v8 } define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: lbu a1, 623(a0) ; RV32-ONLY-NEXT: lbu a2, 31(a0) ; RV32-ONLY-NEXT: lbu a3, 44(a0) ; RV32-ONLY-NEXT: lbu a4, 55(a0) ; RV32-ONLY-NEXT: lbu a5, 75(a0) ; RV32-ONLY-NEXT: li a6, 255 ; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-ONLY-NEXT: vmv.s.x v0, a6 ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; 
RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 105(a0) ; RV32-ONLY-NEXT: lbu a0, 161(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a2 ; RV32-ONLY-NEXT: vmv.v.x v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_undef_edges: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 623(a0) ; RV32VB-NEXT: lbu a2, 55(a0) ; RV32VB-NEXT: lbu a3, 75(a0) ; RV32VB-NEXT: lbu a4, 31(a0) ; RV32VB-NEXT: lbu a5, 44(a0) ; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: slli a1, a1, 16 ; RV32VB-NEXT: slli a3, a3, 24 ; RV32VB-NEXT: or a2, a5, a2 ; RV32VB-NEXT: lbu a5, 82(a0) ; RV32VB-NEXT: lbu a6, 93(a0) ; RV32VB-NEXT: or a1, a3, a1 ; RV32VB-NEXT: lbu a3, 105(a0) ; RV32VB-NEXT: lbu a0, 161(a0) ; RV32VB-NEXT: slli a6, a6, 8 ; RV32VB-NEXT: or a5, a5, a6 ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a0, a0, 24 ; RV32VB-NEXT: or a0, a0, a3 ; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: or a1, a2, a1 ; RV32VB-NEXT: or a0, a5, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a4 ; RV32VB-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_undef_edges: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 623(a0) ; RV32VB-PACK-NEXT: lbu a2, 31(a0) ; RV32VB-PACK-NEXT: lbu a3, 44(a0) ; RV32VB-PACK-NEXT: lbu a4, 55(a0) ; RV32VB-PACK-NEXT: lbu a5, 75(a0) ; RV32VB-PACK-NEXT: lbu a6, 82(a0) ; RV32VB-PACK-NEXT: lbu a7, 93(a0) ; RV32VB-PACK-NEXT: lbu t0, 105(a0) ; RV32VB-PACK-NEXT: lbu a0, 161(a0) ; RV32VB-PACK-NEXT: packh a3, a3, a4 ; RV32VB-PACK-NEXT: packh a1, a1, a5 ; RV32VB-PACK-NEXT: packh a4, a6, a7 ; RV32VB-PACK-NEXT: packh a0, t0, a0 ; RV32VB-PACK-NEXT: packh a5, a0, a0 ; RV32VB-PACK-NEXT: packh a2, a0, a2 ; RV32VB-PACK-NEXT: pack a2, a5, a2 ; RV32VB-PACK-NEXT: pack a1, a3, a1 ; RV32VB-PACK-NEXT: pack a0, a4, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a2 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: pack a0, a5, a5 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: lbu a1, 623(a0) ; RV64V-ONLY-NEXT: lbu a2, 31(a0) ; RV64V-ONLY-NEXT: lbu a3, 44(a0) ; RV64V-ONLY-NEXT: lbu a4, 55(a0) ; RV64V-ONLY-NEXT: lbu a5, 75(a0) ; RV64V-ONLY-NEXT: li a6, 255 ; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.s.x v0, a6 ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 105(a0) ; RV64V-ONLY-NEXT: lbu a0, 161(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a2 ; RV64V-ONLY-NEXT: vmv.v.x v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV64V-ONLY-NEXT: 
vslidedown.vi v8, v9, 4 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a6, 31(a0) ; RVA22U64-NEXT: lbu a2, 44(a0) ; RVA22U64-NEXT: lbu a3, 55(a0) ; RVA22U64-NEXT: lbu a4, 623(a0) ; RVA22U64-NEXT: lbu a5, 75(a0) ; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: slli a3, a3, 40 ; RVA22U64-NEXT: slli a4, a4, 48 ; RVA22U64-NEXT: slli a5, a5, 56 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: lbu a3, 82(a0) ; RVA22U64-NEXT: lbu a1, 93(a0) ; RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: lbu a5, 105(a0) ; RVA22U64-NEXT: lbu a0, 161(a0) ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: or a1, a1, a3 ; RVA22U64-NEXT: slli a5, a5, 16 ; RVA22U64-NEXT: slli a0, a0, 24 ; RVA22U64-NEXT: or a0, a0, a5 ; RVA22U64-NEXT: slli a6, a6, 24 ; RVA22U64-NEXT: or a2, a2, a4 ; RVA22U64-NEXT: add.uw a2, a6, a2 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a2 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a7, 623(a0) ; RVA22U64-PACK-NEXT: lbu a6, 31(a0) ; RVA22U64-PACK-NEXT: lbu t0, 44(a0) ; RVA22U64-PACK-NEXT: lbu a4, 55(a0) ; RVA22U64-PACK-NEXT: lbu a5, 75(a0) ; RVA22U64-PACK-NEXT: lbu a2, 82(a0) ; RVA22U64-PACK-NEXT: lbu a1, 93(a0) ; RVA22U64-PACK-NEXT: lbu a3, 105(a0) ; RVA22U64-PACK-NEXT: lbu a0, 161(a0) ; RVA22U64-PACK-NEXT: packh a4, t0, a4 ; RVA22U64-PACK-NEXT: packh a5, a7, a5 ; RVA22U64-PACK-NEXT: packh a1, a2, a1 ; RVA22U64-PACK-NEXT: packh a0, a3, a0 ; RVA22U64-PACK-NEXT: packh a2, a0, a0 ; RVA22U64-PACK-NEXT: packh a3, a0, a6 ; RVA22U64-PACK-NEXT: packw a3, a2, a3 ; RVA22U64-PACK-NEXT: packw a2, a2, a2 ; RVA22U64-PACK-NEXT: packw a4, a4, a5 ; RVA22U64-PACK-NEXT: packw a0, a1, a0 ; RVA22U64-PACK-NEXT: pack a1, a3, a4 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 ; RVA22U64-PACK-NEXT: pack a0, a0, a2 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lbu a1, 623(a0) ; RV64ZVE32-NEXT: lbu a2, 31(a0) ; RV64ZVE32-NEXT: lbu a3, 44(a0) ; RV64ZVE32-NEXT: lbu a4, 55(a0) ; RV64ZVE32-NEXT: lbu a5, 75(a0) ; RV64ZVE32-NEXT: li a6, 255 ; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v0, a6 ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 105(a0) ; RV64ZVE32-NEXT: lbu a0, 161(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a2 ; RV64ZVE32-NEXT: vmv.v.x v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ret %p4 = getelementptr i8, ptr %p, i32 31 %p5 = getelementptr i8, ptr %p, i32 44 %p6 = getelementptr i8, ptr %p, i32 55 %p7 = getelementptr i8, ptr %p, i32 623 %p8 = getelementptr i8, ptr %p, i32 75 %p9 = getelementptr i8, ptr %p, i32 82 %p10 = getelementptr i8, ptr %p, i32 93 %p11 = getelementptr i8, ptr %p, i32 105 %p12 = getelementptr i8, ptr %p, i32 161 %ld4 = load i8, ptr %p4 %ld5 = load 
i8, ptr %p5 %ld6 = load i8, ptr %p6 %ld7 = load i8, ptr %p7 %ld8 = load i8, ptr %p8 %ld9 = load i8, ptr %p9 %ld10 = load i8, ptr %p10 %ld11 = load i8, ptr %p11 %ld12 = load i8, ptr %p12 %v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5 %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9 %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10 %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11 ret <16 x i8> %v12 } define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: lbu a1, 0(a0) ; RV32-ONLY-NEXT: lbu a2, 1(a0) ; RV32-ONLY-NEXT: lbu a3, 44(a0) ; RV32-ONLY-NEXT: lbu a4, 55(a0) ; RV32-ONLY-NEXT: lbu a5, 75(a0) ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 124(a0) ; RV32-ONLY-NEXT: li t1, 255 ; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-ONLY-NEXT: vmv.s.x v0, t1 ; RV32-ONLY-NEXT: lbu t1, 144(a0) ; RV32-ONLY-NEXT: lbu a0, 154(a0) ; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a1 ; RV32-ONLY-NEXT: vmv.v.x v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2 ; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 1 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 1(a0) ; RV32VB-NEXT: lbu a2, 0(a0) ; RV32VB-NEXT: lbu a3, 44(a0) ; RV32VB-NEXT: lbu a4, 55(a0) ; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: or a1, a2, a1 ; RV32VB-NEXT: lbu a2, 75(a0) ; RV32VB-NEXT: lbu a5, 82(a0) ; RV32VB-NEXT: lbu a6, 93(a0) ; RV32VB-NEXT: lbu a7, 124(a0) ; RV32VB-NEXT: slli a4, a4, 8 ; RV32VB-NEXT: or a3, a3, a4 ; RV32VB-NEXT: lbu a4, 144(a0) ; RV32VB-NEXT: lbu a0, 154(a0) ; RV32VB-NEXT: slli a6, a6, 8 ; RV32VB-NEXT: or a5, a5, a6 ; RV32VB-NEXT: slli a4, a4, 16 ; RV32VB-NEXT: slli a0, a0, 24 ; RV32VB-NEXT: or a0, a0, a4 ; RV32VB-NEXT: slli a2, a2, 24 ; RV32VB-NEXT: or a2, a3, a2 ; RV32VB-NEXT: or a0, a7, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-NEXT: vslide1down.vx v8, v8, a5 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: lbu a1, 0(a0) ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 44(a0) ; RV32VB-PACK-NEXT: lbu a4, 55(a0) ; RV32VB-PACK-NEXT: lbu a5, 75(a0) ; RV32VB-PACK-NEXT: lbu a6, 82(a0) ; RV32VB-PACK-NEXT: lbu a7, 93(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: lbu a2, 144(a0) ; RV32VB-PACK-NEXT: lbu t0, 154(a0) ; RV32VB-PACK-NEXT: packh a3, a3, a4 ; RV32VB-PACK-NEXT: lbu a0, 124(a0) ; RV32VB-PACK-NEXT: packh a4, a6, a7 ; RV32VB-PACK-NEXT: packh a2, a2, t0 ; RV32VB-PACK-NEXT: packh a5, a0, a5 ; 
RV32VB-PACK-NEXT: pack a3, a3, a5 ; RV32VB-PACK-NEXT: packh a5, a0, a0 ; RV32VB-PACK-NEXT: packh a0, a0, a0 ; RV32VB-PACK-NEXT: pack a0, a0, a2 ; RV32VB-PACK-NEXT: pack a1, a1, a5 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: pack a1, a4, a5 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: lbu a1, 0(a0) ; RV64V-ONLY-NEXT: lbu a2, 1(a0) ; RV64V-ONLY-NEXT: lbu a3, 44(a0) ; RV64V-ONLY-NEXT: lbu a4, 55(a0) ; RV64V-ONLY-NEXT: lbu a5, 75(a0) ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 124(a0) ; RV64V-ONLY-NEXT: li t1, 255 ; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.s.x v0, t1 ; RV64V-ONLY-NEXT: lbu t1, 144(a0) ; RV64V-ONLY-NEXT: lbu a0, 154(a0) ; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 ; RV64V-ONLY-NEXT: vmv.v.x v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2 ; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 1 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: lbu a1, 1(a0) ; RVA22U64-NEXT: lbu a2, 0(a0) ; RVA22U64-NEXT: lbu a3, 44(a0) ; RVA22U64-NEXT: lbu a4, 55(a0) ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: or a6, a2, a1 ; RVA22U64-NEXT: lbu a7, 75(a0) ; RVA22U64-NEXT: lbu a5, 82(a0) ; RVA22U64-NEXT: lbu a1, 93(a0) ; RVA22U64-NEXT: lbu a2, 124(a0) ; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: slli a4, a4, 40 ; RVA22U64-NEXT: or a3, a3, a4 ; RVA22U64-NEXT: lbu a4, 144(a0) ; RVA22U64-NEXT: lbu a0, 154(a0) ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: or a1, a1, a5 ; RVA22U64-NEXT: slli a4, a4, 48 ; RVA22U64-NEXT: slli a0, a0, 56 ; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: slli a7, a7, 56 ; RVA22U64-NEXT: or a3, a7, a3 ; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a2, a6, a3 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a2 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_undef_scattered: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a1, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) ; RVA22U64-PACK-NEXT: lbu a7, 44(a0) ; RVA22U64-PACK-NEXT: lbu t0, 55(a0) ; RVA22U64-PACK-NEXT: lbu a6, 75(a0) ; RVA22U64-PACK-NEXT: lbu a5, 82(a0) ; RVA22U64-PACK-NEXT: lbu a3, 93(a0) ; RVA22U64-PACK-NEXT: packh t1, a1, a2 ; RVA22U64-PACK-NEXT: lbu a2, 144(a0) ; RVA22U64-PACK-NEXT: lbu a4, 154(a0) ; RVA22U64-PACK-NEXT: packh a1, a7, t0 ; RVA22U64-PACK-NEXT: lbu a0, 124(a0) ; RVA22U64-PACK-NEXT: packh a3, a5, a3 ; RVA22U64-PACK-NEXT: packh a2, a2, a4 ; RVA22U64-PACK-NEXT: packh a4, a0, a6 ; RVA22U64-PACK-NEXT: packw a1, a1, a4 ; RVA22U64-PACK-NEXT: packh a4, a0, a0 ; RVA22U64-PACK-NEXT: packh 
a0, a0, a0 ; RVA22U64-PACK-NEXT: packw a5, t1, a4 ; RVA22U64-PACK-NEXT: packw a0, a0, a2 ; RVA22U64-PACK-NEXT: packw a2, a3, a4 ; RVA22U64-PACK-NEXT: pack a1, a5, a1 ; RVA22U64-PACK-NEXT: pack a0, a2, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: lbu a1, 0(a0) ; RV64ZVE32-NEXT: lbu a2, 1(a0) ; RV64ZVE32-NEXT: lbu a3, 44(a0) ; RV64ZVE32-NEXT: lbu a4, 55(a0) ; RV64ZVE32-NEXT: lbu a5, 75(a0) ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 124(a0) ; RV64ZVE32-NEXT: li t1, 255 ; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v0, t1 ; RV64ZVE32-NEXT: lbu t1, 144(a0) ; RV64ZVE32-NEXT: lbu a0, 154(a0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a1 ; RV64ZVE32-NEXT: vmv.v.x v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 22 %p4 = getelementptr i8, ptr %p, i32 31 %p5 = getelementptr i8, ptr %p, i32 44 %p6 = getelementptr i8, ptr %p, i32 55 %p7 = getelementptr i8, ptr %p, i32 623 %p8 = getelementptr i8, ptr %p, i32 75 %p9 = getelementptr i8, ptr %p, i32 82 %p10 = getelementptr i8, ptr %p, i32 93 %p11 = getelementptr i8, ptr %p, i32 105 %p12 = getelementptr i8, ptr %p, i32 161 %p13 = getelementptr i8, ptr %p, i32 124 %p14 = getelementptr i8, ptr %p, i32 163 %p15 = getelementptr i8, ptr %p, i32 144 %p16 = getelementptr i8, ptr %p, i32 154 %ld1 = load i8, ptr %p %ld2 = load i8, ptr %p2 %ld3 = load i8, ptr %p3 %ld4 = load i8, ptr %p4 %ld5 = load i8, ptr %p5 %ld6 = load i8, ptr %p6 %ld7 = load i8, ptr %p7 %ld8 = load i8, ptr %p8 %ld9 = load i8, ptr %p9 %ld10 = load i8, ptr %p10 %ld11 = load i8, ptr %p11 %ld12 = load i8, ptr %p12 %ld13 = load i8, ptr %p13 %ld14 = load i8, ptr %p14 %ld15 = load i8, ptr %p15 %ld16 = load i8, ptr %p16 %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0 %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1 %v3 = insertelement <16 x i8> %v2, i8 undef, i32 2 %v4 = insertelement <16 x i8> %v3, i8 undef, i32 3 %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4 %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5 %v7 = insertelement <16 x i8> %v6, i8 undef, i32 6 %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7 %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8 %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9 %v11 = insertelement <16 x i8> %v10, i8 undef, i32 10 %v12 = insertelement <16 x i8> %v11, i8 undef, i32 11 %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12 %v14 = insertelement <16 x i8> %v13, i8 undef, i32 13 %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14 %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15 ret <16 x i8> %v16 } define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) 
{ ; RV32-ONLY-LABEL: buildvec_v8i8_pack: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a0 ; RV32-ONLY-NEXT: vmv.v.x v9, a4 ; RV32-ONLY-NEXT: vmv.v.i v0, 15 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a7 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v8i8_pack: ; RV32VB: # %bb.0: ; RV32VB-NEXT: slli a7, a7, 24 ; RV32VB-NEXT: andi a6, a6, 255 ; RV32VB-NEXT: andi a4, a4, 255 ; RV32VB-NEXT: andi a5, a5, 255 ; RV32VB-NEXT: slli a3, a3, 24 ; RV32VB-NEXT: andi a2, a2, 255 ; RV32VB-NEXT: andi a0, a0, 255 ; RV32VB-NEXT: andi a1, a1, 255 ; RV32VB-NEXT: slli a6, a6, 16 ; RV32VB-NEXT: slli a5, a5, 8 ; RV32VB-NEXT: slli a2, a2, 16 ; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: or a6, a7, a6 ; RV32VB-NEXT: or a4, a4, a5 ; RV32VB-NEXT: or a2, a3, a2 ; RV32VB-NEXT: or a0, a0, a1 ; RV32VB-NEXT: or a1, a4, a6 ; RV32VB-NEXT: or a0, a0, a2 ; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v8i8_pack: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: packh a6, a6, a7 ; RV32VB-PACK-NEXT: packh a4, a4, a5 ; RV32VB-PACK-NEXT: packh a2, a2, a3 ; RV32VB-PACK-NEXT: packh a0, a0, a1 ; RV32VB-PACK-NEXT: pack a1, a4, a6 ; RV32VB-PACK-NEXT: pack a0, a0, a2 ; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a0 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v8i8_pack: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a0 ; RV64V-ONLY-NEXT: vmv.v.x v9, a4 ; RV64V-ONLY-NEXT: vmv.v.i v0, 15 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a7 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v8i8_pack: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: andi t0, a4, 255 ; RVA22U64-NEXT: andi a5, a5, 255 ; RVA22U64-NEXT: slli a7, a7, 56 ; RVA22U64-NEXT: andi a4, a6, 255 ; RVA22U64-NEXT: andi a2, a2, 255 ; RVA22U64-NEXT: andi a3, a3, 255 ; RVA22U64-NEXT: andi a0, a0, 255 ; RVA22U64-NEXT: andi a1, a1, 255 ; RVA22U64-NEXT: slli t0, t0, 32 ; RVA22U64-NEXT: slli a5, a5, 40 ; RVA22U64-NEXT: slli a4, a4, 48 ; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a3, a3, 24 ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: or a5, a5, t0 ; RVA22U64-NEXT: or a4, a7, a4 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.s.x v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v8i8_pack: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: packh a6, a6, a7 ; RVA22U64-PACK-NEXT: packh a4, a4, a5 ; RVA22U64-PACK-NEXT: packh a2, a2, a3 ; RVA22U64-PACK-NEXT: packh a0, a0, a1 ; RVA22U64-PACK-NEXT: packw a1, a4, a6 ; RVA22U64-PACK-NEXT: packw a0, a0, a2 ; RVA22U64-PACK-NEXT: pack a0, a0, a1 ; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, 
ma ; RVA22U64-PACK-NEXT: vmv.s.x v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v8i8_pack: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a0 ; RV64ZVE32-NEXT: vmv.v.x v9, a4 ; RV64ZVE32-NEXT: vmv.v.i v0, 15 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a7 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0 %v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1 %v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2 %v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3 %v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4 %v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5 %v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6 %v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7 ret <8 x i8> %v8 } define <6 x i8> @buildvec_v6i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6) { ; RV32-ONLY-LABEL: buildvec_v6i8_pack: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v6i8_pack: ; RV32VB: # %bb.0: ; RV32VB-NEXT: slli a3, a3, 24 ; RV32VB-NEXT: andi a2, a2, 255 ; RV32VB-NEXT: andi a0, a0, 255 ; RV32VB-NEXT: andi a1, a1, 255 ; RV32VB-NEXT: andi a4, a4, 255 ; RV32VB-NEXT: andi a5, a5, 255 ; RV32VB-NEXT: slli a2, a2, 16 ; RV32VB-NEXT: slli a1, a1, 8 ; RV32VB-NEXT: slli a5, a5, 8 ; RV32VB-NEXT: or a2, a3, a2 ; RV32VB-NEXT: or a0, a0, a1 ; RV32VB-NEXT: or a0, a0, a2 ; RV32VB-NEXT: or a4, a4, a5 ; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, a4 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v6i8_pack: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: packh a2, a2, a3 ; RV32VB-PACK-NEXT: packh a0, a0, a1 ; RV32VB-PACK-NEXT: packh a1, a4, a5 ; RV32VB-PACK-NEXT: packh a3, a0, a0 ; RV32VB-PACK-NEXT: pack a0, a0, a2 ; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a0 ; RV32VB-PACK-NEXT: pack a0, a1, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v6i8_pack: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v6i8_pack: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: andi a2, a2, 255 ; RVA22U64-NEXT: andi a3, a3, 255 ; RVA22U64-NEXT: andi a0, a0, 255 ; RVA22U64-NEXT: andi a1, a1, 255 ; RVA22U64-NEXT: andi a4, a4, 255 ; RVA22U64-NEXT: andi a5, a5, 255 ; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a3, a3, 24 ; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: slli a4, a4, 32 ; RVA22U64-NEXT: slli a5, a5, 40 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: or a0, a0, a2 ; 
RVA22U64-NEXT: or a4, a4, a5 ; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.s.x v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v6i8_pack: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: packh a2, a2, a3 ; RVA22U64-PACK-NEXT: packh a0, a0, a1 ; RVA22U64-PACK-NEXT: packh a1, a4, a5 ; RVA22U64-PACK-NEXT: packh a3, a0, a0 ; RVA22U64-PACK-NEXT: packw a0, a0, a2 ; RVA22U64-PACK-NEXT: packw a1, a1, a3 ; RVA22U64-PACK-NEXT: pack a0, a0, a1 ; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.s.x v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v6i8_pack: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32-NEXT: ret %v1 = insertelement <6 x i8> poison, i8 %e1, i32 0 %v2 = insertelement <6 x i8> %v1, i8 %e2, i32 1 %v3 = insertelement <6 x i8> %v2, i8 %e3, i32 2 %v4 = insertelement <6 x i8> %v3, i8 %e4, i32 3 %v5 = insertelement <6 x i8> %v4, i8 %e5, i32 4 %v6 = insertelement <6 x i8> %v5, i8 %e6, i32 5 ret <6 x i8> %v6 } define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) { ; RV32-ONLY-LABEL: buildvec_v4i16_pack: ; RV32-ONLY: # %bb.0: ; RV32-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v4i16_pack: ; RV32VB: # %bb.0: ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: zext.h a2, a2 ; RV32VB-NEXT: slli a1, a1, 16 ; RV32VB-NEXT: zext.h a0, a0 ; RV32VB-NEXT: or a2, a2, a3 ; RV32VB-NEXT: or a0, a0, a1 ; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-NEXT: ret ; ; RV32VB-PACK-LABEL: buildvec_v4i16_pack: ; RV32VB-PACK: # %bb.0: ; RV32VB-PACK-NEXT: pack a2, a2, a3 ; RV32VB-PACK-NEXT: pack a0, a0, a1 ; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a0 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 ; RV32VB-PACK-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v4i16_pack: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v4i16_pack: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: slli a3, a3, 48 ; RVA22U64-NEXT: slli a2, a2, 48 ; RVA22U64-NEXT: zext.h a0, a0 ; RVA22U64-NEXT: slli a1, a1, 48 ; RVA22U64-NEXT: srli a2, a2, 16 ; RVA22U64-NEXT: srli a1, a1, 32 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.s.x v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v4i16_pack: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: packw a2, a2, a3 ; RVA22U64-PACK-NEXT: packw a0, a0, a1 ; RVA22U64-PACK-NEXT: pack a0, a0, a2 ; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.s.x v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v4i16_pack: ; RV64ZVE32: # 
%bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: ret %v1 = insertelement <4 x i16> poison, i16 %e1, i32 0 %v2 = insertelement <4 x i16> %v1, i16 %e2, i32 1 %v3 = insertelement <4 x i16> %v2, i16 %e3, i32 2 %v4 = insertelement <4 x i16> %v3, i16 %e4, i32 3 ret <4 x i16> %v4 } define <2 x i32> @buildvec_v2i32_pack(i32 %e1, i32 %e2) { ; RV32-LABEL: buildvec_v2i32_pack: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.x v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v2i32_pack: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v2i32_pack: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: slli a1, a1, 32 ; RVA22U64-NEXT: add.uw a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.s.x v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v2i32_pack: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: pack a0, a0, a1 ; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.s.x v8, a0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v2i32_pack: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: ret %v1 = insertelement <2 x i32> poison, i32 %e1, i32 0 %v2 = insertelement <2 x i32> %v1, i32 %e2, i32 1 ret <2 x i32> %v2 } define <1 x i16> @buildvec_v1i16_pack(i16 %e1) { ; CHECK-LABEL: buildvec_v1i16_pack: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %v1 = insertelement <1 x i16> poison, i16 %e1, i32 0 ret <1 x i16> %v1 } define <1 x i32> @buildvec_v1i32_pack(i32 %e1) { ; CHECK-LABEL: buildvec_v1i32_pack: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %v1 = insertelement <1 x i32> poison, i32 %e1, i32 0 ret <1 x i32> %v1 } define <4 x i32> @buildvec_vslide1up(i32 %e1, i32 %e2) { ; CHECK-LABEL: buildvec_vslide1up: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: ret %v1 = insertelement <4 x i32> poison, i32 %e2, i32 0 %v2 = insertelement <4 x i32> %v1, i32 %e1, i32 1 %v3 = insertelement <4 x i32> %v2, i32 %e1, i32 2 %v4 = insertelement <4 x i32> %v3, i32 %e1, i32 3 ret <4 x i32> %v4 } define <4 x i1> @buildvec_i1_splat(i1 %e1) { ; CHECK-LABEL: buildvec_i1_splat: ; CHECK: # %bb.0: ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %v1 = insertelement <4 x i1> poison, i1 %e1, i32 0 %v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1 %v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2 %v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3 ret <4 x i1> %v4 } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; RV64: {{.*}}