; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

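; Each test below loads two <2 x iN> vectors, concatenates them with a
; shufflevector, inserts a scalar into the result, and stores it back.
; The checks show the insert being folded into whichever source subvector
; contains the target index, before the halves are joined with vslideup.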
define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 0
  store <4 x i8> %ins, ptr %a
  ret void
}

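; Index 1 still falls in the low subvector: the scalar is slid into
; element 1 at mf8 before the high half is concatenated.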
define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 1
  store <4 x i8> %ins, ptr %a
  ret void
}

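; Index 2 is element 0 of the high subvector, so the scalar overwrites
; the head of the vector loaded from %b (note the tu policy).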
define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 2
  store <4 x i8> %ins, ptr %a
  ret void
}

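; Index 3 maps to element 1 of the high subvector.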
define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 3
  store <4 x i8> %ins, ptr %a
  ret void
}

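; i64 variants of the tests above. Under ilp32 the scalar arrives split
; across a2/a3, so RV32 materializes it with a pair of vslide1down.vx at
; e32; RV64 inserts it with a single vmv.s.x.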
define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 0
  store <4 x i64> %ins, ptr %a
  ret void
}

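; As in the i8 idx1 case, the scalar is inserted at element 1 of the low
; subvector before the concat.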
define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 1
  store <4 x i64> %ins, ptr %a
  ret void
}

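; Index 2 folds into the subvector loaded from %b.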
define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vle64.v v10, (a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v10, v8, 2
; RV32-NEXT:    vse64.v v10, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 2
  store <4 x i64> %ins, ptr %a
  ret void
}

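; Index 3 is element 1 of the subvector loaded from %b.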
define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vle64.v v10, (a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v10, v8, 2
; RV32-NEXT:    vse64.v v10, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 3
  store <4 x i64> %ins, ptr %a
  ret void
}