; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

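; The tests below concatenate two loaded <2 x N> vectors with a
; shufflevector, insert a scalar at a fixed index, and check that the
; insert is performed on the relevant source half before the halves are
; combined with vslideup.

; Index 0 is element 0 of the low half: the expected lowering writes the
; scalar with vmv.s.x under a tail-undisturbed (tu) policy, then slides
; the high half into place.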
define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 0
  store <4 x i8> %ins, ptr %a
  ret void
}

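; Index 1 is element 1 of the low half: the scalar is moved into a
; temporary with vmv.s.x and slid up by one element.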
define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 1
  store <4 x i8> %ins, ptr %a
  ret void
}

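; Index 2 is element 0 of the high half, so the tu-policy vmv.s.x
; targets the vector loaded from %b instead.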
define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 2
  store <4 x i8> %ins, ptr %a
  ret void
}

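; Index 3 is element 1 of the high half: vmv.s.x plus vslideup.vi 1 on
; the vector loaded from %b.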
define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 3
  store <4 x i8> %ins, ptr %a
  ret void
}

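; Same pattern at e64. On RV32 the i64 scalar arrives as the GPR pair
; a2/a3, so it is inserted via two e32 vslide1down.vx steps under a tu
; policy; RV64 holds it in a single GPR and uses vmv.s.x.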
define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 0
  store <4 x i64> %ins, ptr %a
  ret void
}

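; Index 1 at e64: RV32 assembles the i64 in a temporary with two e32
; vslide1down.vx steps and slides it up by one; RV64 uses vmv.s.x plus
; vslideup.vi 1.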
define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 1
  store <4 x i64> %ins, ptr %a
  ret void
}

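; Index 2 at e64: as for idx0, but the scalar insert targets the high
; half loaded from %b.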
154
155define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
156; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
157; RV32:       # %bb.0:
158; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
159; RV32-NEXT:    vle64.v v8, (a1)
160; RV32-NEXT:    vle64.v v10, (a0)
161; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
162; RV32-NEXT:    vslide1down.vx v8, v8, a2
163; RV32-NEXT:    vslide1down.vx v8, v8, a3
164; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
165; RV32-NEXT:    vslideup.vi v10, v8, 2
166; RV32-NEXT:    vse64.v v10, (a0)
167; RV32-NEXT:    ret
168;
169; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
170; RV64:       # %bb.0:
171; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
172; RV64-NEXT:    vle64.v v8, (a1)
173; RV64-NEXT:    vle64.v v10, (a0)
174; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
175; RV64-NEXT:    vmv.s.x v8, a2
176; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
177; RV64-NEXT:    vslideup.vi v10, v8, 2
178; RV64-NEXT:    vse64.v v10, (a0)
179; RV64-NEXT:    ret
180  %v1 = load <2 x i64>, ptr %a
181  %v2 = load <2 x i64>, ptr %b
182  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
183  %ins = insertelement <4 x i64> %concat, i64 %x, i32 2
184  store <4 x i64> %ins, ptr %a
185  ret void
186}
187
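; Index 3 at e64: the scalar is assembled (RV32) or moved (RV64) into a
; temporary and slid up by one within the high half before the halves
; are combined.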
define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vle64.v v10, (a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v10, v8, 2
; RV32-NEXT:    vse64.v v10, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 3
  store <4 x i64> %ins, ptr %a
  ret void
}