xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-insertshuffleload.ll (revision 7740216f2e1e6f4243a868bc41b9397ad2c7fb38)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve.fp | FileCheck %s --check-prefix=CHECKLE
3; RUN: llc < %s -mtriple=thumbebv8.1m.main-none-eabihf -mattr=+mve.fp | FileCheck %s --check-prefix=CHECKBE
4
5
; inserti8_first: builds an <8 x i8> from a vector load at %p+1 and a scalar
; i8 load at %p. The shuffle moves lanes 0..6 of the vector load into lanes
; 1..7 (lane 0 undef) and the insertelement writes the scalar into lane 0.
; The expected assembly is a single vldrb.u16 from [r0]; the big-endian run
; (CHECKBE) additionally expects a vrev64.16 before returning.
6define <8 x i8> @inserti8_first(ptr %p) {
7; CHECKLE-LABEL: inserti8_first:
8; CHECKLE:       @ %bb.0:
9; CHECKLE-NEXT:    vldrb.u16 q0, [r0]
10; CHECKLE-NEXT:    bx lr
11;
12; CHECKBE-LABEL: inserti8_first:
13; CHECKBE:       @ %bb.0:
14; CHECKBE-NEXT:    vldrb.u16 q1, [r0]
15; CHECKBE-NEXT:    vrev64.16 q0, q1
16; CHECKBE-NEXT:    bx lr
; %q addresses the byte immediately after %p.
17  %q = getelementptr inbounds i8, ptr %p, i32 1
18  %l1 = load <8 x i8>, ptr %q
19  %l2 = load i8, ptr %p
; Shift the loaded lanes up by one; lane 0 is left undef for the insert below.
20  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
21  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
22  ret <8 x i8> %ins
23}
24
; inserti8_last: builds an <8 x i8> from a vector load at %p and a scalar i8
; load at %p+8. The shuffle moves lanes 1..7 of the vector load into lanes
; 0..6 (lane 7 undef) and the insertelement writes the scalar into lane 7.
; The expected assembly is a single vldrb.u16 from [r0, #1]; the big-endian
; run (CHECKBE) additionally expects a vrev64.16 before returning.
25define <8 x i8> @inserti8_last(ptr %p) {
26; CHECKLE-LABEL: inserti8_last:
27; CHECKLE:       @ %bb.0:
28; CHECKLE-NEXT:    vldrb.u16 q0, [r0, #1]
29; CHECKLE-NEXT:    bx lr
30;
31; CHECKBE-LABEL: inserti8_last:
32; CHECKBE:       @ %bb.0:
33; CHECKBE-NEXT:    vldrb.u16 q1, [r0, #1]
34; CHECKBE-NEXT:    vrev64.16 q0, q1
35; CHECKBE-NEXT:    bx lr
; %q addresses the byte immediately after the 8 bytes read by the vector load.
36  %q = getelementptr inbounds i8, ptr %p, i32 8
37  %l1 = load <8 x i8>, ptr %p
38  %l2 = load i8, ptr %q
; Shift the loaded lanes down by one; lane 7 is left undef for the insert below.
39  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
40  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
41  ret <8 x i8> %ins
42}
43
; inserti8_first_sext: same shape as the "first" pattern, but both the
; <8 x i8> vector load (from %p+1) and the scalar i8 load (from %p) are
; sign-extended to i16 before the shuffle and the lane-0 insert.
; The expected assembly is a single sign-extending vldrb.s16 from [r0]; the
; big-endian run (CHECKBE) additionally expects a vrev64.16.
44define <8 x i16> @inserti8_first_sext(ptr %p) {
45; CHECKLE-LABEL: inserti8_first_sext:
46; CHECKLE:       @ %bb.0:
47; CHECKLE-NEXT:    vldrb.s16 q0, [r0]
48; CHECKLE-NEXT:    bx lr
49;
50; CHECKBE-LABEL: inserti8_first_sext:
51; CHECKBE:       @ %bb.0:
52; CHECKBE-NEXT:    vldrb.s16 q1, [r0]
53; CHECKBE-NEXT:    vrev64.16 q0, q1
54; CHECKBE-NEXT:    bx lr
55  %q = getelementptr inbounds i8, ptr %p, i32 1
56  %l1 = load <8 x i8>, ptr %q
57  %s1 = sext <8 x i8> %l1 to <8 x i16>
58  %l2 = load i8, ptr %p
59  %s2 = sext i8 %l2 to i16
; Shift the extended lanes up by one; lane 0 is filled by the insert below.
60  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
61  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
62  ret <8 x i16> %ins
63}
64
; inserti8_last_sext: same shape as the "last" pattern, but both the
; <8 x i8> vector load (from %p) and the scalar i8 load (from %p+8) are
; sign-extended to i16 before the shuffle and the lane-7 insert.
; The expected assembly is a single sign-extending vldrb.s16 from [r0, #1];
; the big-endian run (CHECKBE) additionally expects a vrev64.16.
65define <8 x i16> @inserti8_last_sext(ptr %p) {
66; CHECKLE-LABEL: inserti8_last_sext:
67; CHECKLE:       @ %bb.0:
68; CHECKLE-NEXT:    vldrb.s16 q0, [r0, #1]
69; CHECKLE-NEXT:    bx lr
70;
71; CHECKBE-LABEL: inserti8_last_sext:
72; CHECKBE:       @ %bb.0:
73; CHECKBE-NEXT:    vldrb.s16 q1, [r0, #1]
74; CHECKBE-NEXT:    vrev64.16 q0, q1
75; CHECKBE-NEXT:    bx lr
76  %q = getelementptr inbounds i8, ptr %p, i32 8
77  %l1 = load <8 x i8>, ptr %p
78  %s1 = sext <8 x i8> %l1 to <8 x i16>
79  %l2 = load i8, ptr %q
80  %s2 = sext i8 %l2 to i16
; Shift the extended lanes down by one; lane 7 is filled by the insert below.
81  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
82  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
83  ret <8 x i16> %ins
84}
85
; inserti8_first_zext: zero-extending counterpart of inserti8_first_sext.
; The <8 x i8> vector load (from %p+1) and the scalar i8 load (from %p) are
; both zero-extended to i16, the vector is shifted up one lane, and the scalar
; is inserted into lane 0.
; The expected assembly is a single zero-extending vldrb.u16 from [r0]; the
; big-endian run (CHECKBE) additionally expects a vrev64.16.
86define <8 x i16> @inserti8_first_zext(ptr %p) {
87; CHECKLE-LABEL: inserti8_first_zext:
88; CHECKLE:       @ %bb.0:
89; CHECKLE-NEXT:    vldrb.u16 q0, [r0]
90; CHECKLE-NEXT:    bx lr
91;
92; CHECKBE-LABEL: inserti8_first_zext:
93; CHECKBE:       @ %bb.0:
94; CHECKBE-NEXT:    vldrb.u16 q1, [r0]
95; CHECKBE-NEXT:    vrev64.16 q0, q1
96; CHECKBE-NEXT:    bx lr
97  %q = getelementptr inbounds i8, ptr %p, i32 1
98  %l1 = load <8 x i8>, ptr %q
99  %s1 = zext <8 x i8> %l1 to <8 x i16>
100  %l2 = load i8, ptr %p
101  %s2 = zext i8 %l2 to i16
102  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
103  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
104  ret <8 x i16> %ins
105}
106
; inserti8_last_zext: zero-extending counterpart of inserti8_last_sext.
; The <8 x i8> vector load (from %p) and the scalar i8 load (from %p+8) are
; both zero-extended to i16, the vector is shifted down one lane, and the
; scalar is inserted into lane 7.
; The expected assembly is a single zero-extending vldrb.u16 from [r0, #1];
; the big-endian run (CHECKBE) additionally expects a vrev64.16.
107define <8 x i16> @inserti8_last_zext(ptr %p) {
108; CHECKLE-LABEL: inserti8_last_zext:
109; CHECKLE:       @ %bb.0:
110; CHECKLE-NEXT:    vldrb.u16 q0, [r0, #1]
111; CHECKLE-NEXT:    bx lr
112;
113; CHECKBE-LABEL: inserti8_last_zext:
114; CHECKBE:       @ %bb.0:
115; CHECKBE-NEXT:    vldrb.u16 q1, [r0, #1]
116; CHECKBE-NEXT:    vrev64.16 q0, q1
117; CHECKBE-NEXT:    bx lr
118  %q = getelementptr inbounds i8, ptr %p, i32 8
119  %l1 = load <8 x i8>, ptr %p
120  %s1 = zext <8 x i8> %l1 to <8 x i16>
121  %l2 = load i8, ptr %q
122  %s2 = zext i8 %l2 to i16
123  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
124  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
125  ret <8 x i16> %ins
126}
127
; inserti32_first: the "first" pattern on an <8 x i32> (256-bit) vector, which
; is returned in two q registers. The vector load is from %p+4, the scalar i32
; load is from %p, the shuffle shifts lanes up by one, and the scalar is
; inserted into lane 0.
; Unlike the i8 tests, the expected assembly is not a single load: q0 comes
; from a vldrw.u32 at [r0] (vldrb.u8 + vrev64.8 in the big-endian run), while
; the upper half is assembled from a vldr at [r0, #16], a vldrw.u32 at
; [r0, #20] and three vmov.f32 lane copies.
128define <8 x i32> @inserti32_first(ptr %p) {
129; CHECKLE-LABEL: inserti32_first:
130; CHECKLE:       @ %bb.0:
131; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #20]
132; CHECKLE-NEXT:    vldr s4, [r0, #16]
133; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
134; CHECKLE-NEXT:    vmov.f32 s5, s8
135; CHECKLE-NEXT:    vmov.f32 s6, s9
136; CHECKLE-NEXT:    vmov.f32 s7, s10
137; CHECKLE-NEXT:    bx lr
138;
139; CHECKBE-LABEL: inserti32_first:
140; CHECKBE:       @ %bb.0:
141; CHECKBE-NEXT:    vldrw.u32 q3, [r0, #20]
142; CHECKBE-NEXT:    vldrb.u8 q1, [r0]
143; CHECKBE-NEXT:    vldr s8, [r0, #16]
144; CHECKBE-NEXT:    vmov.f32 s9, s12
145; CHECKBE-NEXT:    vrev64.8 q0, q1
146; CHECKBE-NEXT:    vmov.f32 s10, s13
147; CHECKBE-NEXT:    vmov.f32 s11, s14
148; CHECKBE-NEXT:    vrev64.32 q1, q2
149; CHECKBE-NEXT:    bx lr
; %q is one i32 (4 bytes) past %p.
150  %q = getelementptr inbounds i8, ptr %p, i32 4
151  %l1 = load <8 x i32>, ptr %q
152  %l2 = load i32, ptr %p
153  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
154  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
155  ret <8 x i32> %ins
156}
157
; inserti32_last: the "last" pattern on an <8 x i32> (256-bit) vector, which
; is returned in two q registers. The vector load is from %p, the scalar i32
; load is from %p+32, the shuffle shifts lanes down by one, and the scalar is
; inserted into lane 7.
; The expected assembly is not a single load: q1 comes from a vldrw.u32 at
; [r0, #20] (vldrb.u8 + vrev64.8 in the big-endian run), while the lower half
; is assembled from a vldrw.u32 at [r0], a vldr at [r0, #16] and three
; vmov.f32 lane copies.
158define <8 x i32> @inserti32_last(ptr %p) {
159; CHECKLE-LABEL: inserti32_last:
160; CHECKLE:       @ %bb.0:
161; CHECKLE-NEXT:    vldrw.u32 q2, [r0]
162; CHECKLE-NEXT:    vldr s3, [r0, #16]
163; CHECKLE-NEXT:    vldrw.u32 q1, [r0, #20]
164; CHECKLE-NEXT:    vmov.f32 s0, s9
165; CHECKLE-NEXT:    vmov.f32 s1, s10
166; CHECKLE-NEXT:    vmov.f32 s2, s11
167; CHECKLE-NEXT:    bx lr
168;
169; CHECKBE-LABEL: inserti32_last:
170; CHECKBE:       @ %bb.0:
171; CHECKBE-NEXT:    vldrw.u32 q3, [r0]
172; CHECKBE-NEXT:    vldrb.u8 q0, [r0, #20]
173; CHECKBE-NEXT:    vldr s11, [r0, #16]
174; CHECKBE-NEXT:    vmov.f32 s8, s13
175; CHECKBE-NEXT:    vrev64.8 q1, q0
176; CHECKBE-NEXT:    vmov.f32 s9, s14
177; CHECKBE-NEXT:    vmov.f32 s10, s15
178; CHECKBE-NEXT:    vrev64.32 q0, q2
179; CHECKBE-NEXT:    bx lr
; %q is 32 bytes past %p, i.e. just past the eight i32 lanes of the vector load.
180  %q = getelementptr inbounds i8, ptr %p, i32 32
181  %l1 = load <8 x i32>, ptr %p
182  %l2 = load i32, ptr %q
183  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
184  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
185  ret <8 x i32> %ins
186}
187
; inserti32_first_multiuse: same IR as inserti32_first, except that the vector
; load %l1 has a second use: it is added to the shuffled-and-inserted vector
; and the sum is returned.
; The expected assembly loads from [r0, #20], [r0, #4] and [r0], builds one
; operand with vmov.f32 lane copies, and performs two vadd.i32; the big-endian
; run (CHECKBE) additionally expects a vrev64.32 on each result half.
188define <8 x i32> @inserti32_first_multiuse(ptr %p) {
189; CHECKLE-LABEL: inserti32_first_multiuse:
190; CHECKLE:       @ %bb.0:
191; CHECKLE-NEXT:    vldrw.u32 q0, [r0, #20]
192; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #4]
193; CHECKLE-NEXT:    vmov.f32 s4, s11
194; CHECKLE-NEXT:    vmov.f32 s5, s0
195; CHECKLE-NEXT:    vmov.f32 s6, s1
196; CHECKLE-NEXT:    vmov.f32 s7, s2
197; CHECKLE-NEXT:    vadd.i32 q1, q0, q1
198; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
199; CHECKLE-NEXT:    vadd.i32 q0, q2, q0
200; CHECKLE-NEXT:    bx lr
201;
202; CHECKBE-LABEL: inserti32_first_multiuse:
203; CHECKBE:       @ %bb.0:
204; CHECKBE-NEXT:    vldrw.u32 q0, [r0, #20]
205; CHECKBE-NEXT:    vldrw.u32 q2, [r0, #4]
206; CHECKBE-NEXT:    vmov.f32 s4, s11
207; CHECKBE-NEXT:    vmov.f32 s5, s0
208; CHECKBE-NEXT:    vmov.f32 s6, s1
209; CHECKBE-NEXT:    vmov.f32 s7, s2
210; CHECKBE-NEXT:    vadd.i32 q0, q0, q1
211; CHECKBE-NEXT:    vrev64.32 q1, q0
212; CHECKBE-NEXT:    vldrw.u32 q0, [r0]
213; CHECKBE-NEXT:    vadd.i32 q2, q2, q0
214; CHECKBE-NEXT:    vrev64.32 q0, q2
215; CHECKBE-NEXT:    bx lr
216  %q = getelementptr inbounds i8, ptr %p, i32 4
217  %l1 = load <8 x i32>, ptr %q
218  %l2 = load i32, ptr %p
219  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
220  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
; Second use of %l1, in addition to the shuffle above.
221  %a = add <8 x i32> %l1, %ins
222  ret <8 x i32> %a
223}
224
; inserti32_last_multiuse: same IR as inserti32_last, except that the vector
; load %l1 has a second use: it is added to the shuffled-and-inserted vector
; and the sum is returned.
; The expected assembly loads from [r0], [r0, #16] and [r0, #20], builds one
; operand with vmov.f32 lane copies, and performs two vadd.i32; the big-endian
; run (CHECKBE) additionally expects a vrev64.32 on each result half.
225define <8 x i32> @inserti32_last_multiuse(ptr %p) {
226; CHECKLE-LABEL: inserti32_last_multiuse:
227; CHECKLE:       @ %bb.0:
228; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
229; CHECKLE-NEXT:    vldrw.u32 q1, [r0, #16]
230; CHECKLE-NEXT:    vmov.f32 s8, s1
231; CHECKLE-NEXT:    vmov.f32 s9, s2
232; CHECKLE-NEXT:    vmov.f32 s10, s3
233; CHECKLE-NEXT:    vmov.f32 s11, s4
234; CHECKLE-NEXT:    vadd.i32 q0, q0, q2
235; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #20]
236; CHECKLE-NEXT:    vadd.i32 q1, q1, q2
237; CHECKLE-NEXT:    bx lr
238;
239; CHECKBE-LABEL: inserti32_last_multiuse:
240; CHECKBE:       @ %bb.0:
241; CHECKBE-NEXT:    vldrw.u32 q0, [r0]
242; CHECKBE-NEXT:    vldrw.u32 q1, [r0, #16]
243; CHECKBE-NEXT:    vmov.f32 s8, s1
244; CHECKBE-NEXT:    vmov.f32 s9, s2
245; CHECKBE-NEXT:    vmov.f32 s10, s3
246; CHECKBE-NEXT:    vmov.f32 s11, s4
247; CHECKBE-NEXT:    vadd.i32 q2, q0, q2
248; CHECKBE-NEXT:    vrev64.32 q0, q2
249; CHECKBE-NEXT:    vldrw.u32 q2, [r0, #20]
250; CHECKBE-NEXT:    vadd.i32 q2, q1, q2
251; CHECKBE-NEXT:    vrev64.32 q1, q2
252; CHECKBE-NEXT:    bx lr
253  %q = getelementptr inbounds i8, ptr %p, i32 32
254  %l1 = load <8 x i32>, ptr %p
255  %l2 = load i32, ptr %q
256  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
257  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
; Second use of %l1, in addition to the shuffle above.
258  %a = add <8 x i32> %l1, %ins
259  ret <8 x i32> %a
260}
261
; insertf32_first: the "first" pattern on <4 x float>. The vector load is from
; %p+4, the scalar float load is from %p, the shuffle shifts lanes up by one,
; and the scalar is inserted into lane 0.
; The expected assembly is a single vldrw.u32 from [r0] in the little-endian
; run, and a vldrb.u8 from [r0] followed by vrev64.8 in the big-endian run.
262define <4 x float> @insertf32_first(ptr %p) {
263; CHECKLE-LABEL: insertf32_first:
264; CHECKLE:       @ %bb.0:
265; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
266; CHECKLE-NEXT:    bx lr
267;
268; CHECKBE-LABEL: insertf32_first:
269; CHECKBE:       @ %bb.0:
270; CHECKBE-NEXT:    vldrb.u8 q1, [r0]
271; CHECKBE-NEXT:    vrev64.8 q0, q1
272; CHECKBE-NEXT:    bx lr
; %q is one float (4 bytes) past %p.
273  %q = getelementptr inbounds i8, ptr %p, i32 4
274  %l1 = load <4 x float>, ptr %q
275  %l2 = load float, ptr %p
276  %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
277  %ins = insertelement <4 x float> %s, float %l2, i32 0
278  ret <4 x float> %ins
279}
280
; insertf32_last: the "last" pattern on <4 x float>. The vector load is from
; %p, the scalar float load is from %p+16, the shuffle shifts lanes down by
; one, and the scalar is inserted into lane 3.
; The expected assembly is a single vldrw.u32 from [r0, #4] in the
; little-endian run, and a vldrb.u8 from [r0, #4] followed by vrev64.8 in the
; big-endian run.
281define <4 x float> @insertf32_last(ptr %p) {
282; CHECKLE-LABEL: insertf32_last:
283; CHECKLE:       @ %bb.0:
284; CHECKLE-NEXT:    vldrw.u32 q0, [r0, #4]
285; CHECKLE-NEXT:    bx lr
286;
287; CHECKBE-LABEL: insertf32_last:
288; CHECKBE:       @ %bb.0:
289; CHECKBE-NEXT:    vldrb.u8 q1, [r0, #4]
290; CHECKBE-NEXT:    vrev64.8 q0, q1
291; CHECKBE-NEXT:    bx lr
; %q is 16 bytes past %p, i.e. just past the four float lanes of the vector load.
292  %q = getelementptr inbounds i8, ptr %p, i32 16
293  %l1 = load <4 x float>, ptr %p
294  %l2 = load float, ptr %q
295  %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
296  %ins = insertelement <4 x float> %s, float %l2, i32 3
297  ret <4 x float> %ins
298}
299