; xref: /llvm-project/llvm/test/CodeGen/ARM/vrev.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fullfp16 %s -o - | FileCheck %s

; Full reversal of the eight i8 lanes in a 64-bit D register -> single vrev64.8.
define <8 x i8> @test_vrev64D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.8 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

; Full reversal of the four i16 lanes in a D register -> single vrev64.16.
define <4 x i16> @test_vrev64D16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <4 x i16>, ptr %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	ret <4 x i16> %tmp2
}

; Same reversal on half elements (+fullfp16) still selects vrev64.16.
define <4 x half> @test_vrev64Df16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Df16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <4 x half>, ptr %A
	%tmp2 = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	ret <4 x half> %tmp2
}

; Swap of the two i32 lanes in a D register -> single vrev64.32.
define <2 x i32> @test_vrev64D32(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <2 x i32>, ptr %A
	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x i32> %tmp2
}

; Same lane swap on float elements also selects vrev64.32.
define <2 x float> @test_vrev64Df(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Df:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <2 x float>, ptr %A
	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x float> %tmp2
}

; i8 reversal within each 64-bit half of a Q register -> single vrev64.8 q-form.
define <16 x i8> @test_vrev64Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev64.8 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <16 x i8>, ptr %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
	ret <16 x i8> %tmp2
}

; i16 reversal within each 64-bit half of a Q register -> vrev64.16 q-form.
define <8 x i16> @test_vrev64Q16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i16> %tmp2
}

; Same per-doubleword reversal on half elements -> vrev64.16 q-form.
define <8 x half> @test_vrev64Qf16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Qf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x half>, ptr %A
	%tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x half> %tmp2
}

; i32 pair swap within each 64-bit half of a Q register -> vrev64.32 q-form.
define <4 x i32> @test_vrev64Q32(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Q32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, ptr %A
	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i32> %tmp2
}

; Same pair swap on float elements -> vrev64.32 q-form.
define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64Qf:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <4 x float>, ptr %A
	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x float> %tmp2
}

; i8 reversal within each 32-bit word of a D register -> vrev32.8.
define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32D8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev32.8 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i8> %tmp2
}

; i16 swap within each 32-bit word of a D register -> vrev32.16.
define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32D16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev32.16 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <4 x i16>, ptr %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i16> %tmp2
}

; Same per-word swap on half elements -> vrev32.16.
define <4 x half> @test_vrev32Df16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Df16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev32.16 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <4 x half>, ptr %A
	%tmp2 = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x half> %tmp2
}

; i8 reversal within each 32-bit word of a Q register -> vrev32.8 q-form.
define <16 x i8> @test_vrev32Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Q8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev32.8 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <16 x i8>, ptr %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
	ret <16 x i8> %tmp2
}

; i16 swap within each 32-bit word of a Q register -> vrev32.16 q-form.
define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Q16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i16> %tmp2
}

; Same per-word swap on half elements in a Q register -> vrev32.16 q-form.
define <8 x half> @test_vrev32Qf16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Qf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x half>, ptr %A
	%tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x half> %tmp2
}

; i8 swap within each 16-bit halfword of a D register -> vrev16.8.
define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev16D8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev16.8 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i8> %tmp2
}

; i8 swap within each 16-bit halfword of a Q register -> vrev16.8 q-form.
define <16 x i8> @test_vrev16Q8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev16Q8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev16.8 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <16 x i8>, ptr %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
	ret <16 x i8> %tmp2
}

; Undef shuffle indices should not prevent matching to VREV:

; Mask with undef holes still matches the vrev64.8 reversal pattern.
define <8 x i8> @test_vrev64D8_undef(ptr %A) nounwind {
; CHECK-LABEL: test_vrev64D8_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.8 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

; Mask with undef holes still matches the vrev32.16 q-form pattern.
define <8 x i16> @test_vrev32Q16_undef(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Q16_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
	ret <8 x i16> %tmp2
}

; Same undef-tolerant matching with half elements -> vrev32.16 q-form.
define <8 x half> @test_vrev32Qf16_undef(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32Qf16_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x half>, ptr %A
	%tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
	ret <8 x half> %tmp2
}

; A vcombine feeding a VREV should not obscure things.  Radar 8597007.

; Shuffle built from two extracted 64-bit halves (a vcombine) should still
; select per-doubleword vrev64.32 instructions rather than a generic shuffle.
define void @test_with_vcombine(ptr %v) nounwind {
; CHECK-LABEL: test_with_vcombine:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    vadd.f32 d18, d17, d17
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vrev64.32 d17, d18
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, ptr %v, align 16
  %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
  %tmp3 = extractelement <2 x double> %tmp2, i32 0
  %tmp4 = bitcast double %tmp3 to <2 x float>
  %tmp5 = extractelement <2 x double> %tmp2, i32 1
  %tmp6 = bitcast double %tmp5 to <2 x float>
  %tmp7 = fadd <2 x float> %tmp6, %tmp6
  %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x float> %tmp8, ptr %v, align 16
  ret void
}

; The type <2 x i16> is legalized to <2 x i32> and needs to be trunc-stored
; to <2 x i16> when stored to memory.
; Extract two i16 lanes into a <2 x i16> and store it; exercises the
; <2 x i16> -> <2 x i32> legalization plus the truncating store path.
define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp {
; CHECK-LABEL: test_vrev64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.u16 r0, d17[2]
; CHECK-NEXT:    vmov.u16 r2, d17[1]
; CHECK-NEXT:    vmov.32 d16[0], r0
; CHECK-NEXT:    vmov.32 d16[1], r2
; CHECK-NEXT:    vuzp.16 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r1:32]
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp2 = load <8 x i16>, ptr %source, align 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
  store <2 x i16> %tmp11, ptr %dst, align 4
  ret void
}

; Test vrev of float4
; Shuffle mixing a zero constant with one loaded lane; expected lowering is
; vext.32 followed by vrev64.32 as pinned by the CHECK lines below.
define void @float_vrev64(ptr nocapture %source, ptr nocapture %dest) nounwind noinline ssp {
; CHECK-LABEL: float_vrev64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q8, #0x0
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    add r0, r1, #176
; CHECK-NEXT:    vext.32 q8, q9, q8, #3
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp2 = load <4 x float>, ptr %source, align 4
  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
  %arrayidx8 = getelementptr inbounds <4 x float>, ptr %dest, i32 11
  store <4 x float> %tmp5, ptr %arrayidx8, align 4
  ret void
}

; llvm.bswap on <4 x i32> lowers to vrev32.8 (byte swap within each word).
define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
; CHECK-LABEL: test_vrev32_bswap:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d17, r2, r3
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vrev32.8 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
  ret <4 x i32> %bswap
}

; Intrinsic declaration used by test_vrev32_bswap above.
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
