xref: /llvm-project/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll (revision bfc0317153dca75137fba00b5c28758d6f720963)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Two llc configurations are exercised. Both pass -verify-machineinstrs and
; both check the shared CHECK prefix. The invocation without an -O flag also
; checks the CHECK-SELDAG prefix, and the -O0 invocation also checks the
; CHECK-FASTISEL prefix.
2; RUN: llc -verify-machineinstrs  < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG  %s
3; RUN: llc -verify-machineinstrs -O0 < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
4
; No -mtriple is given on the llc command lines, so the module-level triple
; below selects the target.
5target triple = "aarch64-unknown-linux-gnu"
6
7;
8; VECTOR_REVERSE
9;
10
; Reverses a <16 x i8> vector via @llvm.vector.reverse.v16i8 and returns it.
; The assertions use the shared CHECK prefix, so both llc invocations must
; emit the same code, a rev64 on .16b lanes followed by an ext #8 of v0 with
; itself (rev64 reverses the lanes inside each 64-bit half, ext #8 then swaps
; the two halves).
11define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
12; CHECK-LABEL: reverse_v16i8:
13; CHECK:       // %bb.0:
14; CHECK-NEXT:    rev64 v0.16b, v0.16b
15; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
16; CHECK-NEXT:    ret
17
18  %res = call <16 x i8> @llvm.vector.reverse.v16i8(<16 x i8> %a)
19  ret <16 x i8> %res
20}
21
; Reverses a <8 x i16> vector via @llvm.vector.reverse.v8i16 and returns it.
; Shared CHECK prefix, so both llc invocations must emit a rev64 on .8h lanes
; followed by an ext #8 of v0 with itself.
22define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
23; CHECK-LABEL: reverse_v8i16:
24; CHECK:       // %bb.0:
25; CHECK-NEXT:    rev64 v0.8h, v0.8h
26; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
27; CHECK-NEXT:    ret
28
29  %res = call <8 x i16> @llvm.vector.reverse.v8i16(<8 x i16> %a)
30  ret <8 x i16> %res
31}
32
; Reverses a <2 x i16> vector via @llvm.vector.reverse.v2i16 and returns it.
; The expected code is a single rev64 on .2s lanes, the same instruction that
; reverse_v2i32 expects, and it is shared by both llc invocations.
; NOTE(review): presumably <2 x i16> is promoted to <2 x i32> during type
; legalisation, as this file states for reverse_v2i8 - confirm.
33define <2 x i16> @reverse_v2i16(<2 x i16> %a) #0 {
34; CHECK-LABEL: reverse_v2i16:
35; CHECK:       // %bb.0:
36; CHECK-NEXT:    rev64 v0.2s, v0.2s
37; CHECK-NEXT:    ret
38  %res = call <2 x i16> @llvm.vector.reverse.v2i16(<2 x i16> %a)
39  ret <2 x i16> %res
40}
41
; Reverses a <2 x i32> vector via @llvm.vector.reverse.v2i32 and returns it.
; Shared CHECK prefix, so both llc invocations must emit a single rev64 on
; .2s lanes, and no ext is expected.
42define <2 x i32> @reverse_v2i32(<2 x i32> %a) #0 {
43; CHECK-LABEL: reverse_v2i32:
44; CHECK:       // %bb.0:
45; CHECK-NEXT:    rev64 v0.2s, v0.2s
46; CHECK-NEXT:    ret
47  %res = call <2 x i32> @llvm.vector.reverse.v2i32(<2 x i32> %a)
48  ret <2 x i32> %res
49}
50
; Reverses a <4 x i32> vector via @llvm.vector.reverse.v4i32 and returns it.
; Shared CHECK prefix, so both llc invocations must emit a rev64 on .4s lanes
; followed by an ext #8 of v0 with itself.
51define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
52; CHECK-LABEL: reverse_v4i32:
53; CHECK:       // %bb.0:
54; CHECK-NEXT:    rev64 v0.4s, v0.4s
55; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
56; CHECK-NEXT:    ret
57
58  %res = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %a)
59  ret <4 x i32> %res
60}
61
; Reverses a <2 x i64> vector via @llvm.vector.reverse.v2i64 and returns it.
; Shared CHECK prefix, so both llc invocations must emit only an ext #8 of v0
; with itself. Unlike the narrower-element 128-bit tests in this file, no
; rev64 is expected.
62define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
63; CHECK-LABEL: reverse_v2i64:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
66; CHECK-NEXT:    ret
67
68  %res = call <2 x i64> @llvm.vector.reverse.v2i64(<2 x i64> %a)
69  ret <2 x i64> %res
70}
71
; Reverses a <8 x half> vector via @llvm.vector.reverse.v8f16 and returns it.
; The expected code is the same as for reverse_v8i16, a rev64 on .8h lanes
; followed by an ext #8 of v0 with itself, for both llc invocations.
72define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
73; CHECK-LABEL: reverse_v8f16:
74; CHECK:       // %bb.0:
75; CHECK-NEXT:    rev64 v0.8h, v0.8h
76; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
77; CHECK-NEXT:    ret
78
79  %res = call <8 x half> @llvm.vector.reverse.v8f16(<8 x half> %a)
80  ret <8 x half> %res
81}
82
; Reverses a <2 x float> vector via @llvm.vector.reverse.v2f32 and returns it.
; The expected code is the same as for reverse_v2i32, a single rev64 on .2s
; lanes, for both llc invocations.
83define <2 x float> @reverse_v2f32(<2 x float> %a) #0 {
84; CHECK-LABEL: reverse_v2f32:
85; CHECK:       // %bb.0:
86; CHECK-NEXT:    rev64 v0.2s, v0.2s
87; CHECK-NEXT:    ret
88  %res = call <2 x float> @llvm.vector.reverse.v2f32(<2 x float> %a)
89  ret <2 x float> %res
90}
91
; Reverses a <4 x float> vector via @llvm.vector.reverse.v4f32 and returns it.
; The expected code is the same as for reverse_v4i32, a rev64 on .4s lanes
; followed by an ext #8 of v0 with itself, for both llc invocations.
92define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
93; CHECK-LABEL: reverse_v4f32:
94; CHECK:       // %bb.0:
95; CHECK-NEXT:    rev64 v0.4s, v0.4s
96; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
97; CHECK-NEXT:    ret
98
99  %res = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %a)
100  ret <4 x float> %res
101}
102
; Reverses a <2 x double> vector via @llvm.vector.reverse.v2f64 and returns
; it. The expected code is the same as for reverse_v2i64, only an ext #8 of
; v0 with itself and no rev64, for both llc invocations.
103define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
104; CHECK-LABEL: reverse_v2f64:
105; CHECK:       // %bb.0:
106; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
107; CHECK-NEXT:    ret
108
109  %res = call <2 x double> @llvm.vector.reverse.v2f64(<2 x double> %a)
110  ret <2 x double> %res
111}
112
113; Verify promote type legalisation works as expected.
; Reverses a <2 x i8> vector via @llvm.vector.reverse.v2i8 and returns it.
; The expected code is a single rev64 on .2s lanes, identical to what
; reverse_v2i32 expects, and it is shared by both llc invocations.
114define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
115; CHECK-LABEL: reverse_v2i8:
116; CHECK:       // %bb.0:
117; CHECK-NEXT:    rev64 v0.2s, v0.2s
118; CHECK-NEXT:    ret
119
120  %res = call <2 x i8> @llvm.vector.reverse.v2i8(<2 x i8> %a)
121  ret <2 x i8> %res
122}
123
124; Verify splitvec type legalisation works as expected.
; Reverses a <8 x i32> vector via @llvm.vector.reverse.v8i32 and returns it.
; The assertions read v0 and v1 as inputs and produce both as outputs. Each
; register gets a rev64 on .4s lanes plus an ext #8, and the two registers
; trade places (the reversed input v1 is returned in v0, the reversed input
; v0 in v1).
; The two llc invocations generate different code, so this test uses the
; per-invocation prefixes instead of the shared one. The run without an -O
; flag does the exchange through the scratch register v2. The -O0 run does it
; with a mov plus a spill/reload of q1 through a 16-byte stack slot.
125define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
126; CHECK-SELDAG-LABEL: reverse_v8i32:
127; CHECK-SELDAG:       // %bb.0:
128; CHECK-SELDAG-NEXT:    rev64 v1.4s, v1.4s
129; CHECK-SELDAG-NEXT:    rev64 v2.4s, v0.4s
130; CHECK-SELDAG-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
131; CHECK-SELDAG-NEXT:    ext v1.16b, v2.16b, v2.16b, #8
132; CHECK-SELDAG-NEXT:    ret
133;
134; CHECK-FASTISEL-LABEL: reverse_v8i32:
135; CHECK-FASTISEL:       // %bb.0:
136; CHECK-FASTISEL-NEXT:    sub sp, sp, #16
137; CHECK-FASTISEL-NEXT:    str q1, [sp] // 16-byte Folded Spill
138; CHECK-FASTISEL-NEXT:    mov v1.16b, v0.16b
139; CHECK-FASTISEL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
140; CHECK-FASTISEL-NEXT:    rev64 v0.4s, v0.4s
141; CHECK-FASTISEL-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
142; CHECK-FASTISEL-NEXT:    rev64 v1.4s, v1.4s
143; CHECK-FASTISEL-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
144; CHECK-FASTISEL-NEXT:    add sp, sp, #16
145; CHECK-FASTISEL-NEXT:    ret
146
147  %res = call <8 x i32> @llvm.vector.reverse.v8i32(<8 x i32> %a)
148  ret <8 x i32> %res
149}
150
151; Verify splitvec type legalisation works as expected.
; Reverses a <16 x float> vector via @llvm.vector.reverse.v16f32 and returns
; it. The assertions read v0-v3 as inputs and produce v0-v3 as outputs. Each
; register gets a rev64 on .4s lanes plus an ext #8, and the register order
; is mirrored (reversed v3 is returned in v0, v2 in v1, v1 in v2, v0 in v3).
; The two llc invocations generate different code, so this test uses the
; per-invocation prefixes instead of the shared one. The run without an -O
; flag uses v4 and v5 as scratch registers. The -O0 run uses two movs plus
; spills/reloads of q2 and q3 through a 32-byte stack area.
152define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
153; CHECK-SELDAG-LABEL: reverse_v16f32:
154; CHECK-SELDAG:       // %bb.0:
155; CHECK-SELDAG-NEXT:    rev64 v3.4s, v3.4s
156; CHECK-SELDAG-NEXT:    rev64 v2.4s, v2.4s
157; CHECK-SELDAG-NEXT:    rev64 v4.4s, v1.4s
158; CHECK-SELDAG-NEXT:    rev64 v5.4s, v0.4s
159; CHECK-SELDAG-NEXT:    ext v0.16b, v3.16b, v3.16b, #8
160; CHECK-SELDAG-NEXT:    ext v1.16b, v2.16b, v2.16b, #8
161; CHECK-SELDAG-NEXT:    ext v2.16b, v4.16b, v4.16b, #8
162; CHECK-SELDAG-NEXT:    ext v3.16b, v5.16b, v5.16b, #8
163; CHECK-SELDAG-NEXT:    ret
164;
165; CHECK-FASTISEL-LABEL: reverse_v16f32:
166; CHECK-FASTISEL:       // %bb.0:
167; CHECK-FASTISEL-NEXT:    sub sp, sp, #32
168; CHECK-FASTISEL-NEXT:    str q3, [sp, #16] // 16-byte Folded Spill
169; CHECK-FASTISEL-NEXT:    str q2, [sp] // 16-byte Folded Spill
170; CHECK-FASTISEL-NEXT:    mov v2.16b, v1.16b
171; CHECK-FASTISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
172; CHECK-FASTISEL-NEXT:    mov v3.16b, v0.16b
173; CHECK-FASTISEL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
174; CHECK-FASTISEL-NEXT:    rev64 v0.4s, v0.4s
175; CHECK-FASTISEL-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
176; CHECK-FASTISEL-NEXT:    rev64 v1.4s, v1.4s
177; CHECK-FASTISEL-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
178; CHECK-FASTISEL-NEXT:    rev64 v2.4s, v2.4s
179; CHECK-FASTISEL-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
180; CHECK-FASTISEL-NEXT:    rev64 v3.4s, v3.4s
181; CHECK-FASTISEL-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
182; CHECK-FASTISEL-NEXT:    add sp, sp, #32
183; CHECK-FASTISEL-NEXT:    ret
184
185  %res = call <16 x float> @llvm.vector.reverse.v16f32(<16 x float> %a)
186  ret <16 x float> %res
187}
188
188
189
; Declarations of the llvm.vector.reverse overloads called by the test
; functions in this file, one per function. Each overload takes and returns
; the same fixed-width vector type.
190declare <2 x i8> @llvm.vector.reverse.v2i8(<2 x i8>)
191declare <16 x i8> @llvm.vector.reverse.v16i8(<16 x i8>)
192declare <8 x i16> @llvm.vector.reverse.v8i16(<8 x i16>)
193declare <2 x i16> @llvm.vector.reverse.v2i16(<2 x i16>)
194declare <2 x i32> @llvm.vector.reverse.v2i32(<2 x i32>)
195declare <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32>)
196declare <8 x i32> @llvm.vector.reverse.v8i32(<8 x i32>)
197declare <2 x i64> @llvm.vector.reverse.v2i64(<2 x i64>)
198declare <8 x half> @llvm.vector.reverse.v8f16(<8 x half>)
199declare <2 x float> @llvm.vector.reverse.v2f32(<2 x float>)
200declare <4 x float> @llvm.vector.reverse.v4f32(<4 x float>)
201declare <16 x float> @llvm.vector.reverse.v16f32(<16 x float>)
202declare <2 x double> @llvm.vector.reverse.v2f64(<2 x double>)
203
; Attribute group #0 is attached to every test function defined in this file.
; It marks them nounwind and enables the "+neon" target feature.
204attributes #0 = { nounwind "target-features"="+neon" }
205