xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll (revision 8e0cd7382adacd8bc1741dc26bc0be6bdf8e238a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6
7target triple = "aarch64-unknown-linux-gnu"
8
9; REVB pattern for shuffle v32i8 -> v16i16
; Loads <32 x i8> from %a, swaps the two bytes of every 16-bit pair and stores
; the result back to %a. Result lanes 24, 27, 30 and 31 use an undef mask index;
; the CHECK lines still expect REVB on .h elements for both 128-bit halves.
; The NONEON-NOSVE lines expect a byte-by-byte ldrb/strb copy through the stack.
10define void @test_revbv16i16(ptr %a) {
11; CHECK-LABEL: test_revbv16i16:
12; CHECK:       // %bb.0:
13; CHECK-NEXT:    ldp q0, q1, [x0]
14; CHECK-NEXT:    ptrue p0.h
15; CHECK-NEXT:    revb z0.h, p0/m, z0.h
16; CHECK-NEXT:    revb z1.h, p0/m, z1.h
17; CHECK-NEXT:    stp q0, q1, [x0]
18; CHECK-NEXT:    ret
19;
20; NONEON-NOSVE-LABEL: test_revbv16i16:
21; NONEON-NOSVE:       // %bb.0:
22; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
23; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
24; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
25; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
26; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
27; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
28; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
29; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
30; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
31; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
32; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
33; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
34; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
35; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
36; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #7]
37; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
38; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
39; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
40; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
41; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
42; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
43; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
44; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #3]
45; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
46; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
47; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
48; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
49; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
50; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #46]
51; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
52; NONEON-NOSVE-NEXT:    strb w8, [sp, #63]
53; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
54; NONEON-NOSVE-NEXT:    strb w8, [sp, #62]
55; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #44]
56; NONEON-NOSVE-NEXT:    strb w8, [sp, #61]
57; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
58; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
59; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #42]
60; NONEON-NOSVE-NEXT:    strb w8, [sp, #59]
61; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #43]
62; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
63; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #40]
64; NONEON-NOSVE-NEXT:    strb w8, [sp, #57]
65; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
66; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
67; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #38]
68; NONEON-NOSVE-NEXT:    strb w8, [sp, #55]
69; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #39]
70; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
71; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #36]
72; NONEON-NOSVE-NEXT:    strb w8, [sp, #53]
73; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #37]
74; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
75; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #34]
76; NONEON-NOSVE-NEXT:    strb w8, [sp, #51]
77; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #35]
78; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
79; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #32]
80; NONEON-NOSVE-NEXT:    strb w8, [sp, #49]
81; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #33]
82; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
83; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
84; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
85; NONEON-NOSVE-NEXT:    add sp, sp, #64
86; NONEON-NOSVE-NEXT:    ret
87  %tmp1 = load <32 x i8>, ptr %a
88  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 undef, i32 24, i32 27, i32 undef, i32 29, i32 28, i32 undef, i32 undef>
89  store <32 x i8> %tmp2, ptr %a
90  ret void
91}
92
93; REVB pattern for shuffle v32i8 -> v8i32
; Loads <32 x i8> from %a, reverses the four bytes of every 32-bit group and
; stores the result back to %a. Result lanes 25, 26, 27 and 31 use an undef mask
; index; the CHECK lines still expect REVB on .s elements for both 128-bit halves.
94define void @test_revbv8i32(ptr %a) {
95; CHECK-LABEL: test_revbv8i32:
96; CHECK:       // %bb.0:
97; CHECK-NEXT:    ldp q0, q1, [x0]
98; CHECK-NEXT:    ptrue p0.s
99; CHECK-NEXT:    revb z0.s, p0/m, z0.s
100; CHECK-NEXT:    revb z1.s, p0/m, z1.s
101; CHECK-NEXT:    stp q0, q1, [x0]
102; CHECK-NEXT:    ret
103;
104; NONEON-NOSVE-LABEL: test_revbv8i32:
105; NONEON-NOSVE:       // %bb.0:
106; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
107; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
108; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
109; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
110; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
111; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
112; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
113; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
114; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
115; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
116; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
117; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
118; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
119; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
120; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
121; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
122; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
123; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
124; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #7]
125; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
126; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
127; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
128; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
129; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
130; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
131; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
132; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #3]
133; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
134; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #44]
135; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
136; NONEON-NOSVE-NEXT:    strb w8, [sp, #63]
137; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
138; NONEON-NOSVE-NEXT:    strb w8, [sp, #62]
139; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #46]
140; NONEON-NOSVE-NEXT:    strb w8, [sp, #61]
141; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
142; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
143; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #40]
144; NONEON-NOSVE-NEXT:    strb w8, [sp, #59]
145; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
146; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
147; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #42]
148; NONEON-NOSVE-NEXT:    strb w8, [sp, #57]
149; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #43]
150; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
151; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #36]
152; NONEON-NOSVE-NEXT:    strb w8, [sp, #55]
153; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #37]
154; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
155; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #38]
156; NONEON-NOSVE-NEXT:    strb w8, [sp, #53]
157; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #39]
158; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
159; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #32]
160; NONEON-NOSVE-NEXT:    strb w8, [sp, #51]
161; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #33]
162; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
163; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #34]
164; NONEON-NOSVE-NEXT:    strb w8, [sp, #49]
165; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #35]
166; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
167; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
168; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
169; NONEON-NOSVE-NEXT:    add sp, sp, #64
170; NONEON-NOSVE-NEXT:    ret
171  %tmp1 = load <32 x i8>, ptr %a
172  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
173  store <32 x i8> %tmp2, ptr %a
174  ret void
175}
176
177; REVB pattern for shuffle v32i8 -> v4i64
; Loads <32 x i8> from %a, reverses the eight bytes of every 64-bit group and
; stores the result back to %a. Result lanes 27, 29, 30 and 31 use an undef mask
; index; the CHECK lines still expect REVB on .d elements for both 128-bit halves.
178define void @test_revbv4i64(ptr %a) {
179; CHECK-LABEL: test_revbv4i64:
180; CHECK:       // %bb.0:
181; CHECK-NEXT:    ldp q0, q1, [x0]
182; CHECK-NEXT:    ptrue p0.d
183; CHECK-NEXT:    revb z0.d, p0/m, z0.d
184; CHECK-NEXT:    revb z1.d, p0/m, z1.d
185; CHECK-NEXT:    stp q0, q1, [x0]
186; CHECK-NEXT:    ret
187;
188; NONEON-NOSVE-LABEL: test_revbv4i64:
189; NONEON-NOSVE:       // %bb.0:
190; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
191; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
192; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
193; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
194; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
195; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
196; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
197; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
198; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
199; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
200; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
201; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
202; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
203; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
204; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
205; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
206; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
207; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
208; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #3]
209; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
210; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
211; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
212; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
213; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
214; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
215; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
216; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #7]
217; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
218; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #40]
219; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
220; NONEON-NOSVE-NEXT:    strb w8, [sp, #63]
221; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
222; NONEON-NOSVE-NEXT:    strb w8, [sp, #62]
223; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #42]
224; NONEON-NOSVE-NEXT:    strb w8, [sp, #61]
225; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #43]
226; NONEON-NOSVE-NEXT:    strb w8, [sp, #60]
227; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #44]
228; NONEON-NOSVE-NEXT:    strb w8, [sp, #59]
229; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
230; NONEON-NOSVE-NEXT:    strb w8, [sp, #58]
231; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #46]
232; NONEON-NOSVE-NEXT:    strb w8, [sp, #57]
233; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
234; NONEON-NOSVE-NEXT:    strb w8, [sp, #56]
235; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #32]
236; NONEON-NOSVE-NEXT:    strb w8, [sp, #55]
237; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #33]
238; NONEON-NOSVE-NEXT:    strb w8, [sp, #54]
239; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #34]
240; NONEON-NOSVE-NEXT:    strb w8, [sp, #53]
241; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #35]
242; NONEON-NOSVE-NEXT:    strb w8, [sp, #52]
243; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #36]
244; NONEON-NOSVE-NEXT:    strb w8, [sp, #51]
245; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #37]
246; NONEON-NOSVE-NEXT:    strb w8, [sp, #50]
247; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #38]
248; NONEON-NOSVE-NEXT:    strb w8, [sp, #49]
249; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #39]
250; NONEON-NOSVE-NEXT:    strb w8, [sp, #48]
251; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
252; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
253; NONEON-NOSVE-NEXT:    add sp, sp, #64
254; NONEON-NOSVE-NEXT:    ret
255  %tmp1 = load <32 x i8>, ptr %a
256  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 undef, i32 27, i32 undef, i32 undef, i32 undef>
257  store <32 x i8> %tmp2, ptr %a
258  ret void
259}
260
261; REVH pattern for shuffle v16i16 -> v8i32
; Loads <16 x i16> from %a, swaps the two halfwords of every 32-bit pair and
; stores the result back to %a. The CHECK lines expect REVH on .s elements; the
; NONEON-NOSVE lines expect each 32-bit word to be rotated by 16 on the stack.
262define void @test_revhv8i32(ptr %a) {
263; CHECK-LABEL: test_revhv8i32:
264; CHECK:       // %bb.0:
265; CHECK-NEXT:    ldp q0, q1, [x0]
266; CHECK-NEXT:    ptrue p0.s
267; CHECK-NEXT:    revh z0.s, p0/m, z0.s
268; CHECK-NEXT:    revh z1.s, p0/m, z1.s
269; CHECK-NEXT:    stp q0, q1, [x0]
270; CHECK-NEXT:    ret
271;
272; NONEON-NOSVE-LABEL: test_revhv8i32:
273; NONEON-NOSVE:       // %bb.0:
274; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
275; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
276; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
277; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
278; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
279; NONEON-NOSVE-NEXT:    ror w9, w8, #16
280; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
281; NONEON-NOSVE-NEXT:    ror w8, w8, #16
282; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
283; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
284; NONEON-NOSVE-NEXT:    ror w9, w8, #16
285; NONEON-NOSVE-NEXT:    ldr w8, [sp]
286; NONEON-NOSVE-NEXT:    ror w8, w8, #16
287; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
288; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44]
289; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
290; NONEON-NOSVE-NEXT:    ror w9, w8, #16
291; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40]
292; NONEON-NOSVE-NEXT:    ror w8, w8, #16
293; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
294; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36]
295; NONEON-NOSVE-NEXT:    ror w9, w8, #16
296; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32]
297; NONEON-NOSVE-NEXT:    ror w8, w8, #16
298; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
299; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
300; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
301; NONEON-NOSVE-NEXT:    add sp, sp, #64
302; NONEON-NOSVE-NEXT:    ret
303  %tmp1 = load <16 x i16>, ptr %a
304  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
305  store <16 x i16> %tmp2, ptr %a
306  ret void
307}
308
309; REVH pattern for shuffle v16f16 -> v8f32
; Floating-point variant: loads <16 x half> from %a, swaps the two half elements
; of every 32-bit pair and stores the result back to %a. The CHECK lines expect
; REVH on .s elements, the same sequence as for the <16 x i16> mask of this shape.
310define void @test_revhv8f32(ptr %a) {
311; CHECK-LABEL: test_revhv8f32:
312; CHECK:       // %bb.0:
313; CHECK-NEXT:    ldp q0, q1, [x0]
314; CHECK-NEXT:    ptrue p0.s
315; CHECK-NEXT:    revh z0.s, p0/m, z0.s
316; CHECK-NEXT:    revh z1.s, p0/m, z1.s
317; CHECK-NEXT:    stp q0, q1, [x0]
318; CHECK-NEXT:    ret
319;
320; NONEON-NOSVE-LABEL: test_revhv8f32:
321; NONEON-NOSVE:       // %bb.0:
322; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
323; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
324; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
325; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
326; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
327; NONEON-NOSVE-NEXT:    ror w9, w8, #16
328; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
329; NONEON-NOSVE-NEXT:    ror w8, w8, #16
330; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
331; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
332; NONEON-NOSVE-NEXT:    ror w9, w8, #16
333; NONEON-NOSVE-NEXT:    ldr w8, [sp]
334; NONEON-NOSVE-NEXT:    ror w8, w8, #16
335; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
336; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44]
337; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
338; NONEON-NOSVE-NEXT:    ror w9, w8, #16
339; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40]
340; NONEON-NOSVE-NEXT:    ror w8, w8, #16
341; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
342; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36]
343; NONEON-NOSVE-NEXT:    ror w9, w8, #16
344; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32]
345; NONEON-NOSVE-NEXT:    ror w8, w8, #16
346; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
347; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
348; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
349; NONEON-NOSVE-NEXT:    add sp, sp, #64
350; NONEON-NOSVE-NEXT:    ret
351  %tmp1 = load <16 x half>, ptr %a
352  %tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
353  store <16 x half> %tmp2, ptr %a
354  ret void
355}
356
357; REVH pattern for shuffle v16i16 -> v4i64
; Loads <16 x i16> from %a, reverses the four halfwords of every 64-bit group
; and stores the result back to %a. The CHECK lines expect REVH on .d elements
; for both 128-bit halves.
358define void @test_revhv4i64(ptr %a) {
359; CHECK-LABEL: test_revhv4i64:
360; CHECK:       // %bb.0:
361; CHECK-NEXT:    ldp q0, q1, [x0]
362; CHECK-NEXT:    ptrue p0.d
363; CHECK-NEXT:    revh z0.d, p0/m, z0.d
364; CHECK-NEXT:    revh z1.d, p0/m, z1.d
365; CHECK-NEXT:    stp q0, q1, [x0]
366; CHECK-NEXT:    ret
367;
368; NONEON-NOSVE-LABEL: test_revhv4i64:
369; NONEON-NOSVE:       // %bb.0:
370; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
371; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
372; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
373; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
374; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
375; NONEON-NOSVE-NEXT:    ror w9, w8, #16
376; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
377; NONEON-NOSVE-NEXT:    ror w8, w8, #16
378; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
379; NONEON-NOSVE-NEXT:    ldr w8, [sp]
380; NONEON-NOSVE-NEXT:    ror w9, w8, #16
381; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
382; NONEON-NOSVE-NEXT:    ror w8, w8, #16
383; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
384; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40]
385; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
386; NONEON-NOSVE-NEXT:    ror w9, w8, #16
387; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44]
388; NONEON-NOSVE-NEXT:    ror w8, w8, #16
389; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
390; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32]
391; NONEON-NOSVE-NEXT:    ror w9, w8, #16
392; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36]
393; NONEON-NOSVE-NEXT:    ror w8, w8, #16
394; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
395; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
396; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
397; NONEON-NOSVE-NEXT:    add sp, sp, #64
398; NONEON-NOSVE-NEXT:    ret
399  %tmp1 = load <16 x i16>, ptr %a
400  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
401  store <16 x i16> %tmp2, ptr %a
402  ret void
403}
404
405; REVW pattern for shuffle v8i32 -> v4i64
; Loads <8 x i32> from %a, swaps the two words of every 64-bit pair and stores
; the result back to %a. The CHECK lines expect REVW on .d elements; the
; NONEON-NOSVE lines expect the swap to be done with w-register ldp/stp pairs.
406define void @test_revwv4i64(ptr %a) {
407; CHECK-LABEL: test_revwv4i64:
408; CHECK:       // %bb.0:
409; CHECK-NEXT:    ldp q0, q1, [x0]
410; CHECK-NEXT:    ptrue p0.d
411; CHECK-NEXT:    revw z0.d, p0/m, z0.d
412; CHECK-NEXT:    revw z1.d, p0/m, z1.d
413; CHECK-NEXT:    stp q0, q1, [x0]
414; CHECK-NEXT:    ret
415;
416; NONEON-NOSVE-LABEL: test_revwv4i64:
417; NONEON-NOSVE:       // %bb.0:
418; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
419; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
420; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
421; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
422; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
423; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
424; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp]
425; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
426; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #40]
427; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
428; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
429; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #32]
430; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
431; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
432; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
433; NONEON-NOSVE-NEXT:    add sp, sp, #64
434; NONEON-NOSVE-NEXT:    ret
435  %tmp1 = load <8 x i32>, ptr %a
436  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
437  store <8 x i32> %tmp2, ptr %a
438  ret void
439}
440
441; REVW pattern for shuffle v8f32 -> v4f64
; Floating-point variant: loads <8 x float> from %a, swaps the two floats of
; every 64-bit pair and stores the result back to %a. The CHECK lines expect
; REVW on .d elements; the NONEON-NOSVE lines expect s-register ldp/stp pairs.
442define void @test_revwv4f64(ptr %a) {
443; CHECK-LABEL: test_revwv4f64:
444; CHECK:       // %bb.0:
445; CHECK-NEXT:    ldp q0, q1, [x0]
446; CHECK-NEXT:    ptrue p0.d
447; CHECK-NEXT:    revw z0.d, p0/m, z0.d
448; CHECK-NEXT:    revw z1.d, p0/m, z1.d
449; CHECK-NEXT:    stp q0, q1, [x0]
450; CHECK-NEXT:    ret
451;
452; NONEON-NOSVE-LABEL: test_revwv4f64:
453; NONEON-NOSVE:       // %bb.0:
454; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
455; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
456; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
457; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
458; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #8]
459; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
460; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp]
461; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
462; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #40]
463; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
464; NONEON-NOSVE-NEXT:    ldp s1, s0, [sp, #32]
465; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
466; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
467; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
468; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
469; NONEON-NOSVE-NEXT:    add sp, sp, #64
470; NONEON-NOSVE-NEXT:    ret
471  %tmp1 = load <8 x float>, ptr %a
472  %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
473  store <8 x float> %tmp2, ptr %a
474  ret void
475}
476
; REVB pattern for a single 128-bit vector: loads <16 x i8> from %a, reverses
; the eight bytes of each 64-bit half and returns the result (no store).
; The CHECK lines expect one REVB on .d elements.
477define <16 x i8> @test_revv16i8(ptr %a) {
478; CHECK-LABEL: test_revv16i8:
479; CHECK:       // %bb.0:
480; CHECK-NEXT:    ptrue p0.d
481; CHECK-NEXT:    ldr q0, [x0]
482; CHECK-NEXT:    revb z0.d, p0/m, z0.d
483; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
484; CHECK-NEXT:    ret
485;
486; NONEON-NOSVE-LABEL: test_revv16i8:
487; NONEON-NOSVE:       // %bb.0:
488; NONEON-NOSVE-NEXT:    ldr q0, [x0]
489; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
490; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
491; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
492; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
493; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
494; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
495; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
496; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
497; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
498; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
499; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
500; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
501; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
502; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
503; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
504; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
505; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
506; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
507; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
508; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
509; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
510; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
511; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
512; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
513; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #3]
514; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
515; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
516; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
517; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
518; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
519; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
520; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
521; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #7]
522; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
523; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
524; NONEON-NOSVE-NEXT:    add sp, sp, #32
525; NONEON-NOSVE-NEXT:    ret
526  %tmp1 = load <16 x i8>, ptr %a
527  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
528  ret <16 x i8> %tmp2
529}
530
531; REVW pattern for shuffle two v8i32 inputs with the second input available.
; Every mask index is in the range 8..15, so only %tmp2 (loaded from %b) feeds
; the result, which is stored to %a. The CHECK lines accordingly load only from
; x1, apply REVW on .d elements, and store to x0.
532define void @test_revwv8i32v8i32(ptr %a, ptr %b) {
533; CHECK-LABEL: test_revwv8i32v8i32:
534; CHECK:       // %bb.0:
535; CHECK-NEXT:    ldp q0, q1, [x1]
536; CHECK-NEXT:    ptrue p0.d
537; CHECK-NEXT:    revw z0.d, p0/m, z0.d
538; CHECK-NEXT:    revw z1.d, p0/m, z1.d
539; CHECK-NEXT:    stp q0, q1, [x0]
540; CHECK-NEXT:    ret
541;
542; NONEON-NOSVE-LABEL: test_revwv8i32v8i32:
543; NONEON-NOSVE:       // %bb.0:
544; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
545; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
546; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
547; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
548; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
549; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
550; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp]
551; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
552; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #40]
553; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
554; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
555; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #32]
556; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
557; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
558; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
559; NONEON-NOSVE-NEXT:    add sp, sp, #64
560; NONEON-NOSVE-NEXT:    ret
561  %tmp1 = load <8 x i32>, ptr %a
562  %tmp2 = load <8 x i32>, ptr %b
563  %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
564  store <8 x i32> %tmp3, ptr %a
565  ret void
566}
567
; REVH pattern on a 512-bit vector: loads <32 x i16> from %a, reverses the four
; halfwords of every 64-bit group and stores the result back to %a. Result lanes
; 25, 26, 27 and 31 use an undef mask index; the CHECK lines still expect REVH
; on .d elements for all four 128-bit parts.
568define void @test_revhv32i16(ptr %a) {
569; CHECK-LABEL: test_revhv32i16:
570; CHECK:       // %bb.0:
571; CHECK-NEXT:    ldp q0, q1, [x0, #32]
572; CHECK-NEXT:    ptrue p0.d
573; CHECK-NEXT:    ldp q2, q3, [x0]
574; CHECK-NEXT:    revh z0.d, p0/m, z0.d
575; CHECK-NEXT:    revh z1.d, p0/m, z1.d
576; CHECK-NEXT:    revh z2.d, p0/m, z2.d
577; CHECK-NEXT:    revh z3.d, p0/m, z3.d
578; CHECK-NEXT:    stp q0, q1, [x0, #32]
579; CHECK-NEXT:    stp q2, q3, [x0]
580; CHECK-NEXT:    ret
581;
582; NONEON-NOSVE-LABEL: test_revhv32i16:
583; NONEON-NOSVE:       // %bb.0:
584; NONEON-NOSVE-NEXT:    sub sp, sp, #128
585; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
586; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
587; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0, #32]
588; NONEON-NOSVE-NEXT:    str q0, [sp, #32]
589; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40]
590; NONEON-NOSVE-NEXT:    str q1, [sp, #96]
591; NONEON-NOSVE-NEXT:    str q3, [sp, #64]
592; NONEON-NOSVE-NEXT:    ror w9, w8, #16
593; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44]
594; NONEON-NOSVE-NEXT:    str q2, [sp]
595; NONEON-NOSVE-NEXT:    ror w8, w8, #16
596; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
597; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32]
598; NONEON-NOSVE-NEXT:    ror w9, w8, #16
599; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36]
600; NONEON-NOSVE-NEXT:    ror w8, w8, #16
601; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
602; NONEON-NOSVE-NEXT:    ldr w8, [sp, #104]
603; NONEON-NOSVE-NEXT:    ldr q2, [sp, #48]
604; NONEON-NOSVE-NEXT:    ror w9, w8, #16
605; NONEON-NOSVE-NEXT:    ldr w8, [sp, #108]
606; NONEON-NOSVE-NEXT:    ror w8, w8, #16
607; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120]
608; NONEON-NOSVE-NEXT:    ldr w8, [sp, #96]
609; NONEON-NOSVE-NEXT:    ror w9, w8, #16
610; NONEON-NOSVE-NEXT:    ldr w8, [sp, #100]
611; NONEON-NOSVE-NEXT:    ror w8, w8, #16
612; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112]
613; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #78]
614; NONEON-NOSVE-NEXT:    ldr q3, [sp, #112]
615; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
616; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #70]
617; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
618; NONEON-NOSVE-NEXT:    ldur w8, [sp, #74]
619; NONEON-NOSVE-NEXT:    ror w8, w8, #16
620; NONEON-NOSVE-NEXT:    stur w8, [sp, #90]
621; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
622; NONEON-NOSVE-NEXT:    ldr q1, [sp, #80]
623; NONEON-NOSVE-NEXT:    ror w9, w8, #16
624; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
625; NONEON-NOSVE-NEXT:    ror w8, w8, #16
626; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
627; NONEON-NOSVE-NEXT:    ldr w8, [sp]
628; NONEON-NOSVE-NEXT:    ror w9, w8, #16
629; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
630; NONEON-NOSVE-NEXT:    ror w8, w8, #16
631; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
632; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
633; NONEON-NOSVE-NEXT:    stp q3, q2, [x0]
634; NONEON-NOSVE-NEXT:    stp q0, q1, [x0, #32]
635; NONEON-NOSVE-NEXT:    add sp, sp, #128
636; NONEON-NOSVE-NEXT:    ret
637  %tmp1 = load <32 x i16>, ptr %a
638  %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
639  store <32 x i16> %tmp2, ptr %a
640  ret void
641}
642
; Negative case: loads <4 x i64> from %a and swaps the two i64 elements of each
; 128-bit pair. No REV* instruction is expected here; the CHECK lines expect an
; INDEX-generated table and TBL for each 128-bit half instead.
643define void @test_rev_elts_fail(ptr %a) {
644; CHECK-LABEL: test_rev_elts_fail:
645; CHECK:       // %bb.0:
646; CHECK-NEXT:    index z0.d, #1, #-1
647; CHECK-NEXT:    ldp q1, q2, [x0]
648; CHECK-NEXT:    tbl z1.d, { z1.d }, z0.d
649; CHECK-NEXT:    tbl z0.d, { z2.d }, z0.d
650; CHECK-NEXT:    stp q1, q0, [x0]
651; CHECK-NEXT:    ret
652;
653; NONEON-NOSVE-LABEL: test_rev_elts_fail:
654; NONEON-NOSVE:       // %bb.0:
655; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
656; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
657; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
658; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp]
659; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
660; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #16]
661; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp, #32]
662; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
663; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
664; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
665; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
666; NONEON-NOSVE-NEXT:    add sp, sp, #64
667; NONEON-NOSVE-NEXT:    ret
668  %tmp1 = load <4 x i64>, ptr %a
669  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
670  store <4 x i64> %tmp2, ptr %a
671  ret void
672}
673
674; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
675; the double-words within quad-words.
; The function carries attribute group #1 (+sve2p1), so the expected output for
; every RUN line, including the NONEON-NOSVE prefix, is REVD on .q elements.
676define void @test_revdv4i64_sve2p1(ptr %a) #1 {
677; CHECK-LABEL: test_revdv4i64_sve2p1:
678; CHECK:       // %bb.0:
679; CHECK-NEXT:    ldp q0, q1, [x0]
680; CHECK-NEXT:    ptrue p0.d, vl2
681; CHECK-NEXT:    revd z0.q, p0/m, z0.q
682; CHECK-NEXT:    revd z1.q, p0/m, z1.q
683; CHECK-NEXT:    stp q0, q1, [x0]
684; CHECK-NEXT:    ret
685;
686; NONEON-NOSVE-LABEL: test_revdv4i64_sve2p1:
687; NONEON-NOSVE:       // %bb.0:
688; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
689; NONEON-NOSVE-NEXT:    ptrue p0.d, vl2
690; NONEON-NOSVE-NEXT:    revd z0.q, p0/m, z0.q
691; NONEON-NOSVE-NEXT:    revd z1.q, p0/m, z1.q
692; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
693; NONEON-NOSVE-NEXT:    ret
694  %tmp1 = load <4 x i64>, ptr %a
695  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
696  store <4 x i64> %tmp2, ptr %a
697  ret void
698}
699
; Floating-point variant of the sve2p1 REVD test: loads <4 x double> from %a,
; swaps the two doubles of each 128-bit pair and stores the result back to %a.
; The function carries attribute group #1 (+sve2p1); every RUN line, including
; the NONEON-NOSVE prefix, expects REVD on .q elements.
700define void @test_revdv4f64_sve2p1(ptr %a) #1 {
701; CHECK-LABEL: test_revdv4f64_sve2p1:
702; CHECK:       // %bb.0:
703; CHECK-NEXT:    ldp q0, q1, [x0]
704; CHECK-NEXT:    ptrue p0.d
705; CHECK-NEXT:    revd z0.q, p0/m, z0.q
706; CHECK-NEXT:    revd z1.q, p0/m, z1.q
707; CHECK-NEXT:    stp q0, q1, [x0]
708; CHECK-NEXT:    ret
709;
710; NONEON-NOSVE-LABEL: test_revdv4f64_sve2p1:
711; NONEON-NOSVE:       // %bb.0:
712; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
713; NONEON-NOSVE-NEXT:    ptrue p0.d
714; NONEON-NOSVE-NEXT:    revd z0.q, p0/m, z0.q
715; NONEON-NOSVE-NEXT:    revd z1.q, p0/m, z1.q
716; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
717; NONEON-NOSVE-NEXT:    ret
718  %tmp1 = load <4 x double>, ptr %a
719  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
720  store <4 x double> %tmp2, ptr %a
721  ret void
722}
723
; Full reversal of a 256-bit vector: loads <8 x i32> from %a, reverses all eight
; elements and stores the result back to %a. The CHECK lines expect an
; INDEX-generated table (#3, #-1) with TBL applied to each 128-bit half, and the
; two halves written back in swapped order.
724define void @test_revv8i32(ptr %a) {
725; CHECK-LABEL: test_revv8i32:
726; CHECK:       // %bb.0:
727; CHECK-NEXT:    index z0.s, #3, #-1
728; CHECK-NEXT:    ldp q2, q1, [x0]
729; CHECK-NEXT:    tbl z1.s, { z1.s }, z0.s
730; CHECK-NEXT:    tbl z0.s, { z2.s }, z0.s
731; CHECK-NEXT:    stp q1, q0, [x0]
732; CHECK-NEXT:    ret
733;
734; NONEON-NOSVE-LABEL: test_revv8i32:
735; NONEON-NOSVE:       // %bb.0:
736; NONEON-NOSVE-NEXT:    sub sp, sp, #64
737; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
738; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
739; NONEON-NOSVE-NEXT:    str q0, [sp, #32]
740; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #32]
741; NONEON-NOSVE-NEXT:    str q1, [sp]
742; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
743; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #40]
744; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
745; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp]
746; NONEON-NOSVE-NEXT:    ldr q1, [sp, #48]
747; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
748; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
749; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
750; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
751; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
752; NONEON-NOSVE-NEXT:    add sp, sp, #64
753; NONEON-NOSVE-NEXT:    ret
754  %tmp1 = load <8 x i32>, ptr %a
755  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
756  store <8 x i32> %tmp2, ptr %a
757  ret void
758}
; Attribute group #1 adds the sve2p1 target feature; in this file it is attached
; to test_revdv4i64_sve2p1 and test_revdv4f64_sve2p1.
759attributes #1 = { "target-features"="+sve2p1" }
760