; xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm-dyadic.ll (revision 3100480925df10960c1e0a077dd9875037d3fe29)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 3.
; Flag operands 0, 0, 0, 0, 0 (all clear). Expected selection: vshrnb.i16 q0, q1, #3.
define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}
13
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 9.
; Flag operands 0, 0, 0, 0, 0 (all clear). Expected selection: vshrnb.i32 q0, q1, #9.
define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i32 q0, q1, #9
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}
23
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 0, 0, 1, 1, 0: 3rd/4th set for the _u variant; the expected
; mnemonic is still the sign-agnostic vshrnb.i16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}
33
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 3.
; Flag operands 0, 0, 1, 1, 0: 3rd/4th set for the _u variant; the expected
; mnemonic is still the sign-agnostic vshrnb.i32 q0, q1, #3.
define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 0, i32 0, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}
43
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 0, 0, 0, 0, 1: 5th set for the `t` form.
; Expected selection: vshrnt.i16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}
53
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 10.
; Flag operands 0, 0, 0, 0, 1: 5th set for the `t` form.
; Expected selection: vshrnt.i32 q0, q1, #10.
define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}
63
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 6.
; Flag operands 0, 0, 1, 1, 1: 3rd/4th set for the _u variant, 5th for the `t` form.
; Expected selection: vshrnt.i16 q0, q1, #6 (sign-agnostic mnemonic).
define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i16 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 0, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}
73
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 10.
; Flag operands 0, 0, 1, 1, 1: 3rd/4th set for the _u variant, 5th for the `t` form.
; Expected selection: vshrnt.i32 q0, q1, #10 (sign-agnostic mnemonic).
define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}
83
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 4.
; Flag operands 0, 0, 0, 0, 0; the mask is pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrnbt.i16 q0, q1, #4.
define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
97
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 13.
; Flag operands 0, 0, 0, 0, 0; the mask is pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrnbt.i32 q0, q1, #13.
define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
111
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 0, 0, 1, 1, 0 (3rd/4th set for the _u variant); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrnbt.i16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 0, i32 0, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
125
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 15.
; Flag operands 0, 0, 1, 1, 0 (3rd/4th set for the _u variant); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrnbt.i32 q0, q1, #15.
define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 15, i32 0, i32 0, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
139
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 6.
; Flag operands 0, 0, 0, 0, 1 (5th set for the `t` form); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrntt.i16 q0, q1, #6.
define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i16 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 0, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
153
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 13.
; Flag operands 0, 0, 0, 0, 1 (5th set for the `t` form); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrntt.i32 q0, q1, #13.
define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 0, i32 0, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
167
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 0, 0, 1, 1, 1 (3rd/4th for the _u variant, 5th for the `t` form);
; mask is pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrntt.i16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
181
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 10.
; Flag operands 0, 0, 1, 1, 1 (3rd/4th for the _u variant, 5th for the `t` form);
; mask is pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vshrntt.i32 q0, q1, #10.
define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
195
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 5.
; Flag operands 0, 1, 0, 0, 0: 2nd set, which pairs with the vr- mnemonic.
; Expected selection: vrshrnb.i16 q0, q1, #5.
define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 0, i32 1, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}
205
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 10.
; Flag operands 0, 1, 0, 0, 0: 2nd set, which pairs with the vr- mnemonic.
; Expected selection: vrshrnb.i32 q0, q1, #10.
define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 1, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}
215
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 2.
; Flag operands 0, 1, 1, 1, 0: 2nd set (vr- mnemonic), 3rd/4th set (_u variant).
; Expected selection: vrshrnb.i16 q0, q1, #2 (sign-agnostic mnemonic).
define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 2, i32 0, i32 1, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}
225
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 12.
; Flag operands 0, 1, 1, 1, 0: 2nd set (vr- mnemonic), 3rd/4th set (_u variant).
; Expected selection: vrshrnb.i32 q0, q1, #12 (sign-agnostic mnemonic).
define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i32 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 12, i32 0, i32 1, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}
235
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 4.
; Flag operands 0, 1, 0, 0, 1: 2nd set (vr- mnemonic), 5th set (`t` form).
; Expected selection: vrshrnt.i16 q0, q1, #4.
define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 1, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}
245
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 11.
; Flag operands 0, 1, 0, 0, 1: 2nd set (vr- mnemonic), 5th set (`t` form).
; Expected selection: vrshrnt.i32 q0, q1, #11.
define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 11, i32 0, i32 1, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}
255
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 0, 1, 1, 1, 1: 2nd (vr- mnemonic), 3rd/4th (_u variant), 5th (`t` form).
; Expected selection: vrshrnt.i16 q0, q1, #1 (sign-agnostic mnemonic).
define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}
265
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 6.
; Flag operands 0, 1, 1, 1, 1: 2nd (vr- mnemonic), 3rd/4th (_u variant), 5th (`t` form).
; Expected selection: vrshrnt.i32 q0, q1, #6 (sign-agnostic mnemonic).
define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 6, i32 0, i32 1, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}
275
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 0, 1, 0, 0, 0 (2nd set, vr- mnemonic); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrnbt.i16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
289
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 14.
; Flag operands 0, 1, 0, 0, 0 (2nd set, vr- mnemonic); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrnbt.i32 q0, q1, #14.
define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i32 q0, q1, #14
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 14, i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
303
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 2.
; Flag operands 0, 1, 1, 1, 0 (2nd: vr- mnemonic; 3rd/4th: _u variant); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrnbt.i16 q0, q1, #2.
define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 2, i32 0, i32 1, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
317
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 12.
; Flag operands 0, 1, 1, 1, 0 (2nd: vr- mnemonic; 3rd/4th: _u variant); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrnbt.i32 q0, q1, #12.
define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i32 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 12, i32 0, i32 1, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
331
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 4.
; Flag operands 0, 1, 0, 0, 1 (2nd: vr- mnemonic; 5th: `t` form); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrntt.i16 q0, q1, #4.
define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 1, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
345
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 6.
; Flag operands 0, 1, 0, 0, 1 (2nd: vr- mnemonic; 5th: `t` form); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrntt.i32 q0, q1, #6.
define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 6, i32 0, i32 1, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
359
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 6.
; Flag operands 0, 1, 1, 1, 1 (2nd: vr- mnemonic; 3rd/4th: _u variant; 5th: `t` form);
; mask is pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrntt.i16 q0, q1, #6.
define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i16 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 1, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
373
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 10.
; Flag operands 0, 1, 1, 1, 1 (2nd: vr- mnemonic; 3rd/4th: _u variant; 5th: `t` form);
; mask is pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vrshrntt.i32 q0, q1, #10.
define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 1, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
387
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 1, 0, 0, 0, 0: 1st set, which pairs with the vq- mnemonic and a
; signed .s16 suffix. Expected selection: vqshrnb.s16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}
397
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 15.
; Flag operands 1, 0, 0, 0, 0: 1st set, which pairs with the vq- mnemonic and a
; signed .s32 suffix. Expected selection: vqshrnb.s32 q0, q1, #15.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.s32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 0, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}
407
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 3.
; Flag operands 1, 0, 1, 1, 0: 1st set (vq- mnemonic), 3rd/4th set (.u16 suffix).
; Expected selection: vqshrnb.u16 q0, q1, #3.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.u16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}
417
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 3.
; Flag operands 1, 0, 1, 1, 0: 1st set (vq- mnemonic), 3rd/4th set (.u32 suffix).
; Expected selection: vqshrnb.u32 q0, q1, #3.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.u32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}
427
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 5.
; Flag operands 1, 0, 0, 0, 1: 1st set (vq- mnemonic), 5th set (`t` form).
; Expected selection: vqshrnt.s16 q0, q1, #5.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.s16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 0, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}
437
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 6.
; Flag operands 1, 0, 0, 0, 1: 1st set (vq- mnemonic), 5th set (`t` form).
; Expected selection: vqshrnt.s32 q0, q1, #6.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.s32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 6, i32 1, i32 0, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}
447
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 1, 0, 1, 1, 1: 1st (vq- mnemonic), 3rd/4th (.u16 suffix), 5th (`t` form).
; Expected selection: vqshrnt.u16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.u16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}
457
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 15.
; Flag operands 1, 0, 1, 1, 1: 1st (vq- mnemonic), 3rd/4th (.u32 suffix), 5th (`t` form).
; Expected selection: vqshrnt.u32 q0, q1, #15.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.u32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 0, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}
467
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 1, 0, 0, 0, 0 (1st set, vq- mnemonic); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrnbt.s16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
481
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 1.
; Flag operands 1, 0, 0, 0, 0 (1st set, vq- mnemonic); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrnbt.s32 q0, q1, #1.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.s32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
495
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 1, 0, 1, 1, 0 (1st: vq- mnemonic; 3rd/4th: .u16 suffix); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrnbt.u16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.u16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
509
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 8.
; Flag operands 1, 0, 1, 1, 0 (1st: vq- mnemonic; 3rd/4th: .u32 suffix); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrnbt.u32 q0, q1, #8.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.u32 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 8, i32 1, i32 0, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
523
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 1, 0, 0, 0, 1 (1st: vq- mnemonic; 5th: `t` form); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrntt.s16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.s16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
537
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 11.
; Flag operands 1, 0, 0, 0, 1 (1st: vq- mnemonic; 5th: `t` form); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrntt.s32 q0, q1, #11.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.s32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 0, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
551
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 3.
; Flag operands 1, 0, 1, 1, 1 (1st: vq- mnemonic; 3rd/4th: .u16 suffix; 5th: `t` form);
; mask is pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrntt.u16 q0, q1, #3.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.u16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
565
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 1.
; Flag operands 1, 0, 1, 1, 1 (1st: vq- mnemonic; 3rd/4th: .u32 suffix; 5th: `t` form);
; mask is pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrntt.u32 q0, q1, #1.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.u32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
579
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 5.
; Flag operands 1, 0, 1, 0, 0: 1st set (vq- mnemonic), 3rd set with 4th clear,
; the combination that pairs with the -un mnemonics in this file.
; Expected selection: vqshrunb.s16 q0, q1, #5.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrunbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrunbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunb.s16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 0, i32 1, i32 0, i32 0)
  ret <16 x i8> %0
}
589
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 13.
; Flag operands 1, 0, 1, 0, 0: 1st set (vq- mnemonic), 3rd set with 4th clear,
; the combination that pairs with the -un mnemonics in this file.
; Expected selection: vqshrunb.s32 q0, q1, #13.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrunbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrunbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunb.s32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 0, i32 1, i32 0, i32 0)
  ret <8 x i16> %0
}
599
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 2.
; Flag operands 1, 0, 1, 0, 1: 1st (vq- mnemonic), 3rd set with 4th clear
; (-un mnemonic), 5th (`t` form). Expected selection: vqshrunt.s16 q0, q1, #2.
define arm_aapcs_vfpcc <16 x i8> @test_vqshruntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshruntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunt.s16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 2, i32 1, i32 0, i32 1, i32 0, i32 1)
  ret <16 x i8> %0
}
609
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 7.
; Flag operands 1, 0, 1, 0, 1: 1st (vq- mnemonic), 3rd set with 4th clear
; (-un mnemonic), 5th (`t` form). Expected selection: vqshrunt.s32 q0, q1, #7.
define arm_aapcs_vfpcc <8 x i16> @test_vqshruntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshruntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunt.s32 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1)
  ret <8 x i16> %0
}
619
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 1, 0, 1, 0, 0 (1st: vq- mnemonic; 3rd set with 4th clear: -un
; mnemonic); mask is pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrunbt.s16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqshrunbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrunbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrunbt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
633
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 7.
; Flag operands 1, 0, 1, 0, 0 (1st: vq- mnemonic; 3rd set with 4th clear: -un
; mnemonic); mask is pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshrunbt.s32 q0, q1, #7.
define arm_aapcs_vfpcc <8 x i16> @test_vqshrunbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrunbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrunbt.s32 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
647
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 1, 0, 1, 0, 1 (1st: vq- mnemonic; 3rd set with 4th clear: -un
; mnemonic; 5th: `t` form); mask is pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshruntt.s16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqshruntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshruntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshruntt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
661
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 7.
; Flag operands 1, 0, 1, 0, 1 (1st: vq- mnemonic; 3rd set with 4th clear: -un
; mnemonic; 5th: `t` form); mask is pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqshruntt.s32 q0, q1, #7.
define arm_aapcs_vfpcc <8 x i16> @test_vqshruntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshruntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshruntt.s32 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
675
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 5.
; Flag operands 1, 1, 0, 0, 0: 1st and 2nd set, pairing with the vqr- mnemonic.
; Expected selection: vqrshrnb.s16 q0, q1, #5.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.s16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 1, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}
685
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 13.
; Flag operands 1, 1, 0, 0, 0: 1st and 2nd set, pairing with the vqr- mnemonic.
; Expected selection: vqrshrnb.s32 q0, q1, #13.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.s32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 1, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}
695
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 1, 1, 1, 1, 0: 1st/2nd set (vqr- mnemonic), 3rd/4th set (.u16 suffix).
; Expected selection: vqrshrnb.u16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.u16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}
705
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 8.
; Flag operands 1, 1, 1, 1, 0: 1st/2nd set (vqr- mnemonic), 3rd/4th set (.u32 suffix).
; Expected selection: vqrshrnb.u32 q0, q1, #8.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.u32 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 8, i32 1, i32 1, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}
715
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 7.
; Flag operands 1, 1, 0, 0, 1: 1st/2nd set (vqr- mnemonic), 5th set (`t` form).
; Expected selection: vqrshrnt.s16 q0, q1, #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}
725
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 2.
; Flag operands 1, 1, 0, 0, 1: 1st/2nd set (vqr- mnemonic), 5th set (`t` form).
; Expected selection: vqrshrnt.s32 q0, q1, #2.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.s32 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 2, i32 1, i32 1, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}
735
; Unpredicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 1.
; Flag operands 1, 1, 1, 1, 1 (every flag set): vqr- mnemonic, .u16 suffix, `t` form.
; Expected selection: vqrshrnt.u16 q0, q1, #1.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.u16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}
745
; Unpredicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 11.
; Flag operands 1, 1, 1, 1, 1 (every flag set): vqr- mnemonic, .u32 suffix, `t` form.
; Expected selection: vqrshrnt.u32 q0, q1, #11.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.u32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 1, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}
755
; Predicated vshrn intrinsic on (<16 x i8> %a, <8 x i16> %b), shift immediate 2.
; Flag operands 1, 1, 0, 0, 0 (1st/2nd set, vqr- mnemonic); mask is
; pred.i2v.v8i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqrshrnbt.s16 q0, q1, #2.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.s16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
769
; Predicated vshrn intrinsic on (<8 x i16> %a, <4 x i32> %b), shift immediate 12.
; Flag operands 1, 1, 0, 0, 0 (1st/2nd set, vqr- mnemonic); mask is
; pred.i2v.v4i1 of the zero-extended %p.
; Expected selection: vmsr p0, r0; vpst; vqrshrnbt.s32 q0, q1, #12.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.s32 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 12, i32 1, i32 1, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
783
; Predicated vshrn (shift 5, flags 1,1,1,1,0): %p is zero-extended, turned into
; a <8 x i1> mask via pred.i2v, and passed as the trailing operand. Expected
; selection is vmsr p0 + vpst guarding vqrshrnbt.u16 #5.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.u16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 1, i32 1, i32 1, i32 0, <8 x i1> %mask)
  ret <16 x i8> %res
}
797
; Predicated vshrn (shift 11, flags 1,1,1,1,0): %p is zero-extended, turned
; into a <4 x i1> mask via pred.i2v, and passed as the trailing operand.
; Expected selection is vmsr p0 + vpst guarding vqrshrnbt.u32 #11.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.u32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 1, i32 1, i32 1, i32 0, <4 x i1> %mask)
  ret <8 x i16> %res
}
811
; Predicated vshrn (shift 4, flags 1,1,0,0,1): %p is zero-extended, turned into
; a <8 x i1> mask via pred.i2v, and passed as the trailing operand. Expected
; selection is vmsr p0 + vpst guarding vqrshrntt.s16 #4.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.s16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 1, i32 1, i32 0, i32 0, i32 1, <8 x i1> %mask)
  ret <16 x i8> %res
}
825
; Predicated vshrn (shift 6, flags 1,1,0,0,1): %p is zero-extended, turned into
; a <4 x i1> mask via pred.i2v, and passed as the trailing operand. Expected
; selection is vmsr p0 + vpst guarding vqrshrntt.s32 #6.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.s32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 6, i32 1, i32 1, i32 0, i32 0, i32 1, <4 x i1> %mask)
  ret <8 x i16> %res
}
839
; Predicated vshrn (shift 7, flags 1,1,1,1,1): %p is zero-extended, turned into
; a <8 x i1> mask via pred.i2v, and passed as the trailing operand. Expected
; selection is vmsr p0 + vpst guarding vqrshrntt.u16 #7.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.u16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 1, i32 1, <8 x i1> %mask)
  ret <16 x i8> %res
}
853
; Predicated vshrn (shift 15, flags 1,1,1,1,1): %p is zero-extended, turned
; into a <4 x i1> mask via pred.i2v, and passed as the trailing operand.
; Expected selection is vmsr p0 + vpst guarding vqrshrntt.u32 #15.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.u32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 1, i32 1, i32 1, i32 1, <4 x i1> %mask)
  ret <8 x i16> %res
}
867
; Rounding variant (vqrshrunb.s16), unpredicated, shift #7.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshrunb and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,0 and the expected mnemonic is vqrshrunb.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrunbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrunbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunb.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 0, i32 0)
  ret <16 x i8> %0
}
877
; Rounding variant (vqrshrunb.s32), unpredicated, shift #1.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshrunb and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,0 and the expected mnemonic is vqrshrunb.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrunbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrunbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunb.s32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0)
  ret <8 x i16> %0
}
887
; Rounding variant (vqrshrunt.s16), unpredicated, shift #1.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshrunt and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,1 and the expected mnemonic is vqrshrunt.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshruntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshruntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunt.s16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, i32 1, i32 0, i32 1)
  ret <16 x i8> %0
}
897
; Rounding variant (vqrshrunt.s32), unpredicated, shift #3.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshrunt and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,1 and the expected mnemonic is vqrshrunt.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshruntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshruntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunt.s32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 1, i32 1, i32 1, i32 0, i32 1)
  ret <8 x i16> %0
}
907
; Rounding variant (vqrshrunbt.s16 inside a VPT block), predicated, shift #4.
; %p is zero-extended and converted to a <8 x i1> mask via pred.i2v.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshrunbt and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,0 and the expected mnemonic is vqrshrunbt.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshrunbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrunbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrunbt.s16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}
921
; Rounding variant (vqrshrunbt.s32 inside a VPT block), predicated, shift #10.
; %p is zero-extended and converted to a <4 x i1> mask via pred.i2v.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshrunbt and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,0 and the expected mnemonic is vqrshrunbt.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshrunbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrunbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrunbt.s32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 1, i32 1, i32 1, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}
935
; Rounding variant (vqrshruntt.s16 inside a VPT block), predicated, shift #3.
; %p is zero-extended and converted to a <8 x i1> mask via pred.i2v.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshruntt and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,1 and the expected mnemonic is vqrshruntt.
define arm_aapcs_vfpcc <16 x i8> @test_vqrshruntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshruntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshruntt.s16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 1, i32 1, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}
949
; Rounding variant (vqrshruntt.s32 inside a VPT block), predicated, shift #13.
; %p is zero-extended and converted to a <4 x i1> mask via pred.i2v.
; The second flag after the shift count is the round flag: it is 1 in every
; test_vqrshrn* test of this file. It was 0 here, so this test exercised the
; non-rounding vqshruntt and the rounding instruction had no coverage.
; Flags are now 1,1,1,0,1 and the expected mnemonic is vqrshruntt.
define arm_aapcs_vfpcc <8 x i16> @test_vqrshruntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshruntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshruntt.s32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 1, i32 1, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}
963
; Unpredicated vsli intrinsic on v16i8 with immediate 2; expected selection is
; a single vsli.8 #2.
define arm_aapcs_vfpcc <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsliq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsli.8 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 2)
  ret <16 x i8> %res
}
973
; Unpredicated vsli intrinsic on v8i16 with immediate 10; expected selection is
; a single vsli.16 #10.
define arm_aapcs_vfpcc <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsliq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsli.16 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 10)
  ret <8 x i16> %res
}
983
; Unpredicated vsli intrinsic on v4i32 with immediate 1; expected selection is
; a single vsli.32 #1.
define arm_aapcs_vfpcc <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsliq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsli.32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
  ret <4 x i32> %res
}
993
; Unpredicated vsli intrinsic on v16i8 with immediate 1; expected selection is
; a single vsli.8 #1.
define arm_aapcs_vfpcc <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsliq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsli.8 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1)
  ret <16 x i8> %res
}
1003
; Unpredicated vsli intrinsic on v8i16 with immediate 1; expected selection is
; a single vsli.16 #1.
define arm_aapcs_vfpcc <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsliq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsli.16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
  ret <8 x i16> %res
}
1013
; Unpredicated vsli intrinsic on v4i32 with immediate 28; expected selection is
; a single vsli.32 #28.
define arm_aapcs_vfpcc <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsliq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsli.32 q0, q1, #28
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 28)
  ret <4 x i32> %res
}
1023
; Predicated vsli on v16i8 with immediate 4: %p is zero-extended and converted
; to a <16 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vslit.8 #4.
define arm_aapcs_vfpcc <16 x i8> @test_vsliq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsliq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vslit.8 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 4, <16 x i1> %mask)
  ret <16 x i8> %res
}
1037
; Predicated vsli on v8i16 with immediate 1: %p is zero-extended and converted
; to a <8 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vslit.16 #1.
define arm_aapcs_vfpcc <8 x i16> @test_vsliq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsliq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vslit.16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %mask)
  ret <8 x i16> %res
}
1051
; Predicated vsli on v4i32 with immediate 1: %p is zero-extended and converted
; to a <4 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vslit.32 #1.
define arm_aapcs_vfpcc <4 x i32> @test_vsliq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsliq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vslit.32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, <4 x i1> %mask)
  ret <4 x i32> %res
}
1065
; Predicated vsli on v16i8 with immediate 5: %p is zero-extended and converted
; to a <16 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vslit.8 #5.
define arm_aapcs_vfpcc <16 x i8> @test_vsliq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsliq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vslit.8 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 5, <16 x i1> %mask)
  ret <16 x i8> %res
}
1079
; Predicated vsli on v8i16 with immediate 3: %p is zero-extended and converted
; to a <8 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vslit.16 #3.
define arm_aapcs_vfpcc <8 x i16> @test_vsliq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsliq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vslit.16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 3, <8 x i1> %mask)
  ret <8 x i16> %res
}
1093
; Predicated vsli on v4i32 with immediate 9: %p is zero-extended and converted
; to a <4 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vslit.32 #9.
define arm_aapcs_vfpcc <4 x i32> @test_vsliq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsliq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vslit.32 q0, q1, #9
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 9, <4 x i1> %mask)
  ret <4 x i32> %res
}
1107
; Unpredicated vsri intrinsic on v16i8 with immediate 3; expected selection is
; a single vsri.8 #3.
define arm_aapcs_vfpcc <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsriq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsri.8 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %res
}
1117
; Unpredicated vsri intrinsic on v8i16 with immediate 2; expected selection is
; a single vsri.16 #2.
define arm_aapcs_vfpcc <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsriq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsri.16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 2)
  ret <8 x i16> %res
}
1127
; Unpredicated vsri intrinsic on v4i32 with immediate 28; expected selection is
; a single vsri.32 #28.
define arm_aapcs_vfpcc <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsriq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsri.32 q0, q1, #28
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 28)
  ret <4 x i32> %res
}
1137
; Unpredicated vsri intrinsic on v16i8 with immediate 3; expected selection is
; a single vsri.8 #3.
define arm_aapcs_vfpcc <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsriq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsri.8 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %res
}
1147
; Unpredicated vsri intrinsic on v8i16 with immediate 3; expected selection is
; a single vsri.16 #3.
define arm_aapcs_vfpcc <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsriq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsri.16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %res = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %res
}
1157
; Unpredicated vsri intrinsic on v4i32 with immediate 26; expected selection is
; a single vsri.32 #26.
define arm_aapcs_vfpcc <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsriq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsri.32 q0, q1, #26
; CHECK-NEXT:    bx lr
entry:
  %res = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 26)
  ret <4 x i32> %res
}
1167
; Predicated vsri on v16i8 with immediate 4: %p is zero-extended and converted
; to a <16 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vsrit.8 #4.
define arm_aapcs_vfpcc <16 x i8> @test_vsriq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsriq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsrit.8 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 4, <16 x i1> %mask)
  ret <16 x i8> %res
}
1181
; Predicated vsri on v8i16 with immediate 1: %p is zero-extended and converted
; to a <8 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vsrit.16 #1.
define arm_aapcs_vfpcc <8 x i16> @test_vsriq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsriq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsrit.16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %mask)
  ret <8 x i16> %res
}
1195
; Predicated vsri on v4i32 with immediate 27: %p is zero-extended and converted
; to a <4 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vsrit.32 #27.
define arm_aapcs_vfpcc <4 x i32> @test_vsriq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsriq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsrit.32 q0, q1, #27
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 27, <4 x i1> %mask)
  ret <4 x i32> %res
}
1209
; Predicated vsri on v16i8 with immediate 7: %p is zero-extended and converted
; to a <16 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vsrit.8 #7.
define arm_aapcs_vfpcc <16 x i8> @test_vsriq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsriq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsrit.8 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 7, <16 x i1> %mask)
  ret <16 x i8> %res
}
1223
; Predicated vsri on v8i16 with immediate 9: %p is zero-extended and converted
; to a <8 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vsrit.16 #9.
define arm_aapcs_vfpcc <8 x i16> @test_vsriq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsriq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsrit.16 q0, q1, #9
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 9, <8 x i1> %mask)
  ret <8 x i16> %res
}
1237
; Predicated vsri on v4i32 with immediate 13: %p is zero-extended and converted
; to a <4 x i1> mask via pred.i2v. Expected selection is vmsr p0 + vpst
; guarding vsrit.32 #13.
define arm_aapcs_vfpcc <4 x i32> @test_vsriq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vsriq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsrit.32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 13, <4 x i1> %mask)
  ret <4 x i32> %res
}
1251
; Integer-to-predicate conversions used by every predicated test above.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

; Narrowing shifts: two vector operands followed by six i32 immediates; the
; predicated forms take the mask as an extra trailing operand.
declare <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32)
declare <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32)
declare <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32, <8 x i1>)
declare <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32, <4 x i1>)

; vsli: two vector operands plus one i32 immediate (and a mask when predicated).
declare <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32>, <4 x i32>, i32)
declare <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
declare <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)

; vsri: same operand layout as vsli.
declare <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32>, <4 x i32>, i32)
declare <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
declare <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
1271