xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll (revision 461fd94f004c78374fc7b0338d018f9610f2e5f5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
3
; Plain IR `shl` of every <16 x i8> lane by the splat constant 5; the expected
; output is a single `vshl.i8 q0, q0, #5` followed by the return.
4define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) {
5; CHECK-LABEL: test_vshlq_n_s8:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vshl.i8 q0, q0, #5
8; CHECK-NEXT:    bx lr
9entry:
10  %0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
11  ret <16 x i8> %0
12}
13
; Plain IR `shl` of every <8 x i16> lane by the splat constant 5; the expected
; output is a single `vshl.i16 q0, q0, #5` followed by the return.
14define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) {
15; CHECK-LABEL: test_vshlq_n_s16:
16; CHECK:       @ %bb.0: @ %entry
17; CHECK-NEXT:    vshl.i16 q0, q0, #5
18; CHECK-NEXT:    bx lr
19entry:
20  %0 = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
21  ret <8 x i16> %0
22}
23
; Plain IR `shl` of every <4 x i32> lane by the splat constant 18; the expected
; output is a single `vshl.i32 q0, q0, #18` followed by the return.
24define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) {
25; CHECK-LABEL: test_vshlq_n_s32:
26; CHECK:       @ %bb.0: @ %entry
27; CHECK-NEXT:    vshl.i32 q0, q0, #18
28; CHECK-NEXT:    bx lr
29entry:
30  %0 = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18>
31  ret <4 x i32> %0
32}
33
; Plain IR `ashr` of every <16 x i8> lane by the splat constant 4; the expected
; output is a single signed `vshr.s8 q0, q0, #4` followed by the return.
34define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
35; CHECK-LABEL: test_vshrq_n_s8:
36; CHECK:       @ %bb.0: @ %entry
37; CHECK-NEXT:    vshr.s8 q0, q0, #4
38; CHECK-NEXT:    bx lr
39entry:
40  %0 = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
41  ret <16 x i8> %0
42}
43
; Plain IR `ashr` of every <8 x i16> lane by the splat constant 10; the expected
; output is a single signed `vshr.s16 q0, q0, #10` followed by the return.
44define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
45; CHECK-LABEL: test_vshrq_n_s16:
46; CHECK:       @ %bb.0: @ %entry
47; CHECK-NEXT:    vshr.s16 q0, q0, #10
48; CHECK-NEXT:    bx lr
49entry:
50  %0 = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
51  ret <8 x i16> %0
52}
53
; Plain IR `ashr` of every <4 x i32> lane by the splat constant 19; the expected
; output is a single signed `vshr.s32 q0, q0, #19` followed by the return.
54define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
55; CHECK-LABEL: test_vshrq_n_s32:
56; CHECK:       @ %bb.0: @ %entry
57; CHECK-NEXT:    vshr.s32 q0, q0, #19
58; CHECK-NEXT:    bx lr
59entry:
60  %0 = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19>
61  ret <4 x i32> %0
62}
63
; Plain IR `lshr` of every <16 x i8> lane by the splat constant 1; the expected
; output is a single unsigned `vshr.u8 q0, q0, #1` followed by the return.
64define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
65; CHECK-LABEL: test_vshrq_n_u8:
66; CHECK:       @ %bb.0: @ %entry
67; CHECK-NEXT:    vshr.u8 q0, q0, #1
68; CHECK-NEXT:    bx lr
69entry:
70  %0 = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
71  ret <16 x i8> %0
72}
73
; Plain IR `lshr` of every <8 x i16> lane by the splat constant 10; the expected
; output is a single unsigned `vshr.u16 q0, q0, #10` followed by the return.
74define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
75; CHECK-LABEL: test_vshrq_n_u16:
76; CHECK:       @ %bb.0: @ %entry
77; CHECK-NEXT:    vshr.u16 q0, q0, #10
78; CHECK-NEXT:    bx lr
79entry:
80  %0 = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
81  ret <8 x i16> %0
82}
83
; Plain IR `lshr` of every <4 x i32> lane by the splat constant 10; the expected
; output is a single unsigned `vshr.u32 q0, q0, #10` followed by the return.
84define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
85; CHECK-LABEL: test_vshrq_n_u32:
86; CHECK:       @ %bb.0: @ %entry
87; CHECK-NEXT:    vshr.u32 q0, q0, #10
88; CHECK-NEXT:    bx lr
89entry:
90  %0 = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
91  ret <4 x i32> %0
92}
93
; Predicated left shift with an explicit %inactive vector: %p is zero-extended,
; pred.i2v turns it into a <16 x i1> mask, and shl.imm.predicated shifts %a by 6.
; Expected output: vmsr p0 / vpst, then `vshlt.i8 q0, q1, #6`.
94define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
95; CHECK-LABEL: test_vshlq_m_n_s8:
96; CHECK:       @ %bb.0: @ %entry
97; CHECK-NEXT:    vmsr p0, r0
98; CHECK-NEXT:    vpst
99; CHECK-NEXT:    vshlt.i8 q0, q1, #6
100; CHECK-NEXT:    bx lr
101entry:
102  %0 = zext i16 %p to i32
103  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
104  %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive)
105  ret <16 x i8> %2
106}
107
; Predicated left shift with an explicit %inactive vector: %p is zero-extended,
; pred.i2v turns it into a <8 x i1> mask, and shl.imm.predicated shifts %a by 13.
; Expected output: vmsr p0 / vpst, then `vshlt.i16 q0, q1, #13`.
108define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
109; CHECK-LABEL: test_vshlq_m_n_s16:
110; CHECK:       @ %bb.0: @ %entry
111; CHECK-NEXT:    vmsr p0, r0
112; CHECK-NEXT:    vpst
113; CHECK-NEXT:    vshlt.i16 q0, q1, #13
114; CHECK-NEXT:    bx lr
115entry:
116  %0 = zext i16 %p to i32
117  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
118  %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive)
119  ret <8 x i16> %2
120}
121
; Predicated left shift with an explicit %inactive vector and a shift count of 0:
; pred.i2v builds the <4 x i1> mask from %p and shl.imm.predicated is called with
; i32 0. Expected output: vmsr p0 / vpst, then `vshlt.i32 q0, q1, #0`.
122define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
123; CHECK-LABEL: test_vshlq_m_n_s32:
124; CHECK:       @ %bb.0: @ %entry
125; CHECK-NEXT:    vmsr p0, r0
126; CHECK-NEXT:    vpst
127; CHECK-NEXT:    vshlt.i32 q0, q1, #0
128; CHECK-NEXT:    bx lr
129entry:
130  %0 = zext i16 %p to i32
131  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
132  %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
133  ret <4 x i32> %2
134}
135
; Predicated right shift with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p and shr.imm.predicated is called with shift 2 followed
; by i32 0. Expected output: vmsr p0 / vpst, then signed `vshrt.s8 q0, q1, #2`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
136define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
137; CHECK-LABEL: test_vshrq_m_n_s8:
138; CHECK:       @ %bb.0: @ %entry
139; CHECK-NEXT:    vmsr p0, r0
140; CHECK-NEXT:    vpst
141; CHECK-NEXT:    vshrt.s8 q0, q1, #2
142; CHECK-NEXT:    bx lr
143entry:
144  %0 = zext i16 %p to i32
145  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
146  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
147  ret <16 x i8> %2
148}
149
; Predicated right shift with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p and shr.imm.predicated is called with shift 3 followed
; by i32 0. Expected output: vmsr p0 / vpst, then signed `vshrt.s16 q0, q1, #3`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
150define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
151; CHECK-LABEL: test_vshrq_m_n_s16:
152; CHECK:       @ %bb.0: @ %entry
153; CHECK-NEXT:    vmsr p0, r0
154; CHECK-NEXT:    vpst
155; CHECK-NEXT:    vshrt.s16 q0, q1, #3
156; CHECK-NEXT:    bx lr
157entry:
158  %0 = zext i16 %p to i32
159  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
160  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive)
161  ret <8 x i16> %2
162}
163
; Predicated right shift with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p and shr.imm.predicated is called with shift 13 followed
; by i32 0. Expected output: vmsr p0 / vpst, then signed `vshrt.s32 q0, q1, #13`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
164define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
165; CHECK-LABEL: test_vshrq_m_n_s32:
166; CHECK:       @ %bb.0: @ %entry
167; CHECK-NEXT:    vmsr p0, r0
168; CHECK-NEXT:    vpst
169; CHECK-NEXT:    vshrt.s32 q0, q1, #13
170; CHECK-NEXT:    bx lr
171entry:
172  %0 = zext i16 %p to i32
173  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
174  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive)
175  ret <4 x i32> %2
176}
177
; Predicated right shift with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p and shr.imm.predicated is called with shift 4 followed
; by i32 1. Expected output: vmsr p0 / vpst, then unsigned `vshrt.u8 q0, q1, #4`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
178define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
179; CHECK-LABEL: test_vshrq_m_n_u8:
180; CHECK:       @ %bb.0: @ %entry
181; CHECK-NEXT:    vmsr p0, r0
182; CHECK-NEXT:    vpst
183; CHECK-NEXT:    vshrt.u8 q0, q1, #4
184; CHECK-NEXT:    bx lr
185entry:
186  %0 = zext i16 %p to i32
187  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
188  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
189  ret <16 x i8> %2
190}
191
; Predicated right shift with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p and shr.imm.predicated is called with shift 14 followed
; by i32 1. Expected output: vmsr p0 / vpst, then unsigned `vshrt.u16 q0, q1, #14`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
192define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
193; CHECK-LABEL: test_vshrq_m_n_u16:
194; CHECK:       @ %bb.0: @ %entry
195; CHECK-NEXT:    vmsr p0, r0
196; CHECK-NEXT:    vpst
197; CHECK-NEXT:    vshrt.u16 q0, q1, #14
198; CHECK-NEXT:    bx lr
199entry:
200  %0 = zext i16 %p to i32
201  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
202  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive)
203  ret <8 x i16> %2
204}
205
; Predicated right shift with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p and shr.imm.predicated is called with shift 21 followed
; by i32 1. Expected output: vmsr p0 / vpst, then unsigned `vshrt.u32 q0, q1, #21`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
206define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
207; CHECK-LABEL: test_vshrq_m_n_u32:
208; CHECK:       @ %bb.0: @ %entry
209; CHECK-NEXT:    vmsr p0, r0
210; CHECK-NEXT:    vpst
211; CHECK-NEXT:    vshrt.u32 q0, q1, #21
212; CHECK-NEXT:    bx lr
213entry:
214  %0 = zext i16 %p to i32
215  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
216  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive)
217  ret <4 x i32> %2
218}
219
; Predicated left shift whose final (inactive) operand is undef: pred.i2v builds
; the <16 x i1> mask from %p and shl.imm.predicated shifts %a by 1.
; Expected output: vmsr p0 / vpst, then `vshlt.i8 q0, q0, #1` (q0 is source and destination).
220define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
221; CHECK-LABEL: test_vshlq_x_n_s8:
222; CHECK:       @ %bb.0: @ %entry
223; CHECK-NEXT:    vmsr p0, r0
224; CHECK-NEXT:    vpst
225; CHECK-NEXT:    vshlt.i8 q0, q0, #1
226; CHECK-NEXT:    bx lr
227entry:
228  %0 = zext i16 %p to i32
229  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
230  %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef)
231  ret <16 x i8> %2
232}
233
; Predicated left shift whose final (inactive) operand is undef: pred.i2v builds
; the <8 x i1> mask from %p and shl.imm.predicated shifts %a by 15.
; Expected output: vmsr p0 / vpst, then `vshlt.i16 q0, q0, #15` (q0 is source and destination).
234define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
235; CHECK-LABEL: test_vshlq_x_n_s16:
236; CHECK:       @ %bb.0: @ %entry
237; CHECK-NEXT:    vmsr p0, r0
238; CHECK-NEXT:    vpst
239; CHECK-NEXT:    vshlt.i16 q0, q0, #15
240; CHECK-NEXT:    bx lr
241entry:
242  %0 = zext i16 %p to i32
243  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
244  %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef)
245  ret <8 x i16> %2
246}
247
; Predicated left shift whose final (inactive) operand is undef: pred.i2v builds
; the <4 x i1> mask from %p and shl.imm.predicated shifts %a by 13.
; Expected output: vmsr p0 / vpst, then `vshlt.i32 q0, q0, #13` (q0 is source and destination).
248define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
249; CHECK-LABEL: test_vshlq_x_n_s32:
250; CHECK:       @ %bb.0: @ %entry
251; CHECK-NEXT:    vmsr p0, r0
252; CHECK-NEXT:    vpst
253; CHECK-NEXT:    vshlt.i32 q0, q0, #13
254; CHECK-NEXT:    bx lr
255entry:
256  %0 = zext i16 %p to i32
257  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
258  %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef)
259  ret <4 x i32> %2
260}
261
; Predicated left shift with an undef inactive operand: pred.i2v builds the
; <16 x i1> mask from %p and shl.imm.predicated shifts %a by 4. The call carries
; no signedness operand. Expected output: vmsr p0 / vpst, then `vshlt.i8 q0, q0, #4`.
262define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
263; CHECK-LABEL: test_vshlq_x_n_u8:
264; CHECK:       @ %bb.0: @ %entry
265; CHECK-NEXT:    vmsr p0, r0
266; CHECK-NEXT:    vpst
267; CHECK-NEXT:    vshlt.i8 q0, q0, #4
268; CHECK-NEXT:    bx lr
269entry:
270  %0 = zext i16 %p to i32
271  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
272  %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef)
273  ret <16 x i8> %2
274}
275
; Predicated left shift with an undef inactive operand: pred.i2v builds the
; <8 x i1> mask from %p and shl.imm.predicated shifts %a by 10. The call carries
; no signedness operand. Expected output: vmsr p0 / vpst, then `vshlt.i16 q0, q0, #10`.
276define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
277; CHECK-LABEL: test_vshlq_x_n_u16:
278; CHECK:       @ %bb.0: @ %entry
279; CHECK-NEXT:    vmsr p0, r0
280; CHECK-NEXT:    vpst
281; CHECK-NEXT:    vshlt.i16 q0, q0, #10
282; CHECK-NEXT:    bx lr
283entry:
284  %0 = zext i16 %p to i32
285  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
286  %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef)
287  ret <8 x i16> %2
288}
289
; Predicated left shift with an undef inactive operand: pred.i2v builds the
; <4 x i1> mask from %p and shl.imm.predicated shifts %a by 30. The call carries
; no signedness operand. Expected output: vmsr p0 / vpst, then `vshlt.i32 q0, q0, #30`.
290define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
291; CHECK-LABEL: test_vshlq_x_n_u32:
292; CHECK:       @ %bb.0: @ %entry
293; CHECK-NEXT:    vmsr p0, r0
294; CHECK-NEXT:    vpst
295; CHECK-NEXT:    vshlt.i32 q0, q0, #30
296; CHECK-NEXT:    bx lr
297entry:
298  %0 = zext i16 %p to i32
299  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
300  %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef)
301  ret <4 x i32> %2
302}
303
; Predicated right shift with an undef inactive operand: shr.imm.predicated is
; called with shift 4 followed by i32 0 and the <16 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then signed `vshrt.s8 q0, q0, #4`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
304define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
305; CHECK-LABEL: test_vshrq_x_n_s8:
306; CHECK:       @ %bb.0: @ %entry
307; CHECK-NEXT:    vmsr p0, r0
308; CHECK-NEXT:    vpst
309; CHECK-NEXT:    vshrt.s8 q0, q0, #4
310; CHECK-NEXT:    bx lr
311entry:
312  %0 = zext i16 %p to i32
313  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
314  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef)
315  ret <16 x i8> %2
316}
317
; Predicated right shift with an undef inactive operand: shr.imm.predicated is
; called with shift 10 followed by i32 0 and the <8 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then signed `vshrt.s16 q0, q0, #10`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
318define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
319; CHECK-LABEL: test_vshrq_x_n_s16:
320; CHECK:       @ %bb.0: @ %entry
321; CHECK-NEXT:    vmsr p0, r0
322; CHECK-NEXT:    vpst
323; CHECK-NEXT:    vshrt.s16 q0, q0, #10
324; CHECK-NEXT:    bx lr
325entry:
326  %0 = zext i16 %p to i32
327  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
328  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef)
329  ret <8 x i16> %2
330}
331
; Predicated right shift with an undef inactive operand: shr.imm.predicated is
; called with shift 7 followed by i32 0 and the <4 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then signed `vshrt.s32 q0, q0, #7`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
332define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
333; CHECK-LABEL: test_vshrq_x_n_s32:
334; CHECK:       @ %bb.0: @ %entry
335; CHECK-NEXT:    vmsr p0, r0
336; CHECK-NEXT:    vpst
337; CHECK-NEXT:    vshrt.s32 q0, q0, #7
338; CHECK-NEXT:    bx lr
339entry:
340  %0 = zext i16 %p to i32
341  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
342  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef)
343  ret <4 x i32> %2
344}
345
; Predicated right shift with an undef inactive operand: shr.imm.predicated is
; called with shift 7 followed by i32 1 and the <16 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then unsigned `vshrt.u8 q0, q0, #7`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
346define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
347; CHECK-LABEL: test_vshrq_x_n_u8:
348; CHECK:       @ %bb.0: @ %entry
349; CHECK-NEXT:    vmsr p0, r0
350; CHECK-NEXT:    vpst
351; CHECK-NEXT:    vshrt.u8 q0, q0, #7
352; CHECK-NEXT:    bx lr
353entry:
354  %0 = zext i16 %p to i32
355  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
356  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef)
357  ret <16 x i8> %2
358}
359
; Predicated right shift with an undef inactive operand: shr.imm.predicated is
; called with shift 7 followed by i32 1 and the <8 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then unsigned `vshrt.u16 q0, q0, #7`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
360define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
361; CHECK-LABEL: test_vshrq_x_n_u16:
362; CHECK:       @ %bb.0: @ %entry
363; CHECK-NEXT:    vmsr p0, r0
364; CHECK-NEXT:    vpst
365; CHECK-NEXT:    vshrt.u16 q0, q0, #7
366; CHECK-NEXT:    bx lr
367entry:
368  %0 = zext i16 %p to i32
369  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
370  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef)
371  ret <8 x i16> %2
372}
373
; Predicated right shift with an undef inactive operand: shr.imm.predicated is
; called with shift 6 followed by i32 1 and the <4 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then unsigned `vshrt.u32 q0, q0, #6`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
374define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
375; CHECK-LABEL: test_vshrq_x_n_u32:
376; CHECK:       @ %bb.0: @ %entry
377; CHECK-NEXT:    vmsr p0, r0
378; CHECK-NEXT:    vpst
379; CHECK-NEXT:    vshrt.u32 q0, q0, #6
380; CHECK-NEXT:    bx lr
381entry:
382  %0 = zext i16 %p to i32
383  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
384  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
385  ret <4 x i32> %2
386}
387
; Unpredicated call to vqshl.imm on <16 x i8> with shift 3 and a trailing i32 0;
; the expected output is a single signed `vqshl.s8 q0, q0, #3`.
; NOTE(review): the trailing i32 0 is presumably the unsigned flag - confirm.
388define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
389; CHECK-LABEL: test_vqshlq_n_s8:
390; CHECK:       @ %bb.0: @ %entry
391; CHECK-NEXT:    vqshl.s8 q0, q0, #3
392; CHECK-NEXT:    bx lr
393entry:
394  %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 3, i32 0)
395  ret <16 x i8> %0
396}
397
; Unpredicated call to vqshl.imm on <8 x i16> with shift 4 and a trailing i32 0;
; the expected output is a single signed `vqshl.s16 q0, q0, #4`.
; NOTE(review): the trailing i32 0 is presumably the unsigned flag - confirm.
398define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
399; CHECK-LABEL: test_vqshlq_n_s16:
400; CHECK:       @ %bb.0: @ %entry
401; CHECK-NEXT:    vqshl.s16 q0, q0, #4
402; CHECK-NEXT:    bx lr
403entry:
404  %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 4, i32 0)
405  ret <8 x i16> %0
406}
407
; Unpredicated call to vqshl.imm on <4 x i32> with shift 4 and a trailing i32 0;
; the expected output is a single signed `vqshl.s32 q0, q0, #4`.
; NOTE(review): the trailing i32 0 is presumably the unsigned flag - confirm.
408define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
409; CHECK-LABEL: test_vqshlq_n_s32:
410; CHECK:       @ %bb.0: @ %entry
411; CHECK-NEXT:    vqshl.s32 q0, q0, #4
412; CHECK-NEXT:    bx lr
413entry:
414  %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 4, i32 0)
415  ret <4 x i32> %0
416}
417
; Unpredicated call to vqshl.imm on <16 x i8> with shift 0 and a trailing i32 1;
; the expected output is a single unsigned `vqshl.u8 q0, q0, #0`.
; NOTE(review): the trailing i32 1 is presumably the unsigned flag - confirm.
418define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
419; CHECK-LABEL: test_vqshlq_n_u8:
420; CHECK:       @ %bb.0: @ %entry
421; CHECK-NEXT:    vqshl.u8 q0, q0, #0
422; CHECK-NEXT:    bx lr
423entry:
424  %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 0, i32 1)
425  ret <16 x i8> %0
426}
427
; Unpredicated call to vqshl.imm on <8 x i16> with shift 13 and a trailing i32 1;
; the expected output is a single unsigned `vqshl.u16 q0, q0, #13`.
; NOTE(review): the trailing i32 1 is presumably the unsigned flag - confirm.
428define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
429; CHECK-LABEL: test_vqshlq_n_u16:
430; CHECK:       @ %bb.0: @ %entry
431; CHECK-NEXT:    vqshl.u16 q0, q0, #13
432; CHECK-NEXT:    bx lr
433entry:
434  %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 13, i32 1)
435  ret <8 x i16> %0
436}
437
; Unpredicated call to vqshl.imm on <4 x i32> with shift 6 and a trailing i32 1;
; the expected output is a single unsigned `vqshl.u32 q0, q0, #6`.
; NOTE(review): the trailing i32 1 is presumably the unsigned flag - confirm.
438define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
439; CHECK-LABEL: test_vqshlq_n_u32:
440; CHECK:       @ %bb.0: @ %entry
441; CHECK-NEXT:    vqshl.u32 q0, q0, #6
442; CHECK-NEXT:    bx lr
443entry:
444  %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 6, i32 1)
445  ret <4 x i32> %0
446}
447
; Unpredicated call to vqshlu.imm on <16 x i8> with shift 5 (this intrinsic takes
; no extra flag operand); the expected output is a single `vqshlu.s8 q0, q0, #5`.
448define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
449; CHECK-LABEL: test_vqshluq_n_s8:
450; CHECK:       @ %bb.0: @ %entry
451; CHECK-NEXT:    vqshlu.s8 q0, q0, #5
452; CHECK-NEXT:    bx lr
453entry:
454  %0 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> %a, i32 5)
455  ret <16 x i8> %0
456}
457
; Unpredicated call to vqshlu.imm on <8 x i16> with shift 5 (this intrinsic takes
; no extra flag operand); the expected output is a single `vqshlu.s16 q0, q0, #5`.
458define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
459; CHECK-LABEL: test_vqshluq_n_s16:
460; CHECK:       @ %bb.0: @ %entry
461; CHECK-NEXT:    vqshlu.s16 q0, q0, #5
462; CHECK-NEXT:    bx lr
463entry:
464  %0 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> %a, i32 5)
465  ret <8 x i16> %0
466}
467
; Unpredicated call to vqshlu.imm on <4 x i32> with shift 4 (this intrinsic takes
; no extra flag operand); the expected output is a single `vqshlu.s32 q0, q0, #4`.
468define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
469; CHECK-LABEL: test_vqshluq_n_s32:
470; CHECK:       @ %bb.0: @ %entry
471; CHECK-NEXT:    vqshlu.s32 q0, q0, #4
472; CHECK-NEXT:    bx lr
473entry:
474  %0 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> %a, i32 4)
475  ret <4 x i32> %0
476}
477
; Unpredicated call to vrshr.imm on <16 x i8> with shift 4 and a trailing i32 0;
; the expected output is a single signed `vrshr.s8 q0, q0, #4`.
; NOTE(review): the trailing i32 0 is presumably the unsigned flag - confirm.
478define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
479; CHECK-LABEL: test_vrshrq_n_s8:
480; CHECK:       @ %bb.0: @ %entry
481; CHECK-NEXT:    vrshr.s8 q0, q0, #4
482; CHECK-NEXT:    bx lr
483entry:
484  %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 4, i32 0)
485  ret <16 x i8> %0
486}
487
; Unpredicated call to vrshr.imm on <8 x i16> with shift 12 and a trailing i32 0;
; the expected output is a single signed `vrshr.s16 q0, q0, #12`.
; NOTE(review): the trailing i32 0 is presumably the unsigned flag - confirm.
488define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
489; CHECK-LABEL: test_vrshrq_n_s16:
490; CHECK:       @ %bb.0: @ %entry
491; CHECK-NEXT:    vrshr.s16 q0, q0, #12
492; CHECK-NEXT:    bx lr
493entry:
494  %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 12, i32 0)
495  ret <8 x i16> %0
496}
497
; Unpredicated call to vrshr.imm on <4 x i32> with shift 30 and a trailing i32 0;
; the expected output is a single signed `vrshr.s32 q0, q0, #30`.
; NOTE(review): the trailing i32 0 is presumably the unsigned flag - confirm.
498define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
499; CHECK-LABEL: test_vrshrq_n_s32:
500; CHECK:       @ %bb.0: @ %entry
501; CHECK-NEXT:    vrshr.s32 q0, q0, #30
502; CHECK-NEXT:    bx lr
503entry:
504  %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 30, i32 0)
505  ret <4 x i32> %0
506}
507
; Unpredicated call to vrshr.imm on <16 x i8> with shift 1 and a trailing i32 1;
; the expected output is a single unsigned `vrshr.u8 q0, q0, #1`.
; NOTE(review): the trailing i32 1 is presumably the unsigned flag - confirm.
508define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
509; CHECK-LABEL: test_vrshrq_n_u8:
510; CHECK:       @ %bb.0: @ %entry
511; CHECK-NEXT:    vrshr.u8 q0, q0, #1
512; CHECK-NEXT:    bx lr
513entry:
514  %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 1, i32 1)
515  ret <16 x i8> %0
516}
517
; Unpredicated call to vrshr.imm on <8 x i16> with shift 15 and a trailing i32 1;
; the expected output is a single unsigned `vrshr.u16 q0, q0, #15`.
; NOTE(review): the trailing i32 1 is presumably the unsigned flag - confirm.
518define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
519; CHECK-LABEL: test_vrshrq_n_u16:
520; CHECK:       @ %bb.0: @ %entry
521; CHECK-NEXT:    vrshr.u16 q0, q0, #15
522; CHECK-NEXT:    bx lr
523entry:
524  %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 15, i32 1)
525  ret <8 x i16> %0
526}
527
; Unpredicated call to vrshr.imm on <4 x i32> with shift 20 and a trailing i32 1;
; the expected output is a single unsigned `vrshr.u32 q0, q0, #20`.
; NOTE(review): the trailing i32 1 is presumably the unsigned flag - confirm.
528define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
529; CHECK-LABEL: test_vrshrq_n_u32:
530; CHECK:       @ %bb.0: @ %entry
531; CHECK-NEXT:    vrshr.u32 q0, q0, #20
532; CHECK-NEXT:    bx lr
533entry:
534  %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 20, i32 1)
535  ret <4 x i32> %0
536}
537
; Predicated vqshl.imm with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p; the call passes shift 6 followed by i32 0.
; Expected output: vmsr p0 / vpst, then signed `vqshlt.s8 q0, q1, #6`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
538define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
539; CHECK-LABEL: test_vqshlq_m_n_s8:
540; CHECK:       @ %bb.0: @ %entry
541; CHECK-NEXT:    vmsr p0, r0
542; CHECK-NEXT:    vpst
543; CHECK-NEXT:    vqshlt.s8 q0, q1, #6
544; CHECK-NEXT:    bx lr
545entry:
546  %0 = zext i16 %p to i32
547  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
548  %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, <16 x i1> %1, <16 x i8> %inactive)
549  ret <16 x i8> %2
550}
551
; Predicated vqshl.imm with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p; the call passes shift 13 followed by i32 0.
; Expected output: vmsr p0 / vpst, then signed `vqshlt.s16 q0, q1, #13`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
552define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
553; CHECK-LABEL: test_vqshlq_m_n_s16:
554; CHECK:       @ %bb.0: @ %entry
555; CHECK-NEXT:    vmsr p0, r0
556; CHECK-NEXT:    vpst
557; CHECK-NEXT:    vqshlt.s16 q0, q1, #13
558; CHECK-NEXT:    bx lr
559entry:
560  %0 = zext i16 %p to i32
561  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
562  %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 0, <8 x i1> %1, <8 x i16> %inactive)
563  ret <8 x i16> %2
564}
565
; Predicated vqshl.imm with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p; the call passes shift 14 followed by i32 0.
; Expected output: vmsr p0 / vpst, then signed `vqshlt.s32 q0, q1, #14`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
566define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
567; CHECK-LABEL: test_vqshlq_m_n_s32:
568; CHECK:       @ %bb.0: @ %entry
569; CHECK-NEXT:    vmsr p0, r0
570; CHECK-NEXT:    vpst
571; CHECK-NEXT:    vqshlt.s32 q0, q1, #14
572; CHECK-NEXT:    bx lr
573entry:
574  %0 = zext i16 %p to i32
575  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
576  %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 14, i32 0, <4 x i1> %1, <4 x i32> %inactive)
577  ret <4 x i32> %2
578}
579
; Predicated vqshl.imm with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p; the call passes shift 4 followed by i32 1.
; Expected output: vmsr p0 / vpst, then unsigned `vqshlt.u8 q0, q1, #4`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
580define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
581; CHECK-LABEL: test_vqshlq_m_n_u8:
582; CHECK:       @ %bb.0: @ %entry
583; CHECK-NEXT:    vmsr p0, r0
584; CHECK-NEXT:    vpst
585; CHECK-NEXT:    vqshlt.u8 q0, q1, #4
586; CHECK-NEXT:    bx lr
587entry:
588  %0 = zext i16 %p to i32
589  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
590  %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
591  ret <16 x i8> %2
592}
593
; Predicated vqshl.imm with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p; the call passes shift 9 followed by i32 1.
; Expected output: vmsr p0 / vpst, then unsigned `vqshlt.u16 q0, q1, #9`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
594define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
595; CHECK-LABEL: test_vqshlq_m_n_u16:
596; CHECK:       @ %bb.0: @ %entry
597; CHECK-NEXT:    vmsr p0, r0
598; CHECK-NEXT:    vpst
599; CHECK-NEXT:    vqshlt.u16 q0, q1, #9
600; CHECK-NEXT:    bx lr
601entry:
602  %0 = zext i16 %p to i32
603  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
604  %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, <8 x i1> %1, <8 x i16> %inactive)
605  ret <8 x i16> %2
606}
607
; Predicated vqshl.imm with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p; the call passes shift 25 followed by i32 1.
; Expected output: vmsr p0 / vpst, then unsigned `vqshlt.u32 q0, q1, #25`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
608define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
609; CHECK-LABEL: test_vqshlq_m_n_u32:
610; CHECK:       @ %bb.0: @ %entry
611; CHECK-NEXT:    vmsr p0, r0
612; CHECK-NEXT:    vpst
613; CHECK-NEXT:    vqshlt.u32 q0, q1, #25
614; CHECK-NEXT:    bx lr
615entry:
616  %0 = zext i16 %p to i32
617  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
618  %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25, i32 1, <4 x i1> %1, <4 x i32> %inactive)
619  ret <4 x i32> %2
620}
621
; Predicated vqshlu.imm with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p; the call passes shift 2 and no extra flag operand.
; Expected output: vmsr p0 / vpst, then `vqshlut.s8 q0, q1, #2`.
622define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
623; CHECK-LABEL: test_vqshluq_m_n_s8:
624; CHECK:       @ %bb.0: @ %entry
625; CHECK-NEXT:    vmsr p0, r0
626; CHECK-NEXT:    vpst
627; CHECK-NEXT:    vqshlut.s8 q0, q1, #2
628; CHECK-NEXT:    bx lr
629entry:
630  %0 = zext i16 %p to i32
631  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
632  %2 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, <16 x i1> %1, <16 x i8> %inactive)
633  ret <16 x i8> %2
634}
635
; Predicated vqshlu.imm with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p; the call passes shift 12 and no extra flag operand.
; Expected output: vmsr p0 / vpst, then `vqshlut.s16 q0, q1, #12`.
636define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
637; CHECK-LABEL: test_vqshluq_m_n_s16:
638; CHECK:       @ %bb.0: @ %entry
639; CHECK-NEXT:    vmsr p0, r0
640; CHECK-NEXT:    vpst
641; CHECK-NEXT:    vqshlut.s16 q0, q1, #12
642; CHECK-NEXT:    bx lr
643entry:
644  %0 = zext i16 %p to i32
645  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
646  %2 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, <8 x i1> %1, <8 x i16> %inactive)
647  ret <8 x i16> %2
648}
649
; Predicated vqshlu.imm with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p; the call passes shift 24 and no extra flag operand.
; Expected output: vmsr p0 / vpst, then `vqshlut.s32 q0, q1, #24`.
650define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
651; CHECK-LABEL: test_vqshluq_m_n_s32:
652; CHECK:       @ %bb.0: @ %entry
653; CHECK-NEXT:    vmsr p0, r0
654; CHECK-NEXT:    vpst
655; CHECK-NEXT:    vqshlut.s32 q0, q1, #24
656; CHECK-NEXT:    bx lr
657entry:
658  %0 = zext i16 %p to i32
659  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
660  %2 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, <4 x i1> %1, <4 x i32> %inactive)
661  ret <4 x i32> %2
662}
663
; Predicated vrshr.imm with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p; the call passes shift 2 followed by i32 0.
; Expected output: vmsr p0 / vpst, then signed `vrshrt.s8 q0, q1, #2`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
664define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
665; CHECK-LABEL: test_vrshrq_m_n_s8:
666; CHECK:       @ %bb.0: @ %entry
667; CHECK-NEXT:    vmsr p0, r0
668; CHECK-NEXT:    vpst
669; CHECK-NEXT:    vrshrt.s8 q0, q1, #2
670; CHECK-NEXT:    bx lr
671entry:
672  %0 = zext i16 %p to i32
673  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
674  %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
675  ret <16 x i8> %2
676}
677
; Predicated vrshr.imm with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p; the call passes shift 11 followed by i32 0.
; Expected output: vmsr p0 / vpst, then signed `vrshrt.s16 q0, q1, #11`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
678define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
679; CHECK-LABEL: test_vrshrq_m_n_s16:
680; CHECK:       @ %bb.0: @ %entry
681; CHECK-NEXT:    vmsr p0, r0
682; CHECK-NEXT:    vpst
683; CHECK-NEXT:    vrshrt.s16 q0, q1, #11
684; CHECK-NEXT:    bx lr
685entry:
686  %0 = zext i16 %p to i32
687  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
688  %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 11, i32 0, <8 x i1> %1, <8 x i16> %inactive)
689  ret <8 x i16> %2
690}
691
; Predicated vrshr.imm with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p; the call passes shift 24 followed by i32 0.
; Expected output: vmsr p0 / vpst, then signed `vrshrt.s32 q0, q1, #24`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
692define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
693; CHECK-LABEL: test_vrshrq_m_n_s32:
694; CHECK:       @ %bb.0: @ %entry
695; CHECK-NEXT:    vmsr p0, r0
696; CHECK-NEXT:    vpst
697; CHECK-NEXT:    vrshrt.s32 q0, q1, #24
698; CHECK-NEXT:    bx lr
699entry:
700  %0 = zext i16 %p to i32
701  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
702  %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, i32 0, <4 x i1> %1, <4 x i32> %inactive)
703  ret <4 x i32> %2
704}
705
; Predicated vrshr.imm with an explicit %inactive vector: pred.i2v builds the
; <16 x i1> mask from %p; the call passes shift 7 followed by i32 1.
; Expected output: vmsr p0 / vpst, then unsigned `vrshrt.u8 q0, q1, #7`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
706define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
707; CHECK-LABEL: test_vrshrq_m_n_u8:
708; CHECK:       @ %bb.0: @ %entry
709; CHECK-NEXT:    vmsr p0, r0
710; CHECK-NEXT:    vpst
711; CHECK-NEXT:    vrshrt.u8 q0, q1, #7
712; CHECK-NEXT:    bx lr
713entry:
714  %0 = zext i16 %p to i32
715  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
716  %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> %inactive)
717  ret <16 x i8> %2
718}
719
; Predicated vrshr.imm with an explicit %inactive vector: pred.i2v builds the
; <8 x i1> mask from %p; the call passes shift 4 followed by i32 1.
; Expected output: vmsr p0 / vpst, then unsigned `vrshrt.u16 q0, q1, #4`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
720define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
721; CHECK-LABEL: test_vrshrq_m_n_u16:
722; CHECK:       @ %bb.0: @ %entry
723; CHECK-NEXT:    vmsr p0, r0
724; CHECK-NEXT:    vpst
725; CHECK-NEXT:    vrshrt.u16 q0, q1, #4
726; CHECK-NEXT:    bx lr
727entry:
728  %0 = zext i16 %p to i32
729  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
730  %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 4, i32 1, <8 x i1> %1, <8 x i16> %inactive)
731  ret <8 x i16> %2
732}
733
; Predicated vrshr.imm with an explicit %inactive vector: pred.i2v builds the
; <4 x i1> mask from %p; the call passes shift 27 followed by i32 1.
; Expected output: vmsr p0 / vpst, then unsigned `vrshrt.u32 q0, q1, #27`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
734define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
735; CHECK-LABEL: test_vrshrq_m_n_u32:
736; CHECK:       @ %bb.0: @ %entry
737; CHECK-NEXT:    vmsr p0, r0
738; CHECK-NEXT:    vpst
739; CHECK-NEXT:    vrshrt.u32 q0, q1, #27
740; CHECK-NEXT:    bx lr
741entry:
742  %0 = zext i16 %p to i32
743  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
744  %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 27, i32 1, <4 x i1> %1, <4 x i32> %inactive)
745  ret <4 x i32> %2
746}
747
; Predicated vrshr.imm with an undef inactive operand: the call passes shift 3
; followed by i32 0 and the <16 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then signed `vrshrt.s8 q0, q0, #3`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
748define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
749; CHECK-LABEL: test_vrshrq_x_n_s8:
750; CHECK:       @ %bb.0: @ %entry
751; CHECK-NEXT:    vmsr p0, r0
752; CHECK-NEXT:    vpst
753; CHECK-NEXT:    vrshrt.s8 q0, q0, #3
754; CHECK-NEXT:    bx lr
755entry:
756  %0 = zext i16 %p to i32
757  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
758  %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 3, i32 0, <16 x i1> %1, <16 x i8> undef)
759  ret <16 x i8> %2
760}
761
; Predicated vrshr.imm with an undef inactive operand: the call passes shift 12
; followed by i32 0 and the <8 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then signed `vrshrt.s16 q0, q0, #12`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
762define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
763; CHECK-LABEL: test_vrshrq_x_n_s16:
764; CHECK:       @ %bb.0: @ %entry
765; CHECK-NEXT:    vmsr p0, r0
766; CHECK-NEXT:    vpst
767; CHECK-NEXT:    vrshrt.s16 q0, q0, #12
768; CHECK-NEXT:    bx lr
769entry:
770  %0 = zext i16 %p to i32
771  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
772  %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, <8 x i1> %1, <8 x i16> undef)
773  ret <8 x i16> %2
774}
775
; Predicated vrshr.imm with an undef inactive operand: the call passes shift 20
; followed by i32 0 and the <4 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then signed `vrshrt.s32 q0, q0, #20`.
; NOTE(review): the i32 0 after the shift is presumably the unsigned flag - confirm.
776define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
777; CHECK-LABEL: test_vrshrq_x_n_s32:
778; CHECK:       @ %bb.0: @ %entry
779; CHECK-NEXT:    vmsr p0, r0
780; CHECK-NEXT:    vpst
781; CHECK-NEXT:    vrshrt.s32 q0, q0, #20
782; CHECK-NEXT:    bx lr
783entry:
784  %0 = zext i16 %p to i32
785  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
786  %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 20, i32 0, <4 x i1> %1, <4 x i32> undef)
787  ret <4 x i32> %2
788}
789
; Predicated vrshr.imm with an undef inactive operand: the call passes shift 1
; followed by i32 1 and the <16 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then unsigned `vrshrt.u8 q0, q0, #1`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
790define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
791; CHECK-LABEL: test_vrshrq_x_n_u8:
792; CHECK:       @ %bb.0: @ %entry
793; CHECK-NEXT:    vmsr p0, r0
794; CHECK-NEXT:    vpst
795; CHECK-NEXT:    vrshrt.u8 q0, q0, #1
796; CHECK-NEXT:    bx lr
797entry:
798  %0 = zext i16 %p to i32
799  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
800  %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, i32 1, <16 x i1> %1, <16 x i8> undef)
801  ret <16 x i8> %2
802}
803
; Predicated vrshr.imm with an undef inactive operand: the call passes shift 13
; followed by i32 1 and the <8 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then unsigned `vrshrt.u16 q0, q0, #13`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
804define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
805; CHECK-LABEL: test_vrshrq_x_n_u16:
806; CHECK:       @ %bb.0: @ %entry
807; CHECK-NEXT:    vmsr p0, r0
808; CHECK-NEXT:    vpst
809; CHECK-NEXT:    vrshrt.u16 q0, q0, #13
810; CHECK-NEXT:    bx lr
811entry:
812  %0 = zext i16 %p to i32
813  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
814  %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 1, <8 x i1> %1, <8 x i16> undef)
815  ret <8 x i16> %2
816}
817
; Predicated vrshr.imm with an undef inactive operand: the call passes shift 6
; followed by i32 1 and the <4 x i1> mask built from %p.
; Expected output: vmsr p0 / vpst, then unsigned `vrshrt.u32 q0, q0, #6`.
; NOTE(review): the i32 1 after the shift is presumably the unsigned flag - confirm.
818define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
819; CHECK-LABEL: test_vrshrq_x_n_u32:
820; CHECK:       @ %bb.0: @ %entry
821; CHECK-NEXT:    vmsr p0, r0
822; CHECK-NEXT:    vpst
823; CHECK-NEXT:    vrshrt.u32 q0, q0, #6
824; CHECK-NEXT:    bx lr
825entry:
826  %0 = zext i16 %p to i32
827  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
828  %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
829  ret <4 x i32> %2
830}
831
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with shift 2 and trailing
; i32 operands (0, 0); the expected output is a lone `vshllb.s8 q0, q0, #2`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s8 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 2, i32 0, i32 0)
  ret <8 x i16> %0
}
841
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to the
; 8-bit source lane width and trailing i32 operands (0, 0); the expected output
; is a lone `vshllb.s8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}
851
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with shift 13 and trailing
; i32 operands (0, 0); the expected output is a lone `vshllb.s16 q0, q0, #13`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s16 q0, q0, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 13, i32 0, i32 0)
  ret <4 x i32> %0
}
861
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to the
; 16-bit source lane width and trailing i32 operands (0, 0); the expected
; output is a lone `vshllb.s16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 0)
  ret <4 x i32> %0
}
871
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with shift 5 and trailing
; i32 operands (1, 0); the expected output is a lone `vshllb.u8 q0, q0, #5`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u8 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 5, i32 1, i32 0)
  ret <8 x i16> %0
}
881
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to the
; 8-bit source lane width and trailing i32 operands (1, 0); the expected output
; is a lone `vshllb.u8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 0)
  ret <8 x i16> %0
}
891
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with shift 6 and trailing
; i32 operands (1, 0); the expected output is a lone `vshllb.u16 q0, q0, #6`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u16 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 6, i32 1, i32 0)
  ret <4 x i32> %0
}
901
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to the
; 16-bit source lane width and trailing i32 operands (1, 0); the expected
; output is a lone `vshllb.u16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 0)
  ret <4 x i32> %0
}
911
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with shift 7 and trailing
; i32 operands (0, 1); the expected output is a lone `vshllt.s8 q0, q0, #7`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s8 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 0, i32 1)
  ret <8 x i16> %0
}
921
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to the
; 8-bit source lane width and trailing i32 operands (0, 1); the expected output
; is a lone `vshllt.s8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 1)
  ret <8 x i16> %0
}
931
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with shift 2 and trailing
; i32 operands (0, 1); the expected output is a lone `vshllt.s16 q0, q0, #2`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 2, i32 0, i32 1)
  ret <4 x i32> %0
}
941
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to the
; 16-bit source lane width and trailing i32 operands (0, 1); the expected
; output is a lone `vshllt.s16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 1)
  ret <4 x i32> %0
}
951
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with shift 7 and trailing
; i32 operands (1, 1); the expected output is a lone `vshllt.u8 q0, q0, #7`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u8 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 1, i32 1)
  ret <8 x i16> %0
}
961
; Unpredicated vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to the
; 8-bit source lane width and trailing i32 operands (1, 1); the expected output
; is a lone `vshllt.u8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 1)
  ret <8 x i16> %0
}
971
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with shift 14 and trailing
; i32 operands (1, 1); the expected output is a lone `vshllt.u16 q0, q0, #14`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u16 q0, q0, #14
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 14, i32 1, i32 1)
  ret <4 x i32> %0
}
981
; Unpredicated vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to the
; 16-bit source lane width and trailing i32 operands (1, 1); the expected
; output is a lone `vshllt.u16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 1)
  ret <4 x i32> %0
}
991
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16>: shift 6, trailing i32
; operands (0, 0), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.s8 q0, q1, #6`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s8 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1005
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (0, 0), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.s8 q0, q1, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s8 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1019
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32>: shift 10, trailing i32
; operands (0, 0), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.s16 q0, q1, #10`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s16 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1033
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (0, 0), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.s16 q0, q1, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s16 q0, q1, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1047
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16>: shift 3, trailing i32
; operands (1, 0), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.u8 q0, q1, #3`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u8 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 3, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1061
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (1, 0), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.u8 q0, q1, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u8 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1075
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32>: shift 14, trailing i32
; operands (1, 0), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.u16 q0, q1, #14`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u16 q0, q1, #14
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 14, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1089
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (1, 0), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.u16 q0, q1, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u16 q0, q1, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1103
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16>: shift 4, trailing i32
; operands (0, 1), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.s8 q0, q1, #4`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s8 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 4, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1117
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (0, 1), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.s8 q0, q1, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s8 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1131
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32>: shift 12, trailing i32
; operands (0, 1), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.s16 q0, q1, #12`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s16 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 12, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1145
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (0, 1), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.s16 q0, q1, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s16 q0, q1, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1159
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16>: shift 2, trailing i32
; operands (1, 1), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.u8 q0, q1, #2`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u8 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1173
; Predicated ("_m") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (1, 1), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.u8 q0, q1, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u8 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
1187
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32>: shift 9, trailing i32
; operands (1, 1), predicate built from %p via pred.i2v, %inactive as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.u16 q0, q1, #9`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u16 q0, q1, #9
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 9, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1201
; Predicated ("_m") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (1, 1), predicate built
; from %p via pred.i2v, %inactive as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.u16 q0, q1, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_m_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u16 q0, q1, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
1215
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16>: shift 1, trailing i32
; operands (0, 0), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.s8 q0, q0, #1`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1229
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (0, 0), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.s8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1243
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32>: shift 10, trailing i32
; operands (0, 0), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.s16 q0, q0, #10`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s16 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1257
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (0, 0), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.s16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1271
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16>: shift 6, trailing i32
; operands (1, 0), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.u8 q0, q0, #6`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u8 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1285
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (1, 0), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.u8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1299
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32>: shift 10, trailing i32
; operands (1, 0), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshllbt.u16 q0, q0, #10`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u16 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1313
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (1, 0), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshllbt.u16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_x_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.u16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1327
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16>: shift 2, trailing i32
; operands (0, 1), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.s8 q0, q0, #2`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s8 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1341
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (0, 1), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.s8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1355
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32>: shift 6, trailing i32
; operands (0, 1), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.s16 q0, q0, #6`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s16 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 6, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1369
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (0, 1), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.s16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.s16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1383
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16>: shift 5, trailing i32
; operands (1, 1), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.u8 q0, q0, #5`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u8 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 5, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1397
; Predicated ("_x") vshll.imm on <16 x i8> -> <8 x i16> with the shift equal to
; the 8-bit source lane width: trailing i32 operands (1, 1), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.u8 q0, q0, #8`.
define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}
1411
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32>: shift 3, trailing i32
; operands (1, 1), predicate built from %p via pred.i2v, undef as the last
; operand. The expected output is `vmsr p0, r0` followed by a VPST block
; holding `vshlltt.u16 q0, q0, #3`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u16 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 3, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1425
; Predicated ("_x") vshll.imm on <8 x i16> -> <4 x i32> with the shift equal to
; the 16-bit source lane width: trailing i32 operands (1, 1), predicate built
; from %p via pred.i2v, undef as the last operand. The expected output is
; `vmsr p0, r0` followed by a VPST block holding `vshlltt.u16 q0, q0, #16`.
define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlltq_x_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlltt.u16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}
1439
; Intrinsic declarations referenced by the test functions in this file.

; pred.i2v: i32 in, vector of i1 lanes out (16, 8 or 4 lanes).
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; shl.imm.predicated: (vector, i32, predicate, vector of the result type).
declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)

; shr.imm.predicated: as above, with a second i32 operand before the predicate.
declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)

; vqshl.imm: unpredicated forms take (vector, i32, i32); predicated forms add a
; predicate and a vector of the result type.
declare <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8>, i32, i32)
declare <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16>, i32, i32)
declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32)
declare <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)

; vqshlu.imm: unpredicated forms take (vector, i32); predicated forms add a
; predicate and a vector of the result type.
declare <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32>, i32)
declare <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)

; vrshr.imm: unpredicated forms take (vector, i32, i32); predicated forms add a
; predicate and a vector of the result type.
declare <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8>, i32, i32)
declare <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16>, i32, i32)
declare <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32>, i32, i32)
declare <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
declare <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)

; vshll.imm: the result vector has half as many lanes, each twice as wide, as
; the input vector. Unpredicated forms take (vector, i32, i32, i32); predicated
; forms add a predicate and a vector of the result type.
declare <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8>, i32, i32, i32)
declare <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16>, i32, i32, i32)
declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, i32, <4 x i1>, <4 x i32>)
1477