xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll (revision b5b663aac17415625340eb29c8010832bfc4c21c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; ---------------------------------------------------------------------------
; Unpredicated vld1q_* intrinsics: each is a plain (naturally element-aligned)
; vector load, expected to select the full-width MVE load of matching element
; size (vldrb.u8 / vldrh.u16 / vldrw.u32). Signed and unsigned variants load
; identically, so s8/u8 etc. check the same instruction.
; NOTE(review): the CHECK lines here are autogenerated by
; update_llc_test_checks.py -- regenerate rather than hand-edit.
; ---------------------------------------------------------------------------
4define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(ptr %base) {
5; CHECK-LABEL: test_vld1q_f16:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vldrh.u16 q0, [r0]
8; CHECK-NEXT:    bx lr
9entry:
10  %0 = load <8 x half>, ptr %base, align 2
11  ret <8 x half> %0
12}
13
14define arm_aapcs_vfpcc <4 x float> @test_vld1q_f32(ptr %base) {
15; CHECK-LABEL: test_vld1q_f32:
16; CHECK:       @ %bb.0: @ %entry
17; CHECK-NEXT:    vldrw.u32 q0, [r0]
18; CHECK-NEXT:    bx lr
19entry:
20  %0 = load <4 x float>, ptr %base, align 4
21  ret <4 x float> %0
22}
23
24define arm_aapcs_vfpcc <16 x i8> @test_vld1q_s8(ptr %base) {
25; CHECK-LABEL: test_vld1q_s8:
26; CHECK:       @ %bb.0: @ %entry
27; CHECK-NEXT:    vldrb.u8 q0, [r0]
28; CHECK-NEXT:    bx lr
29entry:
30  %0 = load <16 x i8>, ptr %base, align 1
31  ret <16 x i8> %0
32}
33
34define arm_aapcs_vfpcc <8 x i16> @test_vld1q_s16(ptr %base) {
35; CHECK-LABEL: test_vld1q_s16:
36; CHECK:       @ %bb.0: @ %entry
37; CHECK-NEXT:    vldrh.u16 q0, [r0]
38; CHECK-NEXT:    bx lr
39entry:
40  %0 = load <8 x i16>, ptr %base, align 2
41  ret <8 x i16> %0
42}
43
44define arm_aapcs_vfpcc <4 x i32> @test_vld1q_s32(ptr %base) {
45; CHECK-LABEL: test_vld1q_s32:
46; CHECK:       @ %bb.0: @ %entry
47; CHECK-NEXT:    vldrw.u32 q0, [r0]
48; CHECK-NEXT:    bx lr
49entry:
50  %0 = load <4 x i32>, ptr %base, align 4
51  ret <4 x i32> %0
52}
53
54define arm_aapcs_vfpcc <16 x i8> @test_vld1q_u8(ptr %base) {
55; CHECK-LABEL: test_vld1q_u8:
56; CHECK:       @ %bb.0: @ %entry
57; CHECK-NEXT:    vldrb.u8 q0, [r0]
58; CHECK-NEXT:    bx lr
59entry:
60  %0 = load <16 x i8>, ptr %base, align 1
61  ret <16 x i8> %0
62}
63
64define arm_aapcs_vfpcc <8 x i16> @test_vld1q_u16(ptr %base) {
65; CHECK-LABEL: test_vld1q_u16:
66; CHECK:       @ %bb.0: @ %entry
67; CHECK-NEXT:    vldrh.u16 q0, [r0]
68; CHECK-NEXT:    bx lr
69entry:
70  %0 = load <8 x i16>, ptr %base, align 2
71  ret <8 x i16> %0
72}
73
74define arm_aapcs_vfpcc <4 x i32> @test_vld1q_u32(ptr %base) {
75; CHECK-LABEL: test_vld1q_u32:
76; CHECK:       @ %bb.0: @ %entry
77; CHECK-NEXT:    vldrw.u32 q0, [r0]
78; CHECK-NEXT:    bx lr
79entry:
80  %0 = load <4 x i32>, ptr %base, align 4
81  ret <4 x i32> %0
82}
83
; ---------------------------------------------------------------------------
; Predicated (zeroing) vld1q_z_* intrinsics: the i16 predicate is converted to
; a lane mask via @llvm.arm.mve.pred.i2v.*, and the masked load (with a
; zeroinitializer passthru) should lower to vmsr p0 + vpst + vldr*t, so
; masked-off lanes come back as zero.
; ---------------------------------------------------------------------------
84define arm_aapcs_vfpcc <8 x half> @test_vld1q_z_f16(ptr %base, i16 zeroext %p) {
85; CHECK-LABEL: test_vld1q_z_f16:
86; CHECK:       @ %bb.0: @ %entry
87; CHECK-NEXT:    vmsr p0, r1
88; CHECK-NEXT:    vpst
89; CHECK-NEXT:    vldrht.u16 q0, [r0]
90; CHECK-NEXT:    bx lr
91entry:
92  %0 = zext i16 %p to i32
93  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
94  %2 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x half> zeroinitializer)
95  ret <8 x half> %2
96}
97
98declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
99
100declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32 immarg, <8 x i1>, <8 x half>)
101
102define arm_aapcs_vfpcc <4 x float> @test_vld1q_z_f32(ptr %base, i16 zeroext %p) {
103; CHECK-LABEL: test_vld1q_z_f32:
104; CHECK:       @ %bb.0: @ %entry
105; CHECK-NEXT:    vmsr p0, r1
106; CHECK-NEXT:    vpst
107; CHECK-NEXT:    vldrwt.u32 q0, [r0]
108; CHECK-NEXT:    bx lr
109entry:
110  %0 = zext i16 %p to i32
111  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
112  %2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
113  ret <4 x float> %2
114}
115
116declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
117
118declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>)
119
120define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_s8(ptr %base, i16 zeroext %p) {
121; CHECK-LABEL: test_vld1q_z_s8:
122; CHECK:       @ %bb.0: @ %entry
123; CHECK-NEXT:    vmsr p0, r1
124; CHECK-NEXT:    vpst
125; CHECK-NEXT:    vldrbt.u8 q0, [r0]
126; CHECK-NEXT:    bx lr
127entry:
128  %0 = zext i16 %p to i32
129  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
130  %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
131  ret <16 x i8> %2
132}
133
134declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
135
136declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>)
137
138define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_s16(ptr %base, i16 zeroext %p) {
139; CHECK-LABEL: test_vld1q_z_s16:
140; CHECK:       @ %bb.0: @ %entry
141; CHECK-NEXT:    vmsr p0, r1
142; CHECK-NEXT:    vpst
143; CHECK-NEXT:    vldrht.u16 q0, [r0]
144; CHECK-NEXT:    bx lr
145entry:
146  %0 = zext i16 %p to i32
147  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
148  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer)
149  ret <8 x i16> %2
150}
151
152declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
153
154define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_s32(ptr %base, i16 zeroext %p) {
155; CHECK-LABEL: test_vld1q_z_s32:
156; CHECK:       @ %bb.0: @ %entry
157; CHECK-NEXT:    vmsr p0, r1
158; CHECK-NEXT:    vpst
159; CHECK-NEXT:    vldrwt.u32 q0, [r0]
160; CHECK-NEXT:    bx lr
161entry:
162  %0 = zext i16 %p to i32
163  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
164  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer)
165  ret <4 x i32> %2
166}
167
168declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
169
170define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_u8(ptr %base, i16 zeroext %p) {
171; CHECK-LABEL: test_vld1q_z_u8:
172; CHECK:       @ %bb.0: @ %entry
173; CHECK-NEXT:    vmsr p0, r1
174; CHECK-NEXT:    vpst
175; CHECK-NEXT:    vldrbt.u8 q0, [r0]
176; CHECK-NEXT:    bx lr
177entry:
178  %0 = zext i16 %p to i32
179  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
180  %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
181  ret <16 x i8> %2
182}
183
184define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_u16(ptr %base, i16 zeroext %p) {
185; CHECK-LABEL: test_vld1q_z_u16:
186; CHECK:       @ %bb.0: @ %entry
187; CHECK-NEXT:    vmsr p0, r1
188; CHECK-NEXT:    vpst
189; CHECK-NEXT:    vldrht.u16 q0, [r0]
190; CHECK-NEXT:    bx lr
191entry:
192  %0 = zext i16 %p to i32
193  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
194  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer)
195  ret <8 x i16> %2
196}
197
198define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_u32(ptr %base, i16 zeroext %p) {
199; CHECK-LABEL: test_vld1q_z_u32:
200; CHECK:       @ %bb.0: @ %entry
201; CHECK-NEXT:    vmsr p0, r1
202; CHECK-NEXT:    vpst
203; CHECK-NEXT:    vldrwt.u32 q0, [r0]
204; CHECK-NEXT:    bx lr
205entry:
206  %0 = zext i16 %p to i32
207  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
208  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer)
209  ret <4 x i32> %2
210}
211
; ---------------------------------------------------------------------------
; Unpredicated vldrbq_* intrinsics: byte loads with optional widening. A load
; of narrow i8 lanes followed by sext/zext to i16/i32 lanes should fold into a
; single widening byte load (vldrb.s16/.s32 for sext, vldrb.u16/.u32 for
; zext); the full-width 16 x i8 case selects plain vldrb.u8.
; ---------------------------------------------------------------------------
212define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_s8(ptr %base) {
213; CHECK-LABEL: test_vldrbq_s8:
214; CHECK:       @ %bb.0: @ %entry
215; CHECK-NEXT:    vldrb.u8 q0, [r0]
216; CHECK-NEXT:    bx lr
217entry:
218  %0 = load <16 x i8>, ptr %base, align 1
219  ret <16 x i8> %0
220}
221
222define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_s16(ptr %base) {
223; CHECK-LABEL: test_vldrbq_s16:
224; CHECK:       @ %bb.0: @ %entry
225; CHECK-NEXT:    vldrb.s16 q0, [r0]
226; CHECK-NEXT:    bx lr
227entry:
228  %0 = load <8 x i8>, ptr %base, align 1
229  %1 = sext <8 x i8> %0 to <8 x i16>
230  ret <8 x i16> %1
231}
232
233define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_s32(ptr %base) {
234; CHECK-LABEL: test_vldrbq_s32:
235; CHECK:       @ %bb.0: @ %entry
236; CHECK-NEXT:    vldrb.s32 q0, [r0]
237; CHECK-NEXT:    bx lr
238entry:
239  %0 = load <4 x i8>, ptr %base, align 1
240  %1 = sext <4 x i8> %0 to <4 x i32>
241  ret <4 x i32> %1
242}
243
244define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_u8(ptr %base) {
245; CHECK-LABEL: test_vldrbq_u8:
246; CHECK:       @ %bb.0: @ %entry
247; CHECK-NEXT:    vldrb.u8 q0, [r0]
248; CHECK-NEXT:    bx lr
249entry:
250  %0 = load <16 x i8>, ptr %base, align 1
251  ret <16 x i8> %0
252}
253
254define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_u16(ptr %base) {
255; CHECK-LABEL: test_vldrbq_u16:
256; CHECK:       @ %bb.0: @ %entry
257; CHECK-NEXT:    vldrb.u16 q0, [r0]
258; CHECK-NEXT:    bx lr
259entry:
260  %0 = load <8 x i8>, ptr %base, align 1
261  %1 = zext <8 x i8> %0 to <8 x i16>
262  ret <8 x i16> %1
263}
264
265define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_u32(ptr %base) {
266; CHECK-LABEL: test_vldrbq_u32:
267; CHECK:       @ %bb.0: @ %entry
268; CHECK-NEXT:    vldrb.u32 q0, [r0]
269; CHECK-NEXT:    bx lr
270entry:
271  %0 = load <4 x i8>, ptr %base, align 1
272  %1 = zext <4 x i8> %0 to <4 x i32>
273  ret <4 x i32> %1
274}
275
; ---------------------------------------------------------------------------
; Predicated (zeroing) vldrbq_z_* intrinsics: a masked narrow-lane byte load
; (zeroinitializer passthru) followed by sext/zext should fold into a single
; predicated widening byte load inside a VPT block (vldrbt.s16/.s32 or
; vldrbt.u16/.u32); the full-width case uses vldrbt.u8.
; ---------------------------------------------------------------------------
276define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_s8(ptr %base, i16 zeroext %p) {
277; CHECK-LABEL: test_vldrbq_z_s8:
278; CHECK:       @ %bb.0: @ %entry
279; CHECK-NEXT:    vmsr p0, r1
280; CHECK-NEXT:    vpst
281; CHECK-NEXT:    vldrbt.u8 q0, [r0]
282; CHECK-NEXT:    bx lr
283entry:
284  %0 = zext i16 %p to i32
285  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
286  %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
287  ret <16 x i8> %2
288}
289
290define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_s16(ptr %base, i16 zeroext %p) {
291; CHECK-LABEL: test_vldrbq_z_s16:
292; CHECK:       @ %bb.0: @ %entry
293; CHECK-NEXT:    vmsr p0, r1
294; CHECK-NEXT:    vpst
295; CHECK-NEXT:    vldrbt.s16 q0, [r0]
296; CHECK-NEXT:    bx lr
297entry:
298  %0 = zext i16 %p to i32
299  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
300  %2 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %base, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
301  %3 = sext <8 x i8> %2 to <8 x i16>
302  ret <8 x i16> %3
303}
304
305declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>)
306
307define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_s32(ptr %base, i16 zeroext %p) {
308; CHECK-LABEL: test_vldrbq_z_s32:
309; CHECK:       @ %bb.0: @ %entry
310; CHECK-NEXT:    vmsr p0, r1
311; CHECK-NEXT:    vpst
312; CHECK-NEXT:    vldrbt.s32 q0, [r0]
313; CHECK-NEXT:    bx lr
314entry:
315  %0 = zext i16 %p to i32
316  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
317  %2 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %base, i32 1, <4 x i1> %1, <4 x i8> zeroinitializer)
318  %3 = sext <4 x i8> %2 to <4 x i32>
319  ret <4 x i32> %3
320}
321
322declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>)
323
324define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_u8(ptr %base, i16 zeroext %p) {
325; CHECK-LABEL: test_vldrbq_z_u8:
326; CHECK:       @ %bb.0: @ %entry
327; CHECK-NEXT:    vmsr p0, r1
328; CHECK-NEXT:    vpst
329; CHECK-NEXT:    vldrbt.u8 q0, [r0]
330; CHECK-NEXT:    bx lr
331entry:
332  %0 = zext i16 %p to i32
333  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
334  %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer)
335  ret <16 x i8> %2
336}
337
338define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_u16(ptr %base, i16 zeroext %p) {
339; CHECK-LABEL: test_vldrbq_z_u16:
340; CHECK:       @ %bb.0: @ %entry
341; CHECK-NEXT:    vmsr p0, r1
342; CHECK-NEXT:    vpst
343; CHECK-NEXT:    vldrbt.u16 q0, [r0]
344; CHECK-NEXT:    bx lr
345entry:
346  %0 = zext i16 %p to i32
347  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
348  %2 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %base, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer)
349  %3 = zext <8 x i8> %2 to <8 x i16>
350  ret <8 x i16> %3
351}
352
353define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_u32(ptr %base, i16 zeroext %p) {
354; CHECK-LABEL: test_vldrbq_z_u32:
355; CHECK:       @ %bb.0: @ %entry
356; CHECK-NEXT:    vmsr p0, r1
357; CHECK-NEXT:    vpst
358; CHECK-NEXT:    vldrbt.u32 q0, [r0]
359; CHECK-NEXT:    bx lr
360entry:
361  %0 = zext i16 %p to i32
362  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
363  %2 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %base, i32 1, <4 x i1> %1, <4 x i8> zeroinitializer)
364  %3 = zext <4 x i8> %2 to <4 x i32>
365  ret <4 x i32> %3
366}
367
; ---------------------------------------------------------------------------
; Unpredicated vldrhq_* intrinsics: halfword loads, full-width (vldrh.u16) or
; widening from 4 x i16 to 4 x i32 (vldrh.s32 for sext, vldrh.u32 for zext).
; ---------------------------------------------------------------------------
368define arm_aapcs_vfpcc <8 x half> @test_vldrhq_f16(ptr %base) {
369; CHECK-LABEL: test_vldrhq_f16:
370; CHECK:       @ %bb.0: @ %entry
371; CHECK-NEXT:    vldrh.u16 q0, [r0]
372; CHECK-NEXT:    bx lr
373entry:
374  %0 = load <8 x half>, ptr %base, align 2
375  ret <8 x half> %0
376}
377
378define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_s16(ptr %base) {
379; CHECK-LABEL: test_vldrhq_s16:
380; CHECK:       @ %bb.0: @ %entry
381; CHECK-NEXT:    vldrh.u16 q0, [r0]
382; CHECK-NEXT:    bx lr
383entry:
384  %0 = load <8 x i16>, ptr %base, align 2
385  ret <8 x i16> %0
386}
387
388define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_s32(ptr %base) {
389; CHECK-LABEL: test_vldrhq_s32:
390; CHECK:       @ %bb.0: @ %entry
391; CHECK-NEXT:    vldrh.s32 q0, [r0]
392; CHECK-NEXT:    bx lr
393entry:
394  %0 = load <4 x i16>, ptr %base, align 2
395  %1 = sext <4 x i16> %0 to <4 x i32>
396  ret <4 x i32> %1
397}
398
399define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_u16(ptr %base) {
400; CHECK-LABEL: test_vldrhq_u16:
401; CHECK:       @ %bb.0: @ %entry
402; CHECK-NEXT:    vldrh.u16 q0, [r0]
403; CHECK-NEXT:    bx lr
404entry:
405  %0 = load <8 x i16>, ptr %base, align 2
406  ret <8 x i16> %0
407}
408
409define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_u32(ptr %base) {
410; CHECK-LABEL: test_vldrhq_u32:
411; CHECK:       @ %bb.0: @ %entry
412; CHECK-NEXT:    vldrh.u32 q0, [r0]
413; CHECK-NEXT:    bx lr
414entry:
415  %0 = load <4 x i16>, ptr %base, align 2
416  %1 = zext <4 x i16> %0 to <4 x i32>
417  ret <4 x i32> %1
418}
419
; ---------------------------------------------------------------------------
; Predicated (zeroing) vldrhq_z_* intrinsics: masked halfword loads with a
; zero passthru, lowered into a VPT block (vldrht.u16 full-width, or the
; widening vldrht.s32/.u32 when the masked load feeds a sext/zext).
; ---------------------------------------------------------------------------
420define arm_aapcs_vfpcc <8 x half> @test_vldrhq_z_f16(ptr %base, i16 zeroext %p) {
421; CHECK-LABEL: test_vldrhq_z_f16:
422; CHECK:       @ %bb.0: @ %entry
423; CHECK-NEXT:    vmsr p0, r1
424; CHECK-NEXT:    vpst
425; CHECK-NEXT:    vldrht.u16 q0, [r0]
426; CHECK-NEXT:    bx lr
427entry:
428  %0 = zext i16 %p to i32
429  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
430  %2 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x half> zeroinitializer)
431  ret <8 x half> %2
432}
433
434define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_s16(ptr %base, i16 zeroext %p) {
435; CHECK-LABEL: test_vldrhq_z_s16:
436; CHECK:       @ %bb.0: @ %entry
437; CHECK-NEXT:    vmsr p0, r1
438; CHECK-NEXT:    vpst
439; CHECK-NEXT:    vldrht.u16 q0, [r0]
440; CHECK-NEXT:    bx lr
441entry:
442  %0 = zext i16 %p to i32
443  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
444  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer)
445  ret <8 x i16> %2
446}
447
448define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_s32(ptr %base, i16 zeroext %p) {
449; CHECK-LABEL: test_vldrhq_z_s32:
450; CHECK:       @ %bb.0: @ %entry
451; CHECK-NEXT:    vmsr p0, r1
452; CHECK-NEXT:    vpst
453; CHECK-NEXT:    vldrht.s32 q0, [r0]
454; CHECK-NEXT:    bx lr
455entry:
456  %0 = zext i16 %p to i32
457  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
458  %2 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %base, i32 2, <4 x i1> %1, <4 x i16> zeroinitializer)
459  %3 = sext <4 x i16> %2 to <4 x i32>
460  ret <4 x i32> %3
461}
462
463declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>)
464
465define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_u16(ptr %base, i16 zeroext %p) {
466; CHECK-LABEL: test_vldrhq_z_u16:
467; CHECK:       @ %bb.0: @ %entry
468; CHECK-NEXT:    vmsr p0, r1
469; CHECK-NEXT:    vpst
470; CHECK-NEXT:    vldrht.u16 q0, [r0]
471; CHECK-NEXT:    bx lr
472entry:
473  %0 = zext i16 %p to i32
474  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
475  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer)
476  ret <8 x i16> %2
477}
478
479define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_u32(ptr %base, i16 zeroext %p) {
480; CHECK-LABEL: test_vldrhq_z_u32:
481; CHECK:       @ %bb.0: @ %entry
482; CHECK-NEXT:    vmsr p0, r1
483; CHECK-NEXT:    vpst
484; CHECK-NEXT:    vldrht.u32 q0, [r0]
485; CHECK-NEXT:    bx lr
486entry:
487  %0 = zext i16 %p to i32
488  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
489  %2 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %base, i32 2, <4 x i1> %1, <4 x i16> zeroinitializer)
490  %3 = zext <4 x i16> %2 to <4 x i32>
491  ret <4 x i32> %3
492}
493
; ---------------------------------------------------------------------------
; Unpredicated vldrwq_* intrinsics: full-width 32-bit-lane loads; no widening
; variants exist at this size, so all three select vldrw.u32.
; ---------------------------------------------------------------------------
494define arm_aapcs_vfpcc <4 x float> @test_vldrwq_f32(ptr %base) {
495; CHECK-LABEL: test_vldrwq_f32:
496; CHECK:       @ %bb.0: @ %entry
497; CHECK-NEXT:    vldrw.u32 q0, [r0]
498; CHECK-NEXT:    bx lr
499entry:
500  %0 = load <4 x float>, ptr %base, align 4
501  ret <4 x float> %0
502}
503
504define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_s32(ptr %base) {
505; CHECK-LABEL: test_vldrwq_s32:
506; CHECK:       @ %bb.0: @ %entry
507; CHECK-NEXT:    vldrw.u32 q0, [r0]
508; CHECK-NEXT:    bx lr
509entry:
510  %0 = load <4 x i32>, ptr %base, align 4
511  ret <4 x i32> %0
512}
513
514define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_u32(ptr %base) {
515; CHECK-LABEL: test_vldrwq_u32:
516; CHECK:       @ %bb.0: @ %entry
517; CHECK-NEXT:    vldrw.u32 q0, [r0]
518; CHECK-NEXT:    bx lr
519entry:
520  %0 = load <4 x i32>, ptr %base, align 4
521  ret <4 x i32> %0
522}
523
; ---------------------------------------------------------------------------
; Predicated (zeroing) vldrwq_z_* intrinsics: masked 32-bit-lane loads with a
; zero passthru, lowered to vmsr p0 + vpst + vldrwt.u32.
; ---------------------------------------------------------------------------
524define arm_aapcs_vfpcc <4 x float> @test_vldrwq_z_f32(ptr %base, i16 zeroext %p) {
525; CHECK-LABEL: test_vldrwq_z_f32:
526; CHECK:       @ %bb.0: @ %entry
527; CHECK-NEXT:    vmsr p0, r1
528; CHECK-NEXT:    vpst
529; CHECK-NEXT:    vldrwt.u32 q0, [r0]
530; CHECK-NEXT:    bx lr
531entry:
532  %0 = zext i16 %p to i32
533  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
534  %2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
535  ret <4 x float> %2
536}
537
538define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_s32(ptr %base, i16 zeroext %p) {
539; CHECK-LABEL: test_vldrwq_z_s32:
540; CHECK:       @ %bb.0: @ %entry
541; CHECK-NEXT:    vmsr p0, r1
542; CHECK-NEXT:    vpst
543; CHECK-NEXT:    vldrwt.u32 q0, [r0]
544; CHECK-NEXT:    bx lr
545entry:
546  %0 = zext i16 %p to i32
547  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
548  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer)
549  ret <4 x i32> %2
550}
551
552define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_u32(ptr %base, i16 zeroext %p) {
553; CHECK-LABEL: test_vldrwq_z_u32:
554; CHECK:       @ %bb.0: @ %entry
555; CHECK-NEXT:    vmsr p0, r1
556; CHECK-NEXT:    vpst
557; CHECK-NEXT:    vldrwt.u32 q0, [r0]
558; CHECK-NEXT:    bx lr
559entry:
560  %0 = zext i16 %p to i32
561  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
562  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer)
563  ret <4 x i32> %2
564}
565
; ---------------------------------------------------------------------------
; Unpredicated vst1q_* stores: each plain vector store selects the full-width
; MVE store of matching element size (vstrb.8 / vstrh.16 / vstrw.32).
; ---------------------------------------------------------------------------
566define arm_aapcs_vfpcc void @test_vst1q_f16(ptr %base, <8 x half> %value) {
567; CHECK-LABEL: test_vst1q_f16:
568; CHECK:       @ %bb.0: @ %entry
569; CHECK-NEXT:    vstrh.16 q0, [r0]
570; CHECK-NEXT:    bx lr
571entry:
572  store <8 x half> %value, ptr %base, align 2
573  ret void
574}
575
576define arm_aapcs_vfpcc void @test_vst1q_f32(ptr %base, <4 x float> %value) {
577; CHECK-LABEL: test_vst1q_f32:
578; CHECK:       @ %bb.0: @ %entry
579; CHECK-NEXT:    vstrw.32 q0, [r0]
580; CHECK-NEXT:    bx lr
581entry:
582  store <4 x float> %value, ptr %base, align 4
583  ret void
584}
585
586define arm_aapcs_vfpcc void @test_vst1q_s8(ptr %base, <16 x i8> %value) {
587; CHECK-LABEL: test_vst1q_s8:
588; CHECK:       @ %bb.0: @ %entry
589; CHECK-NEXT:    vstrb.8 q0, [r0]
590; CHECK-NEXT:    bx lr
591entry:
592  store <16 x i8> %value, ptr %base, align 1
593  ret void
594}
595
596define arm_aapcs_vfpcc void @test_vst1q_s16(ptr %base, <8 x i16> %value) {
597; CHECK-LABEL: test_vst1q_s16:
598; CHECK:       @ %bb.0: @ %entry
599; CHECK-NEXT:    vstrh.16 q0, [r0]
600; CHECK-NEXT:    bx lr
601entry:
602  store <8 x i16> %value, ptr %base, align 2
603  ret void
604}
605
606define arm_aapcs_vfpcc void @test_vst1q_s32(ptr %base, <4 x i32> %value) {
607; CHECK-LABEL: test_vst1q_s32:
608; CHECK:       @ %bb.0: @ %entry
609; CHECK-NEXT:    vstrw.32 q0, [r0]
610; CHECK-NEXT:    bx lr
611entry:
612  store <4 x i32> %value, ptr %base, align 4
613  ret void
614}
615
616define arm_aapcs_vfpcc void @test_vst1q_u8(ptr %base, <16 x i8> %value) {
617; CHECK-LABEL: test_vst1q_u8:
618; CHECK:       @ %bb.0: @ %entry
619; CHECK-NEXT:    vstrb.8 q0, [r0]
620; CHECK-NEXT:    bx lr
621entry:
622  store <16 x i8> %value, ptr %base, align 1
623  ret void
624}
625
626define arm_aapcs_vfpcc void @test_vst1q_u16(ptr %base, <8 x i16> %value) {
627; CHECK-LABEL: test_vst1q_u16:
628; CHECK:       @ %bb.0: @ %entry
629; CHECK-NEXT:    vstrh.16 q0, [r0]
630; CHECK-NEXT:    bx lr
631entry:
632  store <8 x i16> %value, ptr %base, align 2
633  ret void
634}
635
636define arm_aapcs_vfpcc void @test_vst1q_u32(ptr %base, <4 x i32> %value) {
637; CHECK-LABEL: test_vst1q_u32:
638; CHECK:       @ %bb.0: @ %entry
639; CHECK-NEXT:    vstrw.32 q0, [r0]
640; CHECK-NEXT:    bx lr
641entry:
642  store <4 x i32> %value, ptr %base, align 4
643  ret void
644}
645
; ---------------------------------------------------------------------------
; Predicated vst1q_p_* stores: masked stores built from the i16 predicate via
; @llvm.arm.mve.pred.i2v.*, lowered to vmsr p0 + vpst + vstr*t so masked-off
; lanes are not written.
; ---------------------------------------------------------------------------
646define arm_aapcs_vfpcc void @test_vst1q_p_f16(ptr %base, <8 x half> %value, i16 zeroext %p) {
647; CHECK-LABEL: test_vst1q_p_f16:
648; CHECK:       @ %bb.0: @ %entry
649; CHECK-NEXT:    vmsr p0, r1
650; CHECK-NEXT:    vpst
651; CHECK-NEXT:    vstrht.16 q0, [r0]
652; CHECK-NEXT:    bx lr
653entry:
654  %0 = zext i16 %p to i32
655  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
656  call void @llvm.masked.store.v8f16.p0(<8 x half> %value, ptr %base, i32 2, <8 x i1> %1)
657  ret void
658}
659
660declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32 immarg, <8 x i1>)
661
662define arm_aapcs_vfpcc void @test_vst1q_p_f32(ptr %base, <4 x float> %value, i16 zeroext %p) {
663; CHECK-LABEL: test_vst1q_p_f32:
664; CHECK:       @ %bb.0: @ %entry
665; CHECK-NEXT:    vmsr p0, r1
666; CHECK-NEXT:    vpst
667; CHECK-NEXT:    vstrwt.32 q0, [r0]
668; CHECK-NEXT:    bx lr
669entry:
670  %0 = zext i16 %p to i32
671  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
672  call void @llvm.masked.store.v4f32.p0(<4 x float> %value, ptr %base, i32 4, <4 x i1> %1)
673  ret void
674}
675
676declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>)
677
678define arm_aapcs_vfpcc void @test_vst1q_p_s8(ptr %base, <16 x i8> %value, i16 zeroext %p) {
679; CHECK-LABEL: test_vst1q_p_s8:
680; CHECK:       @ %bb.0: @ %entry
681; CHECK-NEXT:    vmsr p0, r1
682; CHECK-NEXT:    vpst
683; CHECK-NEXT:    vstrbt.8 q0, [r0]
684; CHECK-NEXT:    bx lr
685entry:
686  %0 = zext i16 %p to i32
687  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
688  call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1)
689  ret void
690}
691
692declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
693
694define arm_aapcs_vfpcc void @test_vst1q_p_s16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
695; CHECK-LABEL: test_vst1q_p_s16:
696; CHECK:       @ %bb.0: @ %entry
697; CHECK-NEXT:    vmsr p0, r1
698; CHECK-NEXT:    vpst
699; CHECK-NEXT:    vstrht.16 q0, [r0]
700; CHECK-NEXT:    bx lr
701entry:
702  %0 = zext i16 %p to i32
703  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
704  call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1)
705  ret void
706}
707
708declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
709
710define arm_aapcs_vfpcc void @test_vst1q_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
711; CHECK-LABEL: test_vst1q_p_s32:
712; CHECK:       @ %bb.0: @ %entry
713; CHECK-NEXT:    vmsr p0, r1
714; CHECK-NEXT:    vpst
715; CHECK-NEXT:    vstrwt.32 q0, [r0]
716; CHECK-NEXT:    bx lr
717entry:
718  %0 = zext i16 %p to i32
719  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
720  call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1)
721  ret void
722}
723
724declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
725
726define arm_aapcs_vfpcc void @test_vst1q_p_u8(ptr %base, <16 x i8> %value, i16 zeroext %p) {
727; CHECK-LABEL: test_vst1q_p_u8:
728; CHECK:       @ %bb.0: @ %entry
729; CHECK-NEXT:    vmsr p0, r1
730; CHECK-NEXT:    vpst
731; CHECK-NEXT:    vstrbt.8 q0, [r0]
732; CHECK-NEXT:    bx lr
733entry:
734  %0 = zext i16 %p to i32
735  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
736  call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1)
737  ret void
738}
739
740define arm_aapcs_vfpcc void @test_vst1q_p_u16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
741; CHECK-LABEL: test_vst1q_p_u16:
742; CHECK:       @ %bb.0: @ %entry
743; CHECK-NEXT:    vmsr p0, r1
744; CHECK-NEXT:    vpst
745; CHECK-NEXT:    vstrht.16 q0, [r0]
746; CHECK-NEXT:    bx lr
747entry:
748  %0 = zext i16 %p to i32
749  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
750  call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1)
751  ret void
752}
753
754define arm_aapcs_vfpcc void @test_vst1q_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
755; CHECK-LABEL: test_vst1q_p_u32:
756; CHECK:       @ %bb.0: @ %entry
757; CHECK-NEXT:    vmsr p0, r1
758; CHECK-NEXT:    vpst
759; CHECK-NEXT:    vstrwt.32 q0, [r0]
760; CHECK-NEXT:    bx lr
761entry:
762  %0 = zext i16 %p to i32
763  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
764  call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1)
765  ret void
766}
767
; ---------------------------------------------------------------------------
; Unpredicated vstrbq_* intrinsics: byte stores with optional narrowing. A
; trunc of i16/i32 lanes to i8 followed by a narrow store should fold into a
; single truncating byte store (vstrb.16 / vstrb.32); the full-width
; 16 x i8 case selects plain vstrb.8.
; ---------------------------------------------------------------------------
768define arm_aapcs_vfpcc void @test_vstrbq_s8(ptr %base, <16 x i8> %value) {
769; CHECK-LABEL: test_vstrbq_s8:
770; CHECK:       @ %bb.0: @ %entry
771; CHECK-NEXT:    vstrb.8 q0, [r0]
772; CHECK-NEXT:    bx lr
773entry:
774  store <16 x i8> %value, ptr %base, align 1
775  ret void
776}
777
778define arm_aapcs_vfpcc void @test_vstrbq_s16(ptr %base, <8 x i16> %value) {
779; CHECK-LABEL: test_vstrbq_s16:
780; CHECK:       @ %bb.0: @ %entry
781; CHECK-NEXT:    vstrb.16 q0, [r0]
782; CHECK-NEXT:    bx lr
783entry:
784  %0 = trunc <8 x i16> %value to <8 x i8>
785  store <8 x i8> %0, ptr %base, align 1
786  ret void
787}
788
789define arm_aapcs_vfpcc void @test_vstrbq_s32(ptr %base, <4 x i32> %value) {
790; CHECK-LABEL: test_vstrbq_s32:
791; CHECK:       @ %bb.0: @ %entry
792; CHECK-NEXT:    vstrb.32 q0, [r0]
793; CHECK-NEXT:    bx lr
794entry:
795  %0 = trunc <4 x i32> %value to <4 x i8>
796  store <4 x i8> %0, ptr %base, align 1
797  ret void
798}
799
800define arm_aapcs_vfpcc void @test_vstrbq_u8(ptr %base, <16 x i8> %value) {
801; CHECK-LABEL: test_vstrbq_u8:
802; CHECK:       @ %bb.0: @ %entry
803; CHECK-NEXT:    vstrb.8 q0, [r0]
804; CHECK-NEXT:    bx lr
805entry:
806  store <16 x i8> %value, ptr %base, align 1
807  ret void
808}
809
810define arm_aapcs_vfpcc void @test_vstrbq_u16(ptr %base, <8 x i16> %value) {
811; CHECK-LABEL: test_vstrbq_u16:
812; CHECK:       @ %bb.0: @ %entry
813; CHECK-NEXT:    vstrb.16 q0, [r0]
814; CHECK-NEXT:    bx lr
815entry:
816  %0 = trunc <8 x i16> %value to <8 x i8>
817  store <8 x i8> %0, ptr %base, align 1
818  ret void
819}
820
821define arm_aapcs_vfpcc void @test_vstrbq_u32(ptr %base, <4 x i32> %value) {
822; CHECK-LABEL: test_vstrbq_u32:
823; CHECK:       @ %bb.0: @ %entry
824; CHECK-NEXT:    vstrb.32 q0, [r0]
825; CHECK-NEXT:    bx lr
826entry:
827  %0 = trunc <4 x i32> %value to <4 x i8>
828  store <4 x i8> %0, ptr %base, align 1
829  ret void
830}
831
; ---------------------------------------------------------------------------
; Predicated vstrbq_p_* intrinsics: masked byte stores, including the
; truncating forms (trunc + masked narrow store folds into vstrbt.16/.32
; inside a VPT block); the full-width case uses vstrbt.8.
; ---------------------------------------------------------------------------
832define arm_aapcs_vfpcc void @test_vstrbq_p_s8(ptr %base, <16 x i8> %value, i16 zeroext %p) {
833; CHECK-LABEL: test_vstrbq_p_s8:
834; CHECK:       @ %bb.0: @ %entry
835; CHECK-NEXT:    vmsr p0, r1
836; CHECK-NEXT:    vpst
837; CHECK-NEXT:    vstrbt.8 q0, [r0]
838; CHECK-NEXT:    bx lr
839entry:
840  %0 = zext i16 %p to i32
841  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
842  call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1)
843  ret void
844}
845
846define arm_aapcs_vfpcc void @test_vstrbq_p_s16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
847; CHECK-LABEL: test_vstrbq_p_s16:
848; CHECK:       @ %bb.0: @ %entry
849; CHECK-NEXT:    vmsr p0, r1
850; CHECK-NEXT:    vpst
851; CHECK-NEXT:    vstrbt.16 q0, [r0]
852; CHECK-NEXT:    bx lr
853entry:
854  %0 = trunc <8 x i16> %value to <8 x i8>
855  %1 = zext i16 %p to i32
856  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
857  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %base, i32 1, <8 x i1> %2)
858  ret void
859}
860
861declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32 immarg, <8 x i1>)
862
863define arm_aapcs_vfpcc void @test_vstrbq_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
864; CHECK-LABEL: test_vstrbq_p_s32:
865; CHECK:       @ %bb.0: @ %entry
866; CHECK-NEXT:    vmsr p0, r1
867; CHECK-NEXT:    vpst
868; CHECK-NEXT:    vstrbt.32 q0, [r0]
869; CHECK-NEXT:    bx lr
870entry:
871  %0 = trunc <4 x i32> %value to <4 x i8>
872  %1 = zext i16 %p to i32
873  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
874  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %base, i32 1, <4 x i1> %2)
875  ret void
876}
877
878declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32 immarg, <4 x i1>)
879
880define arm_aapcs_vfpcc void @test_vstrbq_p_u8(ptr %base, <16 x i8> %value, i16 zeroext %p) {
881; CHECK-LABEL: test_vstrbq_p_u8:
882; CHECK:       @ %bb.0: @ %entry
883; CHECK-NEXT:    vmsr p0, r1
884; CHECK-NEXT:    vpst
885; CHECK-NEXT:    vstrbt.8 q0, [r0]
886; CHECK-NEXT:    bx lr
887entry:
888  %0 = zext i16 %p to i32
889  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
890  call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1)
891  ret void
892}
893
894define arm_aapcs_vfpcc void @test_vstrbq_p_u16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
895; CHECK-LABEL: test_vstrbq_p_u16:
896; CHECK:       @ %bb.0: @ %entry
897; CHECK-NEXT:    vmsr p0, r1
898; CHECK-NEXT:    vpst
899; CHECK-NEXT:    vstrbt.16 q0, [r0]
900; CHECK-NEXT:    bx lr
901entry:
902  %0 = trunc <8 x i16> %value to <8 x i8>
903  %1 = zext i16 %p to i32
904  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
905  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %base, i32 1, <8 x i1> %2)
906  ret void
907}
908
909define arm_aapcs_vfpcc void @test_vstrbq_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
910; CHECK-LABEL: test_vstrbq_p_u32:
911; CHECK:       @ %bb.0: @ %entry
912; CHECK-NEXT:    vmsr p0, r1
913; CHECK-NEXT:    vpst
914; CHECK-NEXT:    vstrbt.32 q0, [r0]
915; CHECK-NEXT:    bx lr
916entry:
917  %0 = trunc <4 x i32> %value to <4 x i8>
918  %1 = zext i16 %p to i32
919  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
920  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %base, i32 1, <4 x i1> %2)
921  ret void
922}
923
924define arm_aapcs_vfpcc void @test_vstrhq_f16(ptr %base, <8 x half> %value) {
925; CHECK-LABEL: test_vstrhq_f16:
926; CHECK:       @ %bb.0: @ %entry
927; CHECK-NEXT:    vstrh.16 q0, [r0]
928; CHECK-NEXT:    bx lr
929entry:
930  store <8 x half> %value, ptr %base, align 2
931  ret void
932}
933
; vstrhq_s16: plain <8 x i16> store with align 2 selects to a single vstrh.16.
define arm_aapcs_vfpcc void @test_vstrhq_s16(ptr %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <8 x i16> %value, ptr %base, align 2
  ret void
}
943
; vstrhq_s32: narrowing store. Each <4 x i32> lane is truncated to i16 and the
; resulting <4 x i16> store selects to the truncating vstrh.32.
define arm_aapcs_vfpcc void @test_vstrhq_s32(ptr %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  store <4 x i16> %0, ptr %base, align 2
  ret void
}
954
; vstrhq_u16: identical IR to the s16 variant (signedness only matters in the
; source-level intrinsic); selects to a single vstrh.16.
define arm_aapcs_vfpcc void @test_vstrhq_u16(ptr %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <8 x i16> %value, ptr %base, align 2
  ret void
}
964
; vstrhq_u32: narrowing store, same IR as the s32 variant; the trunc-then-store
; pattern selects to the truncating vstrh.32.
define arm_aapcs_vfpcc void @test_vstrhq_u32(ptr %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  store <4 x i16> %0, ptr %base, align 2
  ret void
}
975
; vstrhq_p_f16: predicated <8 x half> store. %p is expanded to an <8 x i1>
; mask via llvm.arm.mve.pred.i2v and the masked store selects to
; vmsr p0 + vpst + vstrht.16.
define arm_aapcs_vfpcc void @test_vstrhq_p_f16(ptr %base, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.masked.store.v8f16.p0(<8 x half> %value, ptr %base, i32 2, <8 x i1> %1)
  ret void
}
989
; vstrhq_p_s16: predicated <8 x i16> store; masked store with an
; <8 x i1> MVE predicate selects to vpst + vstrht.16.
define arm_aapcs_vfpcc void @test_vstrhq_p_s16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1)
  ret void
}
1003
; vstrhq_p_s32: predicated narrowing store. <4 x i32> lanes are truncated to
; i16, %p becomes a <4 x i1> mask, and the masked <4 x i16> store selects to
; the predicated truncating vstrht.32.
define arm_aapcs_vfpcc void @test_vstrhq_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %base, i32 2, <4 x i1> %2)
  ret void
}
1018
; Declaration of the generic masked-store intrinsic used by the <4 x i16>
; (vstrhq_p_*32) tests in this file.
declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32 immarg, <4 x i1>)
1020
; vstrhq_p_u16: identical IR to the s16 predicated variant; selects to
; vpst + vstrht.16.
define arm_aapcs_vfpcc void @test_vstrhq_p_u16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1)
  ret void
}
1034
; vstrhq_p_u32: predicated narrowing store, same IR as the s32 variant;
; trunc + masked store selects to the predicated truncating vstrht.32.
define arm_aapcs_vfpcc void @test_vstrhq_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %base, i32 2, <4 x i1> %2)
  ret void
}
1049
; vstrwq_f32: plain <4 x float> store with align 4 selects to a single
; vstrw.32.
define arm_aapcs_vfpcc void @test_vstrwq_f32(ptr %base, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <4 x float> %value, ptr %base, align 4
  ret void
}
1059
; vstrwq_s32: plain <4 x i32> store with align 4 selects to a single vstrw.32.
define arm_aapcs_vfpcc void @test_vstrwq_s32(ptr %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <4 x i32> %value, ptr %base, align 4
  ret void
}
1069
; vstrwq_u32: identical IR to the s32 variant; selects to a single vstrw.32.
define arm_aapcs_vfpcc void @test_vstrwq_u32(ptr %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <4 x i32> %value, ptr %base, align 4
  ret void
}
1079
; vstrwq_p_f32: predicated <4 x float> store. %p is expanded to a <4 x i1>
; mask via llvm.arm.mve.pred.i2v and the masked store selects to
; vmsr p0 + vpst + vstrwt.32.
define arm_aapcs_vfpcc void @test_vstrwq_p_f32(ptr %base, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.masked.store.v4f32.p0(<4 x float> %value, ptr %base, i32 4, <4 x i1> %1)
  ret void
}
1093
; vstrwq_p_s32: predicated <4 x i32> store; masked store with a <4 x i1> MVE
; predicate selects to vpst + vstrwt.32.
define arm_aapcs_vfpcc void @test_vstrwq_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1)
  ret void
}
1107
; vstrwq_p_u32: identical IR to the s32 predicated variant; selects to
; vpst + vstrwt.32.
define arm_aapcs_vfpcc void @test_vstrwq_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1)
  ret void
}
1121