xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll (revision b5b663aac17415625340eb29c8010832bfc4c21c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Gather of 8 bytes at %base + %offset[i]; last flag 0 selects sign-extension
; to i16 lanes, so llc must emit the .s16 form (vldrb.s16).
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}
14
15declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)
16
; Gather of 4 bytes, sign-extended (flag 0) to i32 lanes -> vldrb.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
  ret <4 x i32> %0
}
27
28declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr, <4 x i32>, i32, i32, i32)
29
; Byte gather into i8 lanes: no widening happens, so despite the signed flag
; the expected mnemonic is the plain vldrb.u8 form.
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(ptr %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
  ret <16 x i8> %0
}
40
41declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr, <16 x i8>, i32, i32, i32)
42
; Gather of 8 bytes, zero-extended (flag 1) to i16 lanes -> vldrb.u16.
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
  ret <8 x i16> %0
}
53
; Gather of 4 bytes, zero-extended (flag 1) to i32 lanes -> vldrb.u32.
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
  ret <4 x i32> %0
}
64
; Byte gather into i8 lanes, unsigned flag: same vldrb.u8 as the s8 case.
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(ptr %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
  ret <16 x i8> %0
}
75
; Predicated variant: %p moves into p0 via vmsr, and the gather issues inside
; a VPT block as vldrbt.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}
90
91declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
92
93declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)
94
; Predicated byte->i32 sign-extending gather -> vldrbt.s32 under VPT.
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}
109
110declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
111
112declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)
113
; Predicated byte gather into i8 lanes: no widening, so vldrbt.u8 is expected
; even with the signed flag.
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(ptr %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}
128
129declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
130
131declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr, <16 x i8>, i32, i32, i32, <16 x i1>)
132
; Predicated byte->i16 zero-extending gather -> vldrbt.u16 under VPT.
define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}
147
; Predicated byte->i32 zero-extending gather -> vldrbt.u32 under VPT.
define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
162
; Predicated unsigned byte gather into i8 lanes -> vldrbt.u8 under VPT.
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(ptr %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}
177
; Vector-base gather: %addr holds 2 pointers; loads 64-bit lanes at a +616
; immediate offset -> vldrd.u64 [q0, #616].
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #616]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}
188
189declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)
190
; Vector-base 64-bit gather with a negative immediate offset (#-336).
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #-336]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
  ret <2 x i64> %0
}
201
; Writeback form: the intrinsic returns {data, updated base}; the updated base
; (field 1) is stored back to %addr, matching the "!" post-update in the CHECK.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(ptr %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #576]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, ptr %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}
217
218declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)
219
; Writeback 64-bit gather with a negative immediate (#-328).
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(ptr %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #-328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, ptr %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}
235
; Predicated writeback gather: vldrdt.u64 with post-update under VPT.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #664]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 664, <2 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}
255
256declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
257declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i1>)
258
; Predicated writeback gather, unsigned naming variant (#656 offset).
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #656]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 656, <2 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}
278
; Predicated vector-base 64-bit gather (no writeback) at +888.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #888]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i1> %1)
  ret <2 x i64> %2
}
293
294declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i1>)
295
; Predicated vector-base 64-bit gather with negative immediate (#-1000).
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #-1000]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 -1000, <2 x i1> %1)
  ret <2 x i64> %2
}
310
; 64-bit offset gather: full-width loads take no extension, so both signed and
; unsigned variants lower to vldrd.u64.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
  ret <2 x i64> %0
}
321
322declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr, <2 x i64>, i32, i32, i32)
323
; Unsigned-flag twin of the s64 offset gather; identical vldrd.u64 output.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
  ret <2 x i64> %0
}
334
; Predicated 64-bit offset gather -> vldrdt.u64 under VPT.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <2 x i1> %1)
  ret <2 x i64> %2
}
349
350declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr, <2 x i64>, i32, i32, i32, <2 x i1>)
351
; Predicated unsigned 64-bit offset gather; same vldrdt.u64 as the z_s64 case.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <2 x i1> %1)
  ret <2 x i64> %2
}
366
; Shifted-offset gather: shift arg 3 scales each offset by 8 bytes, matching
; the "uxtw #3" addressing form.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
  ret <2 x i64> %0
}
377
; Unsigned twin of the shifted 64-bit gather; identical uxtw #3 form.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(ptr %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
  ret <2 x i64> %0
}
388
; Predicated shifted 64-bit gather -> vldrdt.u64 ... uxtw #3 under VPT.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <2 x i1> %1)
  ret <2 x i64> %2
}
403
; Predicated unsigned shifted 64-bit gather; same lowering as the z_s64 twin.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <2 x i1> %1)
  ret <2 x i64> %2
}
418
; Half-precision gather: f16 lanes load as raw 16-bit data -> vldrh.u16.
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x half> %0
}
429
430declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)
431
; Halfword gather into i16 lanes: no widening, so vldrh.u16 regardless of the
; signed flag.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x i16> %0
}
442
443
; Halfword gather sign-extended (flag 0) to i32 lanes -> vldrh.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
  ret <4 x i32> %0
}
454
455
; Unsigned halfword gather into i16 lanes; same vldrh.u16 as the s16 case.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
  ret <8 x i16> %0
}
466
; Halfword gather zero-extended (flag 1) to i32 lanes -> vldrh.u32.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
  ret <4 x i32> %0
}
477
; Predicated f16 gather -> vldrht.u16 under VPT.
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}
492
493declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)
494
; Predicated halfword gather into i16 lanes (no widening) -> vldrht.u16.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}
509
510
; Predicated halfword->i32 sign-extending gather -> vldrht.s32 under VPT.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}
525
526
; Predicated unsigned halfword gather into i16 lanes -> vldrht.u16.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}
541
; Predicated halfword->i32 zero-extending gather -> vldrht.u32 under VPT.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
556
; Shifted f16 gather: shift arg 1 scales offsets by 2 bytes -> uxtw #1.
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x half> %0
}
567
; Shifted halfword gather into i16 lanes (no widening) -> vldrh.u16 uxtw #1.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x i16> %0
}
578
; Shifted halfword->i32 sign-extending gather -> vldrh.s32 uxtw #1.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
  ret <4 x i32> %0
}
589
; Unsigned shifted halfword gather into i16 lanes -> vldrh.u16 uxtw #1.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
  ret <8 x i16> %0
}
600
; Shifted halfword->i32 zero-extending gather -> vldrh.u32 uxtw #1.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
  ret <4 x i32> %0
}
611
; Predicated shifted f16 gather -> vldrht.u16 ... uxtw #1 under VPT.
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}
626
; Predicated shifted halfword gather into i16 lanes -> vldrht.u16 uxtw #1.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}
641
; Predicated shifted halfword->i32 sign-extending gather -> vldrht.s32 uxtw #1.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}
656
; Predicated unsigned shifted halfword gather -> vldrht.u16 uxtw #1.
define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}
671
; Predicated shifted halfword->i32 zero-extending gather -> vldrht.u32 uxtw #1.
define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
686
; Vector-base f32 gather: 4 pointers in %addr plus immediate #12 -> vldrw.u32.
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #12]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
  ret <4 x float> %0
}
697
698declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)
699
; Vector-base i32 gather at immediate #400 -> vldrw.u32.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #400]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
  ret <4 x i32> %0
}
710
711declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)
712
; Vector-base i32 gather at immediate #284; same lowering as the s32 twin.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #284]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
  ret <4 x i32> %0
}
723
; Writeback f32 gather: returns {data, updated base}; updated base (field 1)
; is stored back, matching the "!" addressing with a negative offset.
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(ptr %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #-64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
  %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
  store <4 x i32> %2, ptr %addr, align 8
  %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
  ret <4 x float> %3
}
739
740declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)
741
; Writeback i32 gather at +80 with the updated base stored back to %addr.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(ptr %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #80]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, ptr %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}
757
758declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)
759
; Same as the s32 write-back case but with immediate 480, near the top of the encodable offset range (vldrw.u32 [q, #480]!).
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(ptr %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #480]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, ptr %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}
775
; Predicated (zeroing) write-back gather of <4 x float>: predicate comes in as i16 via vmsr/vpst, negative immediate -352 (vldrwt.u32 [q, #-352]!).
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #-352]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
  %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
  store <4 x i32> %4, ptr %addr, align 8
  %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
  ret <4 x float> %5
}
795
796declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
797
; Predicated write-back gather of <4 x i32>, immediate 276 (vldrwt.u32 [q, #276]! under vpst).
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #276]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, ptr %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}
817
818declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
819
; Predicated write-back gather of <4 x i32>, immediate 88 (vldrwt.u32 [q, #88]! under vpst).
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #88]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, ptr %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}
839
; Predicated (non-write-back) gather of <4 x float> with negative immediate -300 (vldrwt.u32 [q, #-300] under vpst).
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #-300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
  ret <4 x float> %2
}
854
855declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
856
; Predicated gather of <4 x i32> with immediate 440 (vldrwt.u32 [q, #440] under vpst).
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #440]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
  ret <4 x i32> %2
}
871
872declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
873
; Predicated gather of <4 x i32> with immediate 300 (vldrwt.u32 [q, #300] under vpst).
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
  ret <4 x i32> %2
}
888
; Scalar base + vector of unscaled byte offsets, <4 x float> elements (memsize 32, shift 0, unsigned-ext flag 0): vldrw.u32 [r, q].
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
  ret <4 x float> %0
}
899
900declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr, <4 x i32>, i32, i32, i32)
901
; Scalar base + vector offsets, signed i32 variant (last intrinsic arg 0): same vldrw.u32 [r, q] lowering as unsigned.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
  ret <4 x i32> %0
}
912
913
; Scalar base + vector offsets, unsigned i32 variant (last intrinsic arg 1): at 32-bit element size the lowering is identical.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 1)
  ret <4 x i32> %0
}
924
; Predicated scalar-base + vector-offset gather of <4 x float>: vldrwt.u32 [r, q] under vpst.
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_offset_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
  ret <4 x float> %2
}
939
940declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)
941
; Predicated scalar-base + vector-offset gather, signed i32 variant: vldrwt.u32 [r, q] under vpst.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}
956
957
; Predicated scalar-base + vector-offset gather, unsigned i32 variant (last flag 1): vldrwt.u32 [r, q] under vpst.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
972
; Shifted-offset gather of <4 x float>: offsets scaled by 4 (shift arg 2), selecting the uxtw #2 addressing form.
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
  ret <4 x float> %0
}
983
; Shifted-offset gather of <4 x i32>, signed variant: vldrw.u32 [r, q, uxtw #2].
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
  ret <4 x i32> %0
}
994
; Shifted-offset gather of <4 x i32>, unsigned variant (last flag 1): vldrw.u32 [r, q, uxtw #2].
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(ptr %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 1)
  ret <4 x i32> %0
}
1005
; Predicated shifted-offset gather of <4 x float>: vldrwt.u32 [r, q, uxtw #2] under vpst.
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
  ret <4 x float> %2
}
1020
; Predicated shifted-offset gather of <4 x i32>, signed variant: vldrwt.u32 [r, q, uxtw #2] under vpst.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}
1035
; Predicated shifted-offset gather of <4 x i32>, unsigned variant: vldrwt.u32 [r, q, uxtw #2] under vpst.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
1050
; Predicated byte scatter of 8 x i16 values (memsize 8, truncating store): vstrbt.16 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
  ret void
}
1064
1065declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)
1066
; Predicated byte scatter of 4 x i32 values (memsize 8, truncating store): vstrbt.32 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
  ret void
}
1080
1081declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
1082
; Predicated byte scatter of 16 x i8 values: vstrbt.8 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
  ret void
}
1096
1097declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)
1098
; Unsigned twin of test_vstrbq_scatter_offset_p_s16; lowers identically (stores are sign-agnostic): vstrbt.16 [r, q].
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
  ret void
}
1112
; Unsigned twin of test_vstrbq_scatter_offset_p_s32; same vstrbt.32 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
  ret void
}
1126
; Unsigned twin of test_vstrbq_scatter_offset_p_s8; same vstrbt.8 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
  ret void
}
1140
; Unpredicated byte scatter of 8 x i16 values (truncating): vstrb.16 [r, q].
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
  ret void
}
1150
1151declare void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr, <8 x i16>, <8 x i16>, i32, i32)
1152
; Unpredicated byte scatter of 4 x i32 values (truncating): vstrb.32 [r, q].
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
  ret void
}
1162
1163declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr, <4 x i32>, <4 x i32>, i32, i32)
1164
; Unpredicated byte scatter of 16 x i8 values: vstrb.8 [r, q].
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
  ret void
}
1174
1175declare void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr, <16 x i8>, <16 x i8>, i32, i32)
1176
; Unsigned twin of test_vstrbq_scatter_offset_s16; same vstrb.16 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
  ret void
}
1186
; Unsigned twin of test_vstrbq_scatter_offset_s32; same vstrb.32 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
  ret void
}
1196
; Unsigned twin of test_vstrbq_scatter_offset_s8; same vstrb.8 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(ptr %base, <16 x i8> %offset, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_scatter_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
  ret void
}
1206
; Predicated 64-bit scatter to vector bases + imm 888: vstrdt.64 [q, #888] under vpst.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [q0, #888]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <2 x i1> %1)
  ret void
}
1220
1221declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)
1222
; Predicated 64-bit scatter to vector bases + imm 264: vstrdt.64 [q, #264] under vpst.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [q0, #264]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <2 x i1> %1)
  ret void
}
1236
; Unpredicated 64-bit scatter to vector bases + imm 408: vstrd.64 [q, #408].
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [q0, #408]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value)
  ret void
}
1246
1247declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
1248
; Unpredicated 64-bit scatter with a negative immediate: vstrd.64 [q, #-472].
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [q0, #-472]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
  ret void
}
1258
; Predicated write-back 64-bit scatter: bases loaded from %addr, advanced by 248, stored back (vstrdt.64 [q, #248]! under vpst).
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q0, [q1, #248]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 248, <2 x i64> %value, <2 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}
1276
1277declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)
1278
; Predicated write-back 64-bit scatter, immediate 136 (vstrdt.64 [q, #136]! under vpst).
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q0, [q1, #136]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 136, <2 x i64> %value, <2 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}
1296
; Unpredicated write-back 64-bit scatter, immediate 208 (vstrd.64 [q, #208]!).
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(ptr %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrd.64 q0, [q1, #208]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
  store <2 x i64> %1, ptr %addr, align 8
  ret void
}
1310
1311declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
1312
; Unpredicated write-back 64-bit scatter with negative immediate (vstrd.64 [q, #-168]!).
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(ptr %addr, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrd.64 q0, [q1, #-168]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value)
  store <2 x i64> %1, ptr %addr, align 8
  ret void
}
1326
; Predicated 64-bit scatter, scalar base + unscaled vector offsets: vstrdt.64 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <2 x i1> %1)
  ret void
}
1340
1341declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <2 x i1>)
1342
; Unsigned twin of test_vstrdq_scatter_offset_p_s64; same vstrdt.64 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <2 x i1> %1)
  ret void
}
1356
; Unpredicated 64-bit scatter, scalar base + unscaled vector offsets: vstrd.64 [r, q].
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
  ret void
}
1366
1367declare void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr, <2 x i64>, <2 x i64>, i32, i32)
1368
; Unsigned twin of test_vstrdq_scatter_offset_s64; same vstrd.64 [r, q] lowering.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
  ret void
}
1378
; Predicated 64-bit scatter with offsets scaled by 8 (shift arg 3): vstrdt.64 [r, q, uxtw #3] under vpst.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <2 x i1> %1)
  ret void
}
1392
; Unsigned twin of test_vstrdq_scatter_shifted_offset_p_s64; same vstrdt.64 [r, q, uxtw #3] lowering.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrdt.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <2 x i1> %1)
  ret void
}
1406
; Unpredicated 64-bit scatter with offsets scaled by 8: vstrd.64 [r, q, uxtw #3].
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
  ret void
}
1416
; Unsigned twin of test_vstrdq_scatter_shifted_offset_s64; same vstrd.64 [r, q, uxtw #3] lowering.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrd.64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
  ret void
}
1426
; Unpredicated halfword scatter of 8 x half values (memsize 16): vstrh.16 [r, q].
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(ptr %base, <8 x i16> %offset, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
  ret void
}
1436
1437declare void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr, <8 x i16>, <8 x half>, i32, i32)
1438
; Predicated halfword scatter of 8 x half values: vstrht.16 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}
1452
1453declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)
1454
; Predicated halfword scatter of 8 x i16 values (memsize 16): vstrht.16 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}
1468
1469
; Predicated halfword scatter of 4 x i32 values (memsize 16, truncating): vstrht.32 [r, q] under vpst.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}
1483
1484
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the s16 variant (stores are sign-agnostic);
  ; expects the identical vstrht.16 lowering.
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}
1498
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the s32 variant (stores are sign-agnostic);
  ; expects the identical vstrht.32 lowering.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}
1512
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated s16 scatter (i32 16, i32 0 = 16-bit elements, unshifted
  ; offsets), lowered to a single vstrh.16.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}
1522
1523
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated truncating scatter: i32 lanes stored as 16-bit elements
  ; (i32 16, i32 0), lowered to vstrh.32.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}
1533
1534
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the s16 variant; identical vstrh.16 lowering.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}
1544
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the s32 variant; identical vstrh.32 lowering.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}
1554
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(ptr %base, <8 x i16> %offset, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Shifted-offset form: trailing i32 1 selects offsets scaled by the element
  ; size, shown as "uxtw #1" in the addressing mode.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
  ret void
}
1564
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated + shifted-offset f16 scatter (i32 16, i32 1): expects
  ; vstrht.16 with "uxtw #1" inside a VPT block.
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}
1578
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated + shifted-offset s16 scatter (i32 16, i32 1).
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}
1592
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated, truncating (i32 lanes -> 16-bit elements), shifted-offset
  ; scatter (i32 16, i32 1): expects vstrht.32 with "uxtw #1" under VPT.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
  ret void
}
1606
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the shifted p_s16 variant; identical lowering.
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}
1620
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the shifted p_s32 variant; identical lowering.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
  ret void
}
1634
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated shifted-offset s16 scatter (i32 16, i32 1).
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
  ret void
}
1644
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated truncating shifted-offset scatter (i32 16, i32 1).
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
  ret void
}
1654
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the shifted s16 variant; identical lowering.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
  ret void
}
1664
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the shifted s32 variant; identical lowering.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
  ret void
}
1674
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [q0, #380]
; CHECK-NEXT:    bx lr
entry:
  ; Vector-base scatter: each lane of %addr is a pointer, plus immediate
  ; offset 380, folded into the [q0, #380] addressing mode.
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
  ret void
}
1684
1685declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
1686
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [q0, #-400]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated vector-base scatter with a negative immediate (-400);
  ; expects vstrwt.32 under VPT.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1)
  ret void
}
1700
1701declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
1702
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [q0, #48]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated vector-base scatter, immediate offset 48.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
  ret void
}
1716
1717declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
1718
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [q0, #-376]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated vector-base scatter, negative immediate offset -376.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1)
  ret void
}
1732
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [q0, #156]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated vector-base scatter, immediate offset 156.
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
  ret void
}
1742
1743declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
1744
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [q0, #212]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated vector-base scatter, immediate offset 212.
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
  ret void
}
1754
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(ptr %addr, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrw.32 q0, [q1, #-412]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  ; Writeback form: the intrinsic returns the updated base vector, which is
  ; stored back to %addr; the load/scatter/store sequence is expected to fold
  ; into the pre-writeback "[q1, #-412]!" addressing mode.
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value)
  store <4 x i32> %1, ptr %addr, align 8
  ret void
}
1768
1769declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
1770
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(ptr %addr, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [q1, #236]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated writeback scatter: updated base vector is returned and stored
  ; back to %addr; expects vstrwt.32 with writeback ("!") under VPT.
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2)
  store <4 x i32> %3, ptr %addr, align 8
  ret void
}
1788
1789declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
1790
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(ptr %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [q1, #328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated writeback scatter, immediate 328; updated bases written back.
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2)
  store <4 x i32> %3, ptr %addr, align 8
  ret void
}
1808
1809declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
1810
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(ptr %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [q1, #412]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated writeback scatter, immediate 412; updated bases written back.
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2)
  store <4 x i32> %3, ptr %addr, align 8
  ret void
}
1828
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(ptr %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrw.32 q0, [q1, #-152]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated writeback scatter, negative immediate -152.
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value)
  store <4 x i32> %1, ptr %addr, align 8
  ret void
}
1842
1843declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
1844
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(ptr %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vstrw.32 q0, [q1, #64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated writeback scatter, immediate 64.
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value)
  store <4 x i32> %1, ptr %addr, align 8
  ret void
}
1858
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(ptr %base, <4 x i32> %offset, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated f32 scatter (i32 32, i32 0 = 32-bit elements, unshifted
  ; offsets), lowered to vstrw.32.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
  ret void
}
1868
1869declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr, <4 x i32>, <4 x float>, i32, i32)
1870
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated f32 scatter (i32 32, i32 0); expects vstrwt.32 under VPT.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}
1884
1885declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)
1886
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated s32 scatter (i32 32, i32 0); expects vstrwt.32 under VPT.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}
1900
1901
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the p_s32 variant; identical vstrwt.32 lowering.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}
1915
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated s32 scatter (i32 32, i32 0), lowered to vstrw.32.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
  ret void
}
1925
1926
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the s32 variant; identical vstrw.32 lowering.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
  ret void
}
1936
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(ptr %base, <4 x i32> %offset, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  ; Shifted-offset f32 scatter (trailing i32 2 = offsets scaled for 32-bit
  ; elements), shown as "uxtw #2" in the addressing mode.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
  ret void
}
1946
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated + shifted-offset f32 scatter (i32 32, i32 2): expects
  ; vstrwt.32 with "uxtw #2" inside a VPT block.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}
1960
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  ; Predicated + shifted-offset s32 scatter (i32 32, i32 2).
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}
1974
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the shifted p_s32 variant; identical lowering.
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}
1988
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  ; Unpredicated shifted-offset s32 scatter (i32 32, i32 2).
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
  ret void
}
1998
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  ; Same intrinsic call as the shifted s32 variant; identical lowering.
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
  ret void
}
2008