; (source: llvm/test/CodeGen/Thumb2/mve-gather-ind32-unscaled.ll @ edb2fc6dab2cf04779959829434e9e8572d48a26)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; Gather of i8 elements at i32 byte offsets, zero-extended to i32 -> vldrb.u32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; Gather of i8 elements at i32 byte offsets, sign-extended to i32 -> vldrb.s32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; Gather of i16 elements at i32 byte offsets, zero-extended to i32 -> vldrh.u32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i16_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; Gather of i16 elements at i32 byte offsets, sign-extended to i32 -> vldrh.s32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i16_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; Gather of i32 elements at i32 byte offsets -> vldrw.u32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: unscaled_i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Gather of float elements at i32 byte offsets -> vldrw.u32 with vector offsets.
define arm_aapcs_vfpcc <4 x float> @unscaled_f32_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: unscaled_f32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

; Gather of i32 elements with zero-extended i16 byte offsets -> vldrh.u32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Gather of i32 elements with sign-extended i16 byte offsets -> vldrh.s32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: signed_unscaled_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Gather of float elements with zero-extended i16 byte offsets -> vldrh.u32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

; Gather of float elements with sign-extended i16 byte offsets -> vldrh.s32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: b_signed_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

; i16 gather, zero-extended result, with sign-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i16 gather, sign-extended result, with sign-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; i16 gather, zero-extended result, with zero-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i16 gather, sign-extended result, with zero-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; i8 gather, zero-extended result, with sign-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i8 gather, sign-extended result, with sign-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; i8 gather, zero-extended result, with zero-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i8 gather, sign-extended result, with zero-extended i16 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; Gather of i32 elements with zero-extended i8 byte offsets -> vldrb.u32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Gather of i32 elements with sign-extended i8 byte offsets -> vldrb.s32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: signed_unscaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Gather of float elements with zero-extended i8 byte offsets -> vldrb.u32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

; Gather of float elements with sign-extended i8 byte offsets -> vldrb.s32 offsets + vldrw.u32 gather.
define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: b_signed_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

; i16 gather, zero-extended result, with sign-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i16 gather, sign-extended result, with sign-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; i16 gather, zero-extended result, with zero-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i16 gather, sign-extended result, with zero-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; i8 gather, zero-extended result, with sign-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i8 gather, sign-extended result, with sign-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; i8 gather, zero-extended result, with zero-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

; i8 gather, sign-extended result, with zero-extended i8 byte offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; Gather directly from a vector of pointers with a constant element offset.
; VLDRW.u32 Qd, [P, 4]
define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x ptr> %p) {
; CHECK-LABEL: qi4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #16
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Same as qi4 but with align 1: the gather is scalarized to four ldr instructions.
define arm_aapcs_vfpcc <4 x i32> @qi4_unaligned(<4 x ptr> %p) {
; CHECK-LABEL: qi4_unaligned:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #16
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r1, [r1]
; CHECK-NEXT:    ldr r3, [r3]
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; Masked-gather intrinsic declarations used by the tests above.
declare <4 x i8>  @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)