; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

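; Zero-extended i16 offsets can be loaded with vldrh.u16 and fed straight into the
; scaled gather form, vldrh.u16 q0, [base, offsets, uxtw #1].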
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8f16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_half(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8f16_half:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

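; Sign-extended offsets do not match the unsigned (uxtw) scaled gather form, so the
; gather is expanded into individual ldrh loads.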
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_sext(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmov r3, lr, d1
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.sext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

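; The f16 version of the sign-extended case is expanded in the same way, rebuilding
; the result with vldr.16 and vins.f16.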
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_sext(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8f16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    vldr.16 s4, [r3]
; CHECK-NEXT:    vldr.16 s0, [r2]
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vldr.16 s2, [r3]
; CHECK-NEXT:    vldr.16 s1, [r2]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vins.f16 s1, s2
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vmov r0, r1, d3
; CHECK-NEXT:    vins.f16 s2, s4
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vldr.16 s3, [r0]
; CHECK-NEXT:    vins.f16 s3, s4
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.sext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

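; i8 offsets are zero-extended with vldrb.u16 and still use the scaled gather form.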
define arm_aapcs_vfpcc <8 x i16> @unsigned_scaled_v8i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_scaled_v8i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x half> @unsigned_scaled_v8f16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_scaled_v8f16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

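; Passthru handling: with an all-true mask the passthru is dead and a plain gather is
; emitted. A partly-false mask needs a predicated gather (vldrht), plus a vpsel to
; merge in the passthru when it is not all-zero.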
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru0t(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru0t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> zeroinitializer)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru1t(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru1t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru1f(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru1f:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r2, #65487
; CHECK-NEXT:    vmov.i16 q0, #0x1
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q2, [r0, q1, uxtw #1]
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru0f(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru0f:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r2, #65523
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x i16> %gather
}

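; A mask computed from an icmp becomes a VPT block predicating the gather.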
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp0(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %mask, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp1(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vmov.i16 q0, #0x1
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vldrht.u16 q2, [r0, q1, uxtw #1]
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %mask, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}

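; Two geps with variable offsets: the extra constant offset (+20 halfwords) is added
; separately and the gather is expanded to scalar loads.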
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1, #8]
; CHECK-NEXT:    mov.w r12, #40
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r12
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    vmov r2, r4, d1
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r12
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh.w r12, [lr]
; CHECK-NEXT:    ldrh.w lr, [r4]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r3
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r2
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %offs
  %ptrs2 = getelementptr inbounds i16, <8 x ptr> %ptrs, i16 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

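; With constant indices both geps fold into a single vector of byte offsets that is
; loaded from the constant pool and used with the unscaled gather form.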
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep2(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI14_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI14_0:
; CHECK-NEXT:    .short 40 @ 0x28
; CHECK-NEXT:    .short 46 @ 0x2e
; CHECK-NEXT:    .short 52 @ 0x34
; CHECK-NEXT:    .short 58 @ 0x3a
; CHECK-NEXT:    .short 64 @ 0x40
; CHECK-NEXT:    .short 70 @ 0x46
; CHECK-NEXT:    .short 76 @ 0x4c
; CHECK-NEXT:    .short 82 @ 0x52
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i16 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

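; Same folding with i32 indices: the combined byte offsets still fit in 16 bits, so
; the constant-pool offsets feed the unscaled gather.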
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI15_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI15_0:
; CHECK-NEXT:    .short 40 @ 0x28
; CHECK-NEXT:    .short 46 @ 0x2e
; CHECK-NEXT:    .short 52 @ 0x34
; CHECK-NEXT:    .short 58 @ 0x3a
; CHECK-NEXT:    .short 64 @ 0x40
; CHECK-NEXT:    .short 70 @ 0x46
; CHECK-NEXT:    .short 76 @ 0x4c
; CHECK-NEXT:    .short 82 @ 0x52
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i32 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

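; Plain constant i32 indices that fit in 16 bits use the scaled (uxtw #1) form.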
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep2(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI16_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI16_0:
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 3 @ 0x3
; CHECK-NEXT:    .short 6 @ 0x6
; CHECK-NEXT:    .short 9 @ 0x9
; CHECK-NEXT:    .short 12 @ 0xc
; CHECK-NEXT:    .short 15 @ 0xf
; CHECK-NEXT:    .short 18 @ 0x12
; CHECK-NEXT:    .short 21 @ 0x15
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

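; Offsets pushed beyond the unsigned 16-bit range (biggep3, biggep4) cannot be encoded
; in the gather instruction and are expanded.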
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep3(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI17_0
; CHECK-NEXT:    adr r2, .LCPI17_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI17_0:
; CHECK-NEXT:    .long 131096 @ 0x20018
; CHECK-NEXT:    .long 131102 @ 0x2001e
; CHECK-NEXT:    .long 131108 @ 0x20024
; CHECK-NEXT:    .long 131114 @ 0x2002a
; CHECK-NEXT:  .LCPI17_1:
; CHECK-NEXT:    .long 131072 @ 0x20000
; CHECK-NEXT:    .long 131078 @ 0x20006
; CHECK-NEXT:    .long 131084 @ 0x2000c
; CHECK-NEXT:    .long 131090 @ 0x20012
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i32 65536
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep4(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI18_0
; CHECK-NEXT:    adr r2, .LCPI18_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI18_0:
; CHECK-NEXT:    .long 24 @ 0x18
; CHECK-NEXT:    .long 131072 @ 0x20000
; CHECK-NEXT:    .long 36 @ 0x24
; CHECK-NEXT:    .long 42 @ 0x2a
; CHECK-NEXT:  .LCPI18_1:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 12 @ 0xc
; CHECK-NEXT:    .long 18 @ 0x12
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 65536, i32 18, i32 21>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

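; A vector-of-pointers base with a large increment (biggep5), an oversized lane index
; (biggep6) and i16 index arithmetic that wraps (biggep7) are likewise expanded.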
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep5(<8 x ptr> %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep5:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    mov.w r12, #131072
; CHECK-NEXT:    vadd.i32 q0, q0, r12
; CHECK-NEXT:    vadd.i32 q1, q1, r12
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r12, d1
; CHECK-NEXT:    vmov r3, lr, d3
; CHECK-NEXT:    vmov r1, r2, d2
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r6, [r3]
; CHECK-NEXT:    ldrh.w r3, [r12]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[3], r3
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r6
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %base, i32 65536
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep6(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep6:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI20_0
; CHECK-NEXT:    adr r2, .LCPI20_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI20_0:
; CHECK-NEXT:    .long 131074 @ 0x20002
; CHECK-NEXT:    .long 32 @ 0x20
; CHECK-NEXT:    .long 38 @ 0x26
; CHECK-NEXT:    .long 44 @ 0x2c
; CHECK-NEXT:  .LCPI20_1:
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 8 @ 0x8
; CHECK-NEXT:    .long 14 @ 0xe
; CHECK-NEXT:    .long 20 @ 0x14
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 65536, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i32 1
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep7(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_biggep7:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI21_0
; CHECK-NEXT:    adr r2, .LCPI21_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI21_0:
; CHECK-NEXT:    .long 1224 @ 0x4c8
; CHECK-NEXT:    .long 1230 @ 0x4ce
; CHECK-NEXT:    .long 1236 @ 0x4d4
; CHECK-NEXT:    .long 1242 @ 0x4da
; CHECK-NEXT:  .LCPI21_1:
; CHECK-NEXT:    .long 128 @ 0x80
; CHECK-NEXT:    .long 1206 @ 0x4b6
; CHECK-NEXT:    .long 1212 @ 0x4bc
; CHECK-NEXT:    .long 1218 @ 0x4c2
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> <i16 65000, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i16 600
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

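; An i32-typed base scales the offsets by 4, which the halfword gather's #1 shift
; cannot represent, so the gather is expanded.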
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_basei32(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_basei32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.u32 q0, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmov r3, lr, d1
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offs.zext
  %ptrs.cast = bitcast <8 x ptr> %ptrs to <8 x ptr>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs.cast, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) #1
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) #1
declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>) #1