; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

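; i16 offsets zero-extended to i32 byte offsets; the gathered i8 data is zero-extended to i16. Lowered to a single vldrb.u16 gather.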
define arm_aapcs_vfpcc <8 x i16> @zext_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrb.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %gather.zext = zext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %gather.zext
}

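; The <8 x i8> offsets are fed to the GEP without an explicit extension; this case is currently scalarized into eight ldrb loads plus a vmovlb.u8.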
define arm_aapcs_vfpcc <8 x i16> @zext_unscaled_i8_i16_noext(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unscaled_i8_i16_noext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrb.s32 q0, [r1, #4]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, lr, d1
; CHECK-NEXT:    vmov r12, r3, d0
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrb r6, [r2]
; CHECK-NEXT:    ldrb.w r2, [r12]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb.w lr, [lr]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r3
; CHECK-NEXT:    vmov.16 q0[6], r6
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <8 x i8>, ptr %offptr, align 2
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %offs
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %gather.zext = zext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %gather.zext
}

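; i8 offsets sign-extended and scaled by the i16 element size; currently scalarized into eight ldrh loads.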
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_sext(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrb.s32 q0, [r1, #4]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmov r3, lr, d1
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i8>, ptr %offptr, align 2
  %offs.sext = sext <8 x i8> %offs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %offs.sext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

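; Same as scaled_v8i16_sext but with zero-extended offsets; also scalarized.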
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_zext(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrb.u32 q0, [r1, #4]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmov r3, lr, d1
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i8>, ptr %offptr, align 2
  %offs.zext = zext <8 x i8> %offs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

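; As zext_unscaled_i8_i16, but the gathered i8 data is sign-extended, giving a vldrb.s16 gather.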
define arm_aapcs_vfpcc <8 x i16> @sext_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrb.s16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %gather.sext = sext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %gather.sext
}

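; i16 byte offsets with i16 data: a single vldrh.u16 gather.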
define arm_aapcs_vfpcc <8 x i16> @unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %byte_ptrs to <8 x ptr>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

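; The same i16 byte offsets gathering f16 data: still a single vldrh.u16 gather.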
define arm_aapcs_vfpcc <8 x half> @unscaled_f16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: unscaled_f16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %byte_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

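; i8 offsets zero-extended to i32; the gathered i8 data is zero-extended to i16, giving a vldrb.u16 gather.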
define arm_aapcs_vfpcc <8 x i16> @zext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrb.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %gather.zext = zext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %gather.zext
}

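; As above, but the gathered i8 data is sign-extended, giving a vldrb.s16 gather.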
define arm_aapcs_vfpcc <8 x i16> @sext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrb.s16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %gather.sext = sext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %gather.sext
}

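; i8 byte offsets with i16 data: vldrb.u16 offset load feeding a vldrh.u16 gather.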
define arm_aapcs_vfpcc <8 x i16> @unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %byte_ptrs to <8 x ptr>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

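; The same i8 byte offsets gathering f16 data: again a vldrh.u16 gather.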
define arm_aapcs_vfpcc <8 x half> @unsigned_unscaled_f16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_unscaled_f16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %byte_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) #1
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) #1
declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>) #1