xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-scatter-ind32-scaled.ll (revision edb2fc6dab2cf04779959829434e9e8572d48a26)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
3
4; VSTRH.32 Qd, [base, offs, uxtw #1]
5define arm_aapcs_vfpcc void @ext_scaled_i16_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
6; CHECK-LABEL: ext_scaled_i16_i32:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    vldrw.u32 q1, [r1]
9; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
10; CHECK-NEXT:    bx lr
11entry: ; truncating i32->i16 scatter indexed by full-width i32 element offsets
12  %offs = load <4 x i32>, ptr %offptr, align 4 ; four i32 offsets, used without extension
13  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs ; i16 element type: each offset counts 2-byte elements (matches uxtw #1)
14  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
15  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 2, all-true mask: every lane is written
16  ret void
17}
18
19; VSTRW.32 Qd, [base, offs, uxtw #2]
20define arm_aapcs_vfpcc void @scaled_i32_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
21; CHECK-LABEL: scaled_i32_i32:
22; CHECK:       @ %bb.0: @ %entry
23; CHECK-NEXT:    vldrw.u32 q1, [r1]
24; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
25; CHECK-NEXT:    bx lr
26entry: ; i32 scatter indexed by full-width i32 element offsets
27  %offs = load <4 x i32>, ptr %offptr, align 4 ; four i32 offsets, used without extension
28  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs ; i32 element type: each offset counts 4-byte elements (matches uxtw #2)
29  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
30  ret void
31}
32
33; VSTRW.32 Qd, [base, offs, uxtw #2]
34define arm_aapcs_vfpcc void @scaled_f32_i32(ptr %base, ptr %offptr, <4 x float> %input) {
35; CHECK-LABEL: scaled_f32_i32:
36; CHECK:       @ %bb.0: @ %entry
37; CHECK-NEXT:    vldrw.u32 q1, [r1]
38; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
39; CHECK-NEXT:    bx lr
40entry: ; float scatter indexed by full-width i32 element offsets; same expected code as the i32 variant
41  %offs = load <4 x i32>, ptr %offptr, align 4 ; four i32 offsets, used without extension
42  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs ; addresses computed with a 4-byte element type (matches uxtw #2)
43  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr> ; no-op: source and destination types are both <4 x ptr>
44  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
45  ret void
46}
47
48; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
49define arm_aapcs_vfpcc void @unsigned_scaled_b_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
50; CHECK-LABEL: unsigned_scaled_b_i32_i16:
51; CHECK:       @ %bb.0: @ %entry
52; CHECK-NEXT:    vldrh.u32 q1, [r1]
53; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
54; CHECK-NEXT:    bx lr
55entry: ; i32 scatter indexed by zero-extended i16 element offsets
56  %offs = load <4 x i16>, ptr %offptr, align 2 ; four i16 offsets
57  %offs.zext = zext <4 x i16> %offs to <4 x i32> ; expected output folds load + zext into the widening vldrh.u32
58  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext ; i32 element type: each offset counts 4-byte elements (matches uxtw #2)
59  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
60  ret void
61}
62
63; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
64define arm_aapcs_vfpcc void @signed_scaled_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
65; CHECK-LABEL: signed_scaled_i32_i16:
66; CHECK:       @ %bb.0: @ %entry
67; CHECK-NEXT:    vldrh.s32 q1, [r1]
68; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
69; CHECK-NEXT:    bx lr
70entry: ; i32 scatter indexed by sign-extended i16 element offsets
71  %offs = load <4 x i16>, ptr %offptr, align 2 ; four i16 offsets
72  %offs.sext = sext <4 x i16> %offs to <4 x i32> ; expected output folds load + sext into the widening vldrh.s32
73  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext ; i32 element type: each offset counts 4-byte elements (matches uxtw #2)
74  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
75  ret void
76}
77
78; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
79define arm_aapcs_vfpcc void @a_unsigned_scaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
80; CHECK-LABEL: a_unsigned_scaled_f32_i16:
81; CHECK:       @ %bb.0: @ %entry
82; CHECK-NEXT:    vldrh.u32 q1, [r1]
83; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
84; CHECK-NEXT:    bx lr
85entry: ; float scatter indexed by zero-extended i16 element offsets
86  %offs = load <4 x i16>, ptr %offptr, align 2 ; four i16 offsets
87  %offs.zext = zext <4 x i16> %offs to <4 x i32> ; expected output folds load + zext into the widening vldrh.u32
88  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext ; addresses computed with a 4-byte element type (matches uxtw #2)
89  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr> ; no-op: source and destination types are both <4 x ptr>
90  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
91  ret void
92}
93
94; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
95define arm_aapcs_vfpcc void @b_signed_scaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
96; CHECK-LABEL: b_signed_scaled_f32_i16:
97; CHECK:       @ %bb.0: @ %entry
98; CHECK-NEXT:    vldrh.s32 q1, [r1]
99; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
100; CHECK-NEXT:    bx lr
101entry: ; float scatter indexed by sign-extended i16 element offsets
102  %offs = load <4 x i16>, ptr %offptr, align 2 ; four i16 offsets
103  %offs.sext = sext <4 x i16> %offs to <4 x i32> ; expected output folds load + sext into the widening vldrh.s32
104  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext ; addresses computed with a 4-byte element type (matches uxtw #2)
105  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr> ; no-op: source and destination types are both <4 x ptr>
106  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
107  ret void
108}
109
110; VSTRH.32 Qd, [base, offs.sext, uxtw #1]
111define arm_aapcs_vfpcc void @ext_signed_scaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
112; CHECK-LABEL: ext_signed_scaled_i16_i16:
113; CHECK:       @ %bb.0: @ %entry
114; CHECK-NEXT:    vldrh.s32 q1, [r1]
115; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
116; CHECK-NEXT:    bx lr
117entry: ; truncating i32->i16 scatter indexed by sign-extended i16 element offsets
118  %offs = load <4 x i16>, ptr %offptr, align 2 ; four i16 offsets
119  %offs.sext = sext <4 x i16> %offs to <4 x i32> ; expected output folds load + sext into the widening vldrh.s32
120  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.sext ; i16 element type: each offset counts 2-byte elements (matches uxtw #1)
121  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
122  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 2, all-true mask: every lane is written
123  ret void
124}
125
126; VSTRH.32 Qd, [base, offs.zext, uxtw #1]
127define arm_aapcs_vfpcc void @ext_unsigned_scaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
128; CHECK-LABEL: ext_unsigned_scaled_i16_i16:
129; CHECK:       @ %bb.0: @ %entry
130; CHECK-NEXT:    vldrh.u32 q1, [r1]
131; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
132; CHECK-NEXT:    bx lr
133entry: ; truncating i32->i16 scatter indexed by zero-extended i16 element offsets
134  %offs = load <4 x i16>, ptr %offptr, align 2 ; four i16 offsets
135  %offs.zext = zext <4 x i16> %offs to <4 x i32> ; expected output folds load + zext into the widening vldrh.u32
136  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.zext ; i16 element type: each offset counts 2-byte elements (matches uxtw #1)
137  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
138  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 2, all-true mask: every lane is written
139  ret void
140}
141
142; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
143define arm_aapcs_vfpcc void @unsigned_scaled_b_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
144; CHECK-LABEL: unsigned_scaled_b_i32_i8:
145; CHECK:       @ %bb.0: @ %entry
146; CHECK-NEXT:    vldrb.u32 q1, [r1]
147; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
148; CHECK-NEXT:    bx lr
149entry: ; i32 scatter indexed by zero-extended i8 element offsets
150  %offs = load <4 x i8>, ptr %offptr, align 1 ; four i8 offsets
151  %offs.zext = zext <4 x i8> %offs to <4 x i32> ; expected output folds load + zext into the widening vldrb.u32
152  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext ; i32 element type: each offset counts 4-byte elements (matches uxtw #2)
153  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
154  ret void
155}
156
157; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
158define arm_aapcs_vfpcc void @signed_scaled_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
159; CHECK-LABEL: signed_scaled_i32_i8:
160; CHECK:       @ %bb.0: @ %entry
161; CHECK-NEXT:    vldrb.s32 q1, [r1]
162; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
163; CHECK-NEXT:    bx lr
164entry: ; i32 scatter indexed by sign-extended i8 element offsets
165  %offs = load <4 x i8>, ptr %offptr, align 1 ; four i8 offsets
166  %offs.sext = sext <4 x i8> %offs to <4 x i32> ; expected output folds load + sext into the widening vldrb.s32
167  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext ; i32 element type: each offset counts 4-byte elements (matches uxtw #2)
168  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
169  ret void
170}
171
172; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
173define arm_aapcs_vfpcc void @a_unsigned_scaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
174; CHECK-LABEL: a_unsigned_scaled_f32_i8:
175; CHECK:       @ %bb.0: @ %entry
176; CHECK-NEXT:    vldrb.u32 q1, [r1]
177; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
178; CHECK-NEXT:    bx lr
179entry: ; float scatter indexed by zero-extended i8 element offsets
180  %offs = load <4 x i8>, ptr %offptr, align 1 ; four i8 offsets
181  %offs.zext = zext <4 x i8> %offs to <4 x i32> ; expected output folds load + zext into the widening vldrb.u32
182  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext ; addresses computed with a 4-byte element type (matches uxtw #2)
183  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr> ; no-op: source and destination types are both <4 x ptr>
184  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
185  ret void
186}
187
188; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
189define arm_aapcs_vfpcc void @b_signed_scaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
190; CHECK-LABEL: b_signed_scaled_f32_i8:
191; CHECK:       @ %bb.0: @ %entry
192; CHECK-NEXT:    vldrb.s32 q1, [r1]
193; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
194; CHECK-NEXT:    bx lr
195entry: ; float scatter indexed by sign-extended i8 element offsets
196  %offs = load <4 x i8>, ptr %offptr, align 1 ; four i8 offsets
197  %offs.sext = sext <4 x i8> %offs to <4 x i32> ; expected output folds load + sext into the widening vldrb.s32
198  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext ; addresses computed with a 4-byte element type (matches uxtw #2)
199  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr> ; no-op: source and destination types are both <4 x ptr>
200  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 4, all-true mask: every lane is written
201  ret void
202}
203
204; VSTRH.32 Qd, [base, offs.sext, uxtw #1]
205define arm_aapcs_vfpcc void @ext_signed_scaled_i16_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
206; CHECK-LABEL: ext_signed_scaled_i16_i8:
207; CHECK:       @ %bb.0: @ %entry
208; CHECK-NEXT:    vldrb.s32 q1, [r1]
209; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
210; CHECK-NEXT:    bx lr
211entry: ; truncating i32->i16 scatter indexed by sign-extended i8 element offsets
212  %offs = load <4 x i8>, ptr %offptr, align 1 ; four i8 offsets
213  %offs.sext = sext <4 x i8> %offs to <4 x i32> ; expected output folds load + sext into the widening vldrb.s32
214  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.sext ; i16 element type: each offset counts 2-byte elements (matches uxtw #1)
215  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
216  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 2, all-true mask: every lane is written
217  ret void
218}
219
220; VSTRH.32 Qd, [base, offs.zext, uxtw #1]
221define arm_aapcs_vfpcc void @ext_unsigned_scaled_i16_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
222; CHECK-LABEL: ext_unsigned_scaled_i16_i8:
223; CHECK:       @ %bb.0: @ %entry
224; CHECK-NEXT:    vldrb.u32 q1, [r1]
225; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
226; CHECK-NEXT:    bx lr
227entry: ; truncating i32->i16 scatter indexed by zero-extended i8 element offsets
228  %offs = load <4 x i8>, ptr %offptr, align 1 ; four i8 offsets
229  %offs.zext = zext <4 x i8> %offs to <4 x i32> ; expected output folds load + zext into the widening vldrb.u32
230  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.zext ; i16 element type: each offset counts 2-byte elements (matches uxtw #1)
231  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
232  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; align 2, all-true mask: every lane is written
233  ret void
234}
235
236define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep(ptr %base, ptr %offptr, <4 x i32> %input) {
237; CHECK-LABEL: ext_scaled_i16_i32_2gep:
238; CHECK:       @ %bb.0: @ %entry
239; CHECK-NEXT:    vldrw.u32 q1, [r1]
240; CHECK-NEXT:    movs r2, #10
241; CHECK-NEXT:    movs r3, #0
242; CHECK-NEXT:    vshl.i32 q1, q1, #1
243; CHECK-NEXT:    vadd.i32 q1, q1, r0
244; CHECK-NEXT:    vadd.i32 q1, q1, r2
245; CHECK-NEXT:    vstrh.32 q0, [r3, q1]
246; CHECK-NEXT:    bx lr
247entry: ; truncating i16 scatter through two chained GEPs: loaded i32 offsets, then a constant +5 elements
248  %offs = load <4 x i32>, ptr %offptr, align 4 ; four i32 offsets, used without extension
249  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs ; first GEP: per-lane offsets in 2-byte elements
250  %ptrs2 = getelementptr inbounds i16, <4 x ptr> %ptrs, i16 5 ; second GEP: +5 i16 elements = +10 bytes on every lane (the #10 in the expected output)
251  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
252  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs2, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; expected output builds full addresses (shift left 1, add base, add 10) and stores with a zero base register instead of the uxtw #1 form
253  ret void
254}
255
256define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep2(ptr %base, ptr %offptr, <4 x i32> %input) {
257; CHECK-LABEL: ext_scaled_i16_i32_2gep2:
258; CHECK:       @ %bb.0: @ %entry
259; CHECK-NEXT:    adr r1, .LCPI16_0
260; CHECK-NEXT:    vldrw.u32 q1, [r1]
261; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
262; CHECK-NEXT:    bx lr
263; CHECK-NEXT:    .p2align 4
264; CHECK-NEXT:  @ %bb.1:
265; CHECK-NEXT:  .LCPI16_0:
266; CHECK-NEXT:    .long 10 @ 0xa
267; CHECK-NEXT:    .long 16 @ 0x10
268; CHECK-NEXT:    .long 22 @ 0x16
269; CHECK-NEXT:    .long 28 @ 0x1c
270entry: ; truncating i16 scatter through two chained GEPs with all-constant offsets; %offptr is not read in this function
271  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i16> <i16 0, i16 3, i16 6, i16 9> ; first GEP: constant element offsets 0, 3, 6, 9
272  %ptrs2 = getelementptr inbounds i16, <4 x ptr> %ptrs, i16 5 ; second GEP: +5 elements each -> 5, 8, 11, 14 elements = 10, 16, 22, 28 bytes (the constant-pool values)
273  %t = trunc <4 x i32> %input to <4 x i16> ; only the low 16 bits of each lane are stored
274  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs2, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) ; expected output stores with unscaled byte offsets loaded from the constant pool, base left in r0
275  ret void
276}
277
278declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>) ; NOTE(review): no call to this variant is visible in this file
279declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) ; called by the ext_* truncating-store tests
280declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>) ; NOTE(review): no call to this variant is visible in this file
281declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) ; called by the <4 x i32> store tests
282declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) ; called by the <4 x float> store tests
283