; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; VSTRH.16 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @scaled_v8i16_i16(ptr %base, ptr %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @scaled_v8f16_i16(ptr %base, ptr %offptr, <8 x half> %input) {
; CHECK-LABEL: scaled_v8f16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @scaled_v8f16_half(ptr %base, ptr %offptr, <8 x half> %input) {
; CHECK-LABEL: scaled_v8f16_half:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i32> %offs.zext
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Expand - sext offset (only zero-extended offsets match the scaled scatter form)
define arm_aapcs_vfpcc void @scaled_v8i16_sext(ptr %base, ptr %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vmov.u16 r6, q0[0]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    vmov r12, lr, d3
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r4, r5, d3
; CHECK-NEXT:    strh r6, [r2]
; CHECK-NEXT:    vmov.u16 r2, q0[1]
; CHECK-NEXT:    strh r2, [r3]
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    strh.w r2, [r12]
; CHECK-NEXT:    vmov.u16 r2, q0[3]
; CHECK-NEXT:    strh.w r2, [lr]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    strh r2, [r0]
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    strh r0, [r1]
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    strh r0, [r4]
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    strh r0, [r5]
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.sext
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Expand - sext offset (only zero-extended offsets match the scaled scatter form)
define arm_aapcs_vfpcc void @scaled_v8f16_sext(ptr %base, ptr %offptr, <8 x half> %input) {
; CHECK-LABEL: scaled_v8f16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vshl.i32 q2, q1, #1
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r1, r2, d4
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vstr.16 s0, [r1]
; CHECK-NEXT:    vmovx.f16 s0, s0
; CHECK-NEXT:    vstr.16 s0, [r2]
; CHECK-NEXT:    vmov r1, r2, d5
; CHECK-NEXT:    vmovx.f16 s0, s1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vstr.16 s1, [r1]
; CHECK-NEXT:    vstr.16 s0, [r2]
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmovx.f16 s0, s2
; CHECK-NEXT:    vstr.16 s2, [r0]
; CHECK-NEXT:    vstr.16 s0, [r1]
; CHECK-NEXT:    vmov r0, r1, d3
; CHECK-NEXT:    vmovx.f16 s0, s3
; CHECK-NEXT:    vstr.16 s3, [r0]
; CHECK-NEXT:    vstr.16 s0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.sext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, zext(offs), uxtw #1]
define arm_aapcs_vfpcc void @unsigned_scaled_v8i16_i8(ptr %base, ptr %offptr, <8 x i16> %input) {
; CHECK-LABEL: unsigned_scaled_v8i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.16 Qd, [base, zext(offs), uxtw #1]
define arm_aapcs_vfpcc void @unsigned_scaled_v8f16_i8(ptr %base, ptr %offptr, <8 x half> %input) {
; CHECK-LABEL: unsigned_scaled_v8f16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

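; VSTRHT.16 Qd, [base, offs, uxtw #1] - the icmp mask is lowered to a VPT block predicating the scatter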
define arm_aapcs_vfpcc void @scaled_v8i16_i16_passthru_icmp0(ptr %base, ptr %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %input, <8 x ptr> %ptrs, i32 2, <8 x i1> %mask)
  ret void
}

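; Expand - the second GEP adds a uniform 40-byte offset and the scatter is expanded to per-lane stores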
define arm_aapcs_vfpcc void @scaled_v8i16_i16_2gep(ptr %base, ptr %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    mov.w r12, #40
; CHECK-NEXT:    vmov.u16 r6, q0[0]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r12
; CHECK-NEXT:    vmov r3, r2, d2
; CHECK-NEXT:    vmov lr, r5, d3
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r12
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r4, r12, d3
; CHECK-NEXT:    strh r6, [r3]
; CHECK-NEXT:    vmov.u16 r3, q0[1]
; CHECK-NEXT:    strh r3, [r2]
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    strh.w r2, [lr]
; CHECK-NEXT:    vmov.u16 r2, q0[3]
; CHECK-NEXT:    strh r2, [r5]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    strh r2, [r0]
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    strh r0, [r1]
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    strh r0, [r4]
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    strh.w r0, [r12]
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %offs
  %ptrs2 = getelementptr inbounds i16, <8 x ptr> %ptrs, i16 20
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %input, <8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

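; VSTRH.16 Qd, [base, offs] - the constant GEPs fold into a constant-pool vector of byte offsets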
define arm_aapcs_vfpcc void @scaled_v8i16_i16_2gep2(ptr %base, ptr %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI9_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI9_0:
; CHECK-NEXT:    .short 40 @ 0x28
; CHECK-NEXT:    .short 46 @ 0x2e
; CHECK-NEXT:    .short 52 @ 0x34
; CHECK-NEXT:    .short 58 @ 0x3a
; CHECK-NEXT:    .short 64 @ 0x40
; CHECK-NEXT:    .short 70 @ 0x46
; CHECK-NEXT:    .short 76 @ 0x4c
; CHECK-NEXT:    .short 82 @ 0x52
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16, <8 x ptr> %ptrs, i16 20
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %input, <8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)