; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1H, LD1W, LD1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
; extended to 64 bits
;   e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
;
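;
; Each test below passes a predicate, a scalar base pointer and a vector of
; 32-bit offsets to one of the ld1.gather.[su]xtw.index intrinsics. As a rough
; per-lane sketch (illustrative pseudo-IR with made-up value names, not the
; code the backend emits), every active lane i performs a scaled load:
;
;   %off.i  = zext/sext i32 %b[i] to i64                ; uxtw / sxtw
;   %addr.i = getelementptr i16, ptr %base, i64 %off.i  ; i16 for the ld1h case; index scaled by element size
;   %lane.i = load i16, ptr %addr.i
;
; Where the loaded element is narrower than the result element, the zext of the
; gather result is what lets instruction selection fold the extension into the
; zero-extending forms (e.g. ld1h rather than a separate extend).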

; LD1H
define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1W
define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_index_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                    ptr %base,
                                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_index_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                    ptr %base,
                                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

; LD1D
define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_uxtw_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                     ptr %base,
                                                                                     <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_sxtw_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                     ptr %base,
                                                                                     <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
; extended to 64 bits
;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
;
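;
; These reuse the gather intrinsics above; only the extension of the narrow
; result differs, which is what should select the signed forms. A minimal
; contrast with the LD1H/LD1W section (pattern taken from the tests below):
;
;   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(...)
;   %res  = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>   ; ld1sh (sext) instead of ld1h (zext)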

; LD1SH
define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1SW
define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}


; LD1H/LD1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

; LD1W/LD1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

; LD1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)