xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll (revision 5ddce70ef0e5a641d7fea95e31fc5e2439cb98cb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3
;
; LD1H, LD1W, LD1D: base + 64-bit scaled offset
;   e.g. ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
;
8
; Zero-extending gather of i16 elements through ld1.gather.index with the
; 64-bit indices passed unmodified. The CHECK lines expect a single LD1H that
; uses the 'lsl #1' scaled addressing form.
define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
20
; Zero-extending gather of i32 elements through ld1.gather.index with the
; 64-bit indices passed unmodified. The CHECK lines expect a single LD1W that
; uses the 'lsl #2' scaled addressing form.
define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
32
; Gather of i64 elements through ld1.gather.index with the 64-bit indices
; passed unmodified; the result is returned directly (no extension). The CHECK
; lines expect a single LD1D that uses the 'lsl #3' scaled addressing form.
define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %load
}
43
; Floating-point variant of the 64-bit element gather: loads double elements
; through ld1.gather.index with unmodified 64-bit indices. The CHECK lines
; expect the same LD1D 'lsl #3' form as the i64 case.
define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %load
}
54
;
; LD1SH, LD1SW: base + 64-bit scaled offset
;   e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
;
59
; Sign-extending gather of i16 elements through ld1.gather.index with the
; 64-bit indices passed unmodified. The CHECK lines expect the sext to be
; absorbed into a single LD1SH that uses the 'lsl #1' scaled addressing form.
define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
71
; Sign-extending gather of i32 elements through ld1.gather.index with the
; 64-bit indices passed unmodified. The CHECK lines expect the sext to be
; absorbed into a single LD1SW that uses the 'lsl #2' scaled addressing form.
define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
83
;
; LD1H, LD1W, LD1D: base + 64-bit sxtw'd scaled offset
;   e.g. ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
;
88
; Zero-extending gather of i16 elements whose indices are first passed through
; sve.sxtw (undef passthru, predicated on %pg). The CHECK lines expect no
; separate extend instruction: only an LD1H using the 'sxtw #1' addressing form.
define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
103
; Zero-extending gather of i32 elements whose indices are first passed through
; sve.sxtw (undef passthru, predicated on %pg). The CHECK lines expect no
; separate extend instruction: only an LD1W using the 'sxtw #2' addressing form.
define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
118
; Gather of i64 elements whose indices are first passed through sve.sxtw
; (undef passthru, predicated on %pg). The CHECK lines expect no separate
; extend instruction: only an LD1D using the 'sxtw #3' addressing form.
define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x i64> %load
}
132
; Floating-point variant: gather of double elements whose indices are first
; passed through sve.sxtw (undef passthru, predicated on %pg). The CHECK lines
; expect only an LD1D using the 'sxtw #3' addressing form.
define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x double> %load
}
146
;
; LD1SH, LD1SW: base + 64-bit sxtw'd scaled offset
;   e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
;
151
; Sign-extending gather of i16 elements whose indices are first passed through
; sve.sxtw (undef passthru, predicated on %pg). The CHECK lines expect both the
; index extension and the result sext to be absorbed into a single LD1SH using
; the 'sxtw #1' addressing form.
define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
166
; Sign-extending gather of i32 elements whose indices are first passed through
; sve.sxtw (undef passthru, predicated on %pg). The CHECK lines expect both the
; index extension and the result sext to be absorbed into a single LD1SW using
; the 'sxtw #2' addressing form.
define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
181
;
; LD1H, LD1W, LD1D: base + 64-bit uxtw'd scaled offset
;   e.g. ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
;
186
; Zero-extending gather of i16 elements whose indices are first passed through
; sve.uxtw (undef passthru, predicated on %pg). The CHECK lines expect no
; separate extend instruction: only an LD1H using the 'uxtw #1' addressing form.
define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
201
; Zero-extending gather of i32 elements whose indices are first passed through
; sve.uxtw (undef passthru, predicated on %pg). The CHECK lines expect no
; separate extend instruction: only an LD1W using the 'uxtw #2' addressing form.
define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
216
; Gather of i64 elements whose indices are first passed through sve.uxtw
; (undef passthru, predicated on %pg). The CHECK lines expect no separate
; extend instruction: only an LD1D using the 'uxtw #3' addressing form.
define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x i64> %load
}
230
; Floating-point variant: gather of double elements whose indices are first
; passed through sve.uxtw (undef passthru, predicated on %pg). The CHECK lines
; expect only an LD1D using the 'uxtw #3' addressing form.
define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x double> %load
}
244
;
; LD1SH, LD1SW: base + 64-bit uxtw'd scaled offset
;   e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
;
249
; Sign-extending gather of i16 elements whose indices are first passed through
; sve.uxtw (undef passthru, predicated on %pg). The CHECK lines expect both the
; index extension and the result sext to be absorbed into a single LD1SH using
; the 'uxtw #1' addressing form.
define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
264
; Sign-extending gather of i32 elements whose indices are first passed through
; sve.uxtw (undef passthru, predicated on %pg). The CHECK lines expect both the
; index extension and the result sext to be absorbed into a single LD1SW using
; the 'uxtw #2' addressing form.
define <vscale x 2 x i64> @gld1sw_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
279
; Gather-load intrinsics used above: (predicate, base pointer, 64-bit index
; vector) -> loaded vector, one declaration per element type.
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)

; Extension intrinsics applied to the index vector in the *_sxtw / *_uxtw
; tests; the calls above pass (undef passthru, predicate, source vector).
declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
287