; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1B
;

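; The scalar-plus-immediate addressing mode encodes a signed 4-bit offset
; (-8 to 7 vectors, scaled by the vector length via "mul vl"), so #7 and #-8
; below are the extremes that can be folded into the instruction.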
define <vscale x 16 x i8> @ld1b_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ld1b_inbound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 4 x i32> @ld1b_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sb_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1sb_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 16 x i8> @ld1b_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -8
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

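; Offsets of 8 and -9 vectors fall outside the encodable [-8, 7] window, so
; the byte offset is materialized with rdvl and a register-offset load is
; selected instead.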
define <vscale x 16 x i8> @ld1b_out_of_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 8
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ld1b_out_of_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #-9
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -9
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

;
; LD1H
;

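; Several tests below use extending loads: fewer, narrower elements are read
; from memory and zero- or sign-extended into wider lanes. Note that the
; immediate is scaled by the per-register memory footprint of the narrow
; type, not by the full vector register size.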
define <vscale x 8 x i16> @ld1b_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1sb_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1sb_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base_scalar)
  ret <vscale x 8 x i16> %load
}

define <vscale x 4 x i32> @ld1h_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1h_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sh_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1sh_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1b_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1b_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sb_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1sb_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1h_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1h_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sh_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1sh_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @ld1h_f16_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1h_f16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base_scalar)
  ret <vscale x 8 x half> %load
}

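; The bfloat test additionally requires +bf16; see attributes #0 at the end
; of the file.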
define <vscale x 8 x bfloat> @ld1h_bf16_inbound(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ld1h_bf16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base_scalar)
  ret <vscale x 8 x bfloat> %load
}

;
; LD1W
;

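; Floating-point and integer loads of the same element size share the same
; ld1w/ld1d instructions; only the IR types differ.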
define <vscale x 4 x i32> @ld1w_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1w_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base_scalar)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ld1w_f32_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1w_f32_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base_scalar)
  ret <vscale x 4 x float> %load
}

;
; LD1D
;

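; This section also covers the 32-to-64-bit extending forms (ld1w/ld1sw into
; .d elements), which use the same [-8, 7] immediate window.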
define <vscale x 2 x i64> @ld1d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base_scalar)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ld1w_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1w_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sw_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1sw_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1d_f64_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ld1d_f64_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base_scalar)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }