; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

; Range testing for the immediate in the reg+imm(mulvl) addressing
; mode is done only for one instruction. The rest of the instructions
; test only one in-bounds immediate value.
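; For ldnf1b the immediate is tested at both ends of the valid range
; ([-8, 7], in multiples of the vector length) and one step beyond each
; end; out-of-range offsets are instead materialised with rdvl/add.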

define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %a)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #-9
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -9
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -8
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_inbound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 1
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 7
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #8
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 8
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

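; Extending loads of i8 into 16-bit elements (ldnf1b zero-extends,
; ldnf1sb sign-extends).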
define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1b_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

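; Native 16-bit element loads, including the f16 and bf16 forms.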
define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldnf1h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %a, i64 1
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %a, i64 1
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %a, i64 1
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x bfloat> %load
}

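; Extending loads of i8 into 32-bit elements.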
define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1b_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

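; Extending loads of i16 into 32-bit elements.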
define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

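; Native 32-bit element loads, including the f32 form.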
define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldnf1w_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %a, i64 7
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldnf1w_f32_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %a, i64 7
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x float> %load
}

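; Extending loads of i8 into 64-bit elements.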
define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1b_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

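; Extending loads of i16 into 64-bit elements.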
define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

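; Extending loads of i32 into 64-bit elements.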
define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

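; Native 64-bit element loads, including the f64 form.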
define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldnf1d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %a, i64 1
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldnf1d_f64_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %a, i64 1
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x double> %load
}

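; Intrinsic declarations.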
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat versions.
attributes #0 = { "target-features"="+sve,+bf16" }