; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

;
; LDFF1B
;

; Full-width first-faulting byte load: one byte per .b lane.
define <vscale x 16 x i8> @ldff1b(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, ptr %a)
  ret <vscale x 16 x i8> %load
}

; The byte GEP offset should fold into the [x0, x1] addressing mode.
define <vscale x 16 x i8> @ldff1b_reg(<vscale x 16 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

; Zero-extending byte load into .h lanes (ldff1b + zext folds to unsigned form).
define <vscale x 8 x i16> @ldff1b_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldff1b_h_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_h_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

; Zero-extending byte load into .s lanes.
define <vscale x 4 x i32> @ldff1b_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1b_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; Zero-extending byte load into .d lanes.
define <vscale x 2 x i64> @ldff1b_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1b_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
89
;
; LDFF1SB (sign-extending first-faulting byte loads)
;

define <vscale x 8 x i16> @ldff1sb_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldff1sb_h_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sb_h_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ldff1sb_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1sb_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sb_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1sb_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1sb_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sb_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
156
;
; LDFF1H (halfword loads; register offsets are scaled by element size, lsl #1)
;

define <vscale x 8 x i16> @ldff1h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldff1h_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 4 x i32> @ldff1h_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1h_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1h_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1h_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @ldff1h_f16(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldff1h_bf16(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldff1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldff1h_f16_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_f16_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr half, ptr %a, i64 %offset
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldff1h_bf16_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) #0 {
; CHECK-LABEL: ldff1h_bf16_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr bfloat, ptr %a, i64 %offset
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x bfloat> %load
}
259
;
; LDFF1SH (sign-extending first-faulting halfword loads)
;

define <vscale x 4 x i32> @ldff1sh_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1sh_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sh_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1sh_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1sh_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sh_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
305
;
; LDFF1W (word loads; register offsets scaled by element size, lsl #2)
;

define <vscale x 4 x i32> @ldff1w(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldff1w_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %offset
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @ldff1w_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1w_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %offset
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @ldff1w_f32(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldff1w_f32_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_f32_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr float, ptr %a, i64 %offset
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x float> @ldff1w_2f32(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w_2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x float> %load
}

define <vscale x 2 x float> @ldff1w_2f32_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_2f32_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr float, ptr %a, i64 %offset
  %load = call <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x float> %load
}
387
;
; LDFF1SW (sign-extending first-faulting word loads)
;

define <vscale x 2 x i64> @ldff1sw_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1sw_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sw_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %offset
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
412
;
; LDFF1D (doubleword loads; register offsets scaled by element size, lsl #3)
;

define <vscale x 2 x i64> @ldff1d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldff1d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr i64, ptr %a, i64 %offset
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x i64> %load
}


define <vscale x 2 x double> @ldff1d_f64(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldff1d_f64_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1d_f64_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr double, ptr %a, i64 %offset
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x double> %load
}
455
; Intrinsic declarations, grouped by predicate/element count.
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }
477