; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

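; Editorial note: this file exercises the llvm.aarch64.sve.ld1 intrinsic
; family and checks that each call lowers to the corresponding SVE predicated
; contiguous load. Extending loads are written as an ld1 of a narrow element
; type followed by an explicit zext/sext, which should fold into a single
; extending load instruction. Both RUN lines feed the same CHECK prefix, so
; the output is expected to be identical with and without optimization.
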
;
; LD1B
;

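; Byte loads. The i8 case is a plain load; the remaining cases load i8
; elements into wider lanes (.h/.s/.d), where zext should select ld1b and
; sext should select ld1sb.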
define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, ptr %addr)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, ptr %addr)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, ptr %addr)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, ptr %addr)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, ptr %addr)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, ptr %addr)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H
;

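; Halfword loads, covering the i16, f16 and bf16 element types. Extension of
; i16 elements into .s/.d lanes should select ld1h (zext) or ld1sh (sext).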
define <vscale x 8 x i16> @ld1h_i16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x half> @ld1h_f16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: ld1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, ptr %addr)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, ptr %addr)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, ptr %addr)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, ptr %addr)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1W
;

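; Word loads. Extension of i32 elements into .d lanes should select ld1w
; (zext) or ld1sw (sext).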
define <vscale x 4 x i32> @ld1w_i32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x float> @ld1w_f32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, ptr %addr)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, ptr %addr)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1D
;

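; Doubleword loads. There is no wider SVE element size to extend into, so
; only the plain i64 and f64 forms are tested.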
define <vscale x 2 x i64> @ld1d_i64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pred,
                                                               ptr %addr)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1d_f64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pred,
                                                                  ptr %addr)
  ret <vscale x 2 x double> %res
}

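; The ld1 intrinsic is overloaded on its scalable-vector result type, so a
; separate declaration is needed for each element type and lane count used
; above.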
declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }