; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s

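; These tests check that llvm.masked.load of fixed-length floating-point
; vectors (bf16, f16, f32, f64) lowers to a masked unit-stride load
; (vle{16,32,64}.v with a v0.t mask) under both Zvfh and Zvfhmin, with
; Zvfbfmin providing the bf16 element type.
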
define <1 x bfloat> @masked_load_v1bf16(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x bfloat> @llvm.masked.load.v1bf16(ptr %a, i32 8, <1 x i1> %mask, <1 x bfloat> undef)
  ret <1 x bfloat> %load
}

define <1 x half> @masked_load_v1f16(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x half> @llvm.masked.load.v1f16(ptr %a, i32 8, <1 x i1> %mask, <1 x half> undef)
  ret <1 x half> %load
}

define <1 x float> @masked_load_v1f32(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x float> @llvm.masked.load.v1f32(ptr %a, i32 8, <1 x i1> %mask, <1 x float> undef)
  ret <1 x float> %load
}

define <1 x double> @masked_load_v1f64(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x double> @llvm.masked.load.v1f64(ptr %a, i32 8, <1 x i1> %mask, <1 x double> undef)
  ret <1 x double> %load
}

define <2 x bfloat> @masked_load_v2bf16(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x bfloat> @llvm.masked.load.v2bf16(ptr %a, i32 8, <2 x i1> %mask, <2 x bfloat> undef)
  ret <2 x bfloat> %load
}

define <2 x half> @masked_load_v2f16(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.masked.load.v2f16(ptr %a, i32 8, <2 x i1> %mask, <2 x half> undef)
  ret <2 x half> %load
}

define <2 x float> @masked_load_v2f32(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x float> @llvm.masked.load.v2f32(ptr %a, i32 8, <2 x i1> %mask, <2 x float> undef)
  ret <2 x float> %load
}

define <2 x double> @masked_load_v2f64(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x double> @llvm.masked.load.v2f64(ptr %a, i32 8, <2 x i1> %mask, <2 x double> undef)
  ret <2 x double> %load
}

define <4 x bfloat> @masked_load_v4bf16(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x bfloat> @llvm.masked.load.v4bf16(ptr %a, i32 8, <4 x i1> %mask, <4 x bfloat> undef)
  ret <4 x bfloat> %load
}

define <4 x half> @masked_load_v4f16(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x half> @llvm.masked.load.v4f16(ptr %a, i32 8, <4 x i1> %mask, <4 x half> undef)
  ret <4 x half> %load
}

define <4 x float> @masked_load_v4f32(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x float> @llvm.masked.load.v4f32(ptr %a, i32 8, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %load
}

define <4 x double> @masked_load_v4f64(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.masked.load.v4f64(ptr %a, i32 8, <4 x i1> %mask, <4 x double> undef)
  ret <4 x double> %load
}

define <8 x bfloat> @masked_load_v8bf16(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x bfloat> @llvm.masked.load.v8bf16(ptr %a, i32 8, <8 x i1> %mask, <8 x bfloat> undef)
  ret <8 x bfloat> %load
}

define <8 x half> @masked_load_v8f16(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x half> @llvm.masked.load.v8f16(ptr %a, i32 8, <8 x i1> %mask, <8 x half> undef)
  ret <8 x half> %load
}

define <8 x float> @masked_load_v8f32(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.masked.load.v8f32(ptr %a, i32 8, <8 x i1> %mask, <8 x float> undef)
  ret <8 x float> %load
}

define <8 x double> @masked_load_v8f64(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x double> @llvm.masked.load.v8f64(ptr %a, i32 8, <8 x i1> %mask, <8 x double> undef)
  ret <8 x double> %load
}

define <16 x bfloat> @masked_load_v16bf16(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x bfloat> @llvm.masked.load.v16bf16(ptr %a, i32 8, <16 x i1> %mask, <16 x bfloat> undef)
  ret <16 x bfloat> %load
}

define <16 x half> @masked_load_v16f16(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x half> @llvm.masked.load.v16f16(ptr %a, i32 8, <16 x i1> %mask, <16 x half> undef)
  ret <16 x half> %load
}

define <16 x float> @masked_load_v16f32(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x float> @llvm.masked.load.v16f32(ptr %a, i32 8, <16 x i1> %mask, <16 x float> undef)
  ret <16 x float> %load
}

define <16 x double> @masked_load_v16f64(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x double> @llvm.masked.load.v16f64(ptr %a, i32 8, <16 x i1> %mask, <16 x double> undef)
  ret <16 x double> %load
}

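; An AVL of 32 or more no longer fits in vsetivli's 5-bit immediate, so the
; element count is materialized into a register and vsetvli is used instead.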
define <32 x bfloat> @masked_load_v32bf16(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x bfloat> @llvm.masked.load.v32bf16(ptr %a, i32 8, <32 x i1> %mask, <32 x bfloat> undef)
  ret <32 x bfloat> %load
}

define <32 x half> @masked_load_v32f16(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x half> @llvm.masked.load.v32f16(ptr %a, i32 8, <32 x i1> %mask, <32 x half> undef)
  ret <32 x half> %load
}

define <32 x float> @masked_load_v32f32(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x float> @llvm.masked.load.v32f32(ptr %a, i32 8, <32 x i1> %mask, <32 x float> undef)
  ret <32 x float> %load
}

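; <32 x double> is wider than a single LMUL=8 register group at the minimum
; VLEN of 128 assumed for fixed-vector lowering, so the load is split into two
; <16 x double> halves; the upper half's mask is produced by sliding v0 down
; by 2 bytes (16 mask bits).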
define <32 x double> @masked_load_v32f64(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x double> @llvm.masked.load.v32f64(ptr %a, i32 8, <32 x i1> %mask, <32 x double> undef)
  ret <32 x double> %load
}

define <64 x bfloat> @masked_load_v64bf16(ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_load_v64bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <64 x bfloat> @llvm.masked.load.v64bf16(ptr %a, i32 8, <64 x i1> %mask, <64 x bfloat> undef)
  ret <64 x bfloat> %load
}

define <64 x half> @masked_load_v64f16(ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_load_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <64 x half> @llvm.masked.load.v64f16(ptr %a, i32 8, <64 x i1> %mask, <64 x half> undef)
  ret <64 x half> %load
}

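; <64 x float> and the 128-element f16/bf16 types are likewise split into two
; LMUL=8 halves; the second half's mask is slid down by 4 and 8 bytes
; respectively before the second masked load.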
define <64 x float> @masked_load_v64f32(ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_load_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v0, 4
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vle32.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <64 x float> @llvm.masked.load.v64f32(ptr %a, i32 8, <64 x i1> %mask, <64 x float> undef)
  ret <64 x float> %load
}

define <128 x bfloat> @masked_load_v128bf16(ptr %a, <128 x i1> %mask) {
; CHECK-LABEL: masked_load_v128bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v0, 8
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vle16.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <128 x bfloat> @llvm.masked.load.v128bf16(ptr %a, i32 8, <128 x i1> %mask, <128 x bfloat> undef)
  ret <128 x bfloat> %load
}

define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) {
; CHECK-LABEL: masked_load_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v0, 8
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vle16.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <128 x half> @llvm.masked.load.v128f16(ptr %a, i32 8, <128 x i1> %mask, <128 x half> undef)
  ret <128 x half> %load
}