xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll (revision 20864d2cf610639a70e43aa417f90b457f8e3c90)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
3; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
4; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
5; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
6
; Vector-vector case, 1 element: both <1 x bfloat> operands are extended to
; float and fed to llvm.fma as the multiplicands, with %a as the addend.
; With +zvfbfwma the checks expect one vfwmaccbf16.vv at e16/mf4; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings and a vfmacc.vv at e32/mf2.
7define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x bfloat> %c) {
8; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v1f32:
9; ZVFBFWMA:       # %bb.0:
10; ZVFBFWMA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
11; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
12; ZVFBFWMA-NEXT:    ret
13;
14; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v1f32:
15; ZVFBFMIN:       # %bb.0:
16; ZVFBFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
17; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
18; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
19; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
20; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
21; ZVFBFMIN-NEXT:    ret
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
22  %b.ext = fpext <1 x bfloat> %b to <1 x float>
23  %c.ext = fpext <1 x bfloat> %c to <1 x float>
24  %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
25  ret <1 x float> %res
26}
27
; Vector-scalar case, 1 element: the scalar bfloat %b is splatted into a
; <1 x bfloat> vector, then both operands are extended to float and fed to
; llvm.fma with %a as the addend.
; The checked output moves the scalar through a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with only +zvfbfmin) and inserts it with vmv.s.x; the multiply-add
; itself is still the .vv form (vfwmaccbf16.vv, or widen + vfmacc.vv).
28define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> %c) {
29; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v1f32:
30; ZVFBFWMA:       # %bb.0:
31; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
32; ZVFBFWMA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
33; ZVFBFWMA-NEXT:    vmv.s.x v10, a0
34; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
35; ZVFBFWMA-NEXT:    ret
36;
37; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v1f32:
38; ZVFBFMIN:       # %bb.0:
39; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
40; ZVFBFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
41; ZVFBFMIN-NEXT:    vmv.s.x v10, a0
42; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
43; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
44; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
45; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
46; ZVFBFMIN-NEXT:    ret
; Build the splat of %b: insert into lane 0, then shuffle with an all-zero mask.
47  %b.head = insertelement <1 x bfloat> poison, bfloat %b, i32 0
48  %b.splat = shufflevector <1 x bfloat> %b.head, <1 x bfloat> poison, <1 x i32> zeroinitializer
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
49  %b.ext = fpext <1 x bfloat> %b.splat to <1 x float>
50  %c.ext = fpext <1 x bfloat> %c to <1 x float>
51  %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
52  ret <1 x float> %res
53}
54
; Vector-vector case, 2 elements: both <2 x bfloat> operands are extended to
; float and fed to llvm.fma as the multiplicands, with %a as the addend.
; With +zvfbfwma the checks expect one vfwmaccbf16.vv at e16/mf4; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings and a vfmacc.vv at e32/mf2.
55define <2 x float> @vfwmaccbf16_vv_v2f32(<2 x float> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
56; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v2f32:
57; ZVFBFWMA:       # %bb.0:
58; ZVFBFWMA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
59; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
60; ZVFBFWMA-NEXT:    ret
61;
62; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v2f32:
63; ZVFBFMIN:       # %bb.0:
64; ZVFBFMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
65; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
66; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
67; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
68; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
69; ZVFBFMIN-NEXT:    ret
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
70  %b.ext = fpext <2 x bfloat> %b to <2 x float>
71  %c.ext = fpext <2 x bfloat> %c to <2 x float>
72  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
73  ret <2 x float> %res
74}
75
; Vector-scalar case, 2 elements: the scalar bfloat %b is splatted into a
; <2 x bfloat> vector, then both operands are extended to float and fed to
; llvm.fma with %a as the addend.
; The checked output moves the scalar through a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with only +zvfbfmin) and broadcasts it with vmv.v.x; the multiply-add
; itself is still the .vv form (vfwmaccbf16.vv, or widen + vfmacc.vv).
76define <2 x float> @vfwmaccbf16_vf_v2f32(<2 x float> %a, bfloat %b, <2 x bfloat> %c) {
77; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v2f32:
78; ZVFBFWMA:       # %bb.0:
79; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
80; ZVFBFWMA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
81; ZVFBFWMA-NEXT:    vmv.v.x v10, a0
82; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
83; ZVFBFWMA-NEXT:    ret
84;
85; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v2f32:
86; ZVFBFMIN:       # %bb.0:
87; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
88; ZVFBFMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
89; ZVFBFMIN-NEXT:    vmv.v.x v10, a0
90; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
91; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
92; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
93; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
94; ZVFBFMIN-NEXT:    ret
; Build the splat of %b: insert into lane 0, then shuffle with an all-zero mask.
95  %b.head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
96  %b.splat = shufflevector <2 x bfloat> %b.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
97  %b.ext = fpext <2 x bfloat> %b.splat to <2 x float>
98  %c.ext = fpext <2 x bfloat> %c to <2 x float>
99  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
100  ret <2 x float> %res
101}
102
; Vector-vector case, 4 elements: both <4 x bfloat> operands are extended to
; float and fed to llvm.fma as the multiplicands, with %a as the addend.
; With +zvfbfwma the checks expect one vfwmaccbf16.vv at e16/mf2; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings and a vfmacc.vv at e32/m1.
103define <4 x float> @vfwmaccbf16_vv_v4f32(<4 x float> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
104; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v4f32:
105; ZVFBFWMA:       # %bb.0:
106; ZVFBFWMA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
107; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
108; ZVFBFWMA-NEXT:    ret
109;
110; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v4f32:
111; ZVFBFMIN:       # %bb.0:
112; ZVFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
113; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
114; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
115; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
116; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
117; ZVFBFMIN-NEXT:    ret
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
118  %b.ext = fpext <4 x bfloat> %b to <4 x float>
119  %c.ext = fpext <4 x bfloat> %c to <4 x float>
120  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
121  ret <4 x float> %res
122}
123
; Vector-scalar case, 4 elements: the scalar bfloat %b is splatted into a
; <4 x bfloat> vector, then both operands are extended to float and fed to
; llvm.fma with %a as the addend.
; The checked output moves the scalar through a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with only +zvfbfmin) and broadcasts it with vmv.v.x; the multiply-add
; itself is still the .vv form (vfwmaccbf16.vv, or widen + vfmacc.vv).
124define <4 x float> @vfwmaccbf16_vf_v4f32(<4 x float> %a, bfloat %b, <4 x bfloat> %c) {
125; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v4f32:
126; ZVFBFWMA:       # %bb.0:
127; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
128; ZVFBFWMA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
129; ZVFBFWMA-NEXT:    vmv.v.x v10, a0
130; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
131; ZVFBFWMA-NEXT:    ret
132;
133; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v4f32:
134; ZVFBFMIN:       # %bb.0:
135; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
136; ZVFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
137; ZVFBFMIN-NEXT:    vmv.v.x v10, a0
138; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
139; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
140; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
141; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
142; ZVFBFMIN-NEXT:    ret
; Build the splat of %b: insert into lane 0, then shuffle with an all-zero mask.
143  %b.head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
144  %b.splat = shufflevector <4 x bfloat> %b.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
145  %b.ext = fpext <4 x bfloat> %b.splat to <4 x float>
146  %c.ext = fpext <4 x bfloat> %c to <4 x float>
147  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
148  ret <4 x float> %res
149}
150
; Vector-vector case, 8 elements: both <8 x bfloat> operands are extended to
; float and fed to llvm.fma as the multiplicands, with %a as the addend.
; With +zvfbfwma the checks expect one vfwmaccbf16.vv at e16/m1; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings and a vfmacc.vv at e32/m2.
151define <8 x float> @vfwmaccbf16_vv_v8f32(<8 x float> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
152; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v8f32:
153; ZVFBFWMA:       # %bb.0:
154; ZVFBFWMA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
155; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v11
156; ZVFBFWMA-NEXT:    ret
157;
158; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v8f32:
159; ZVFBFMIN:       # %bb.0:
160; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
161; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
162; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v11
163; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
164; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
165; ZVFBFMIN-NEXT:    ret
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
166  %b.ext = fpext <8 x bfloat> %b to <8 x float>
167  %c.ext = fpext <8 x bfloat> %c to <8 x float>
168  %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
169  ret <8 x float> %res
170}
171
; Vector-scalar case, 8 elements: the scalar bfloat %b is splatted into an
; <8 x bfloat> vector, then both operands are extended to float and fed to
; llvm.fma with %a as the addend.
; The checked output moves the scalar through a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with only +zvfbfmin) and broadcasts it with vmv.v.x; the multiply-add
; itself is still the .vv form (vfwmaccbf16.vv, or widen + vfmacc.vv).
172define <8 x float> @vfwmaccbf16_vf_v8f32(<8 x float> %a, bfloat %b, <8 x bfloat> %c) {
173; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v8f32:
174; ZVFBFWMA:       # %bb.0:
175; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
176; ZVFBFWMA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
177; ZVFBFWMA-NEXT:    vmv.v.x v11, a0
178; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v11, v10
179; ZVFBFWMA-NEXT:    ret
180;
181; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v8f32:
182; ZVFBFMIN:       # %bb.0:
183; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
184; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
185; ZVFBFMIN-NEXT:    vmv.v.x v11, a0
186; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v11
187; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v10
188; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
189; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
190; ZVFBFMIN-NEXT:    ret
; Build the splat of %b: insert into lane 0, then shuffle with an all-zero mask.
191  %b.head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
192  %b.splat = shufflevector <8 x bfloat> %b.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
193  %b.ext = fpext <8 x bfloat> %b.splat to <8 x float>
194  %c.ext = fpext <8 x bfloat> %c to <8 x float>
195  %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
196  ret <8 x float> %res
197}
198
; Vector-vector case, 16 elements: both <16 x bfloat> operands are extended to
; float and fed to llvm.fma as the multiplicands, with %a as the addend.
; With +zvfbfwma the checks expect one vfwmaccbf16.vv at e16/m2; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings and a vfmacc.vv at e32/m4.
199define <16 x float> @vfwmaccbf16_vv_v16f32(<16 x float> %a, <16 x bfloat> %b, <16 x bfloat> %c) {
200; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v16f32:
201; ZVFBFWMA:       # %bb.0:
202; ZVFBFWMA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
203; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v12, v14
204; ZVFBFWMA-NEXT:    ret
205;
206; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v16f32:
207; ZVFBFMIN:       # %bb.0:
208; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
209; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
210; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v14
211; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
212; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
213; ZVFBFMIN-NEXT:    ret
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
214  %b.ext = fpext <16 x bfloat> %b to <16 x float>
215  %c.ext = fpext <16 x bfloat> %c to <16 x float>
216  %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
217  ret <16 x float> %res
218}
219
; Vector-scalar case, 16 elements: the scalar bfloat %b is splatted into a
; <16 x bfloat> vector, then both operands are extended to float and fed to
; llvm.fma with %a as the addend.
; The checked output moves the scalar through a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with only +zvfbfmin) and broadcasts it with vmv.v.x; the multiply-add
; itself is still the .vv form (vfwmaccbf16.vv, or widen + vfmacc.vv).
220define <16 x float> @vfwmaccbf16_vf_v16f32(<16 x float> %a, bfloat %b, <16 x bfloat> %c) {
221; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v16f32:
222; ZVFBFWMA:       # %bb.0:
223; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
224; ZVFBFWMA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
225; ZVFBFWMA-NEXT:    vmv.v.x v14, a0
226; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v14, v12
227; ZVFBFWMA-NEXT:    ret
228;
229; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v16f32:
230; ZVFBFMIN:       # %bb.0:
231; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
232; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
233; ZVFBFMIN-NEXT:    vmv.v.x v14, a0
234; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v14
235; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v12
236; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
237; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
238; ZVFBFMIN-NEXT:    ret
; Build the splat of %b: insert into lane 0, then shuffle with an all-zero mask.
239  %b.head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
240  %b.splat = shufflevector <16 x bfloat> %b.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
241  %b.ext = fpext <16 x bfloat> %b.splat to <16 x float>
242  %c.ext = fpext <16 x bfloat> %c to <16 x float>
243  %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
244  ret <16 x float> %res
245}
246
; Vector-vector case, 32 elements: both <32 x bfloat> operands are extended to
; float and fed to llvm.fma as the multiplicands, with %a as the addend.
; With +zvfbfwma the checks expect one vfwmaccbf16.vv at e16/m4; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings and a vfmacc.vv at e32/m8.
; Unlike the smaller cases in this file, the checks load the element count 32
; with li and pass it to vsetvli instead of using vsetivli (presumably because
; 32 is outside vsetivli's immediate range - TODO confirm against the V spec).
; NOTE(review): this function is named vfwmaccbf32_* while the other functions
; in this file use vfwmaccbf16_*; this looks like a typo. Renaming requires
; regenerating the -LABEL check lines as well.
247define <32 x float> @vfwmaccbf32_vv_v32f32(<32 x float> %a, <32 x bfloat> %b, <32 x bfloat> %c) {
248; ZVFBFWMA-LABEL: vfwmaccbf32_vv_v32f32:
249; ZVFBFWMA:       # %bb.0:
250; ZVFBFWMA-NEXT:    li a0, 32
251; ZVFBFWMA-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
252; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v16, v20
253; ZVFBFWMA-NEXT:    ret
254;
255; ZVFBFMIN-LABEL: vfwmaccbf32_vv_v32f32:
256; ZVFBFMIN:       # %bb.0:
257; ZVFBFMIN-NEXT:    li a0, 32
258; ZVFBFMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
259; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
260; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v20
261; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
262; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
263; ZVFBFMIN-NEXT:    ret
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
264  %b.ext = fpext <32 x bfloat> %b to <32 x float>
265  %c.ext = fpext <32 x bfloat> %c to <32 x float>
266  %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
267  ret <32 x float> %res
268}
269
; Vector-scalar case, 32 elements: the scalar bfloat %b is splatted into a
; <32 x bfloat> vector, then both operands are extended to float and fed to
; llvm.fma with %a as the addend.
; The checked output moves the scalar through a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with only +zvfbfmin) and broadcasts it with vmv.v.x; the multiply-add
; itself is still the .vv form (vfwmaccbf16.vv, or widen + vfmacc.vv).
; Unlike the smaller cases in this file, the checks load the element count 32
; with li and pass it to vsetvli instead of using vsetivli (presumably because
; 32 is outside vsetivli's immediate range - TODO confirm against the V spec).
; NOTE(review): this function is named vfwmaccbf32_* while the other functions
; in this file use vfwmaccbf16_*; this looks like a typo. Renaming requires
; regenerating the -LABEL check lines as well.
270define <32 x float> @vfwmaccbf32_vf_v32f32(<32 x float> %a, bfloat %b, <32 x bfloat> %c) {
271; ZVFBFWMA-LABEL: vfwmaccbf32_vf_v32f32:
272; ZVFBFWMA:       # %bb.0:
273; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
274; ZVFBFWMA-NEXT:    li a1, 32
275; ZVFBFWMA-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
276; ZVFBFWMA-NEXT:    vmv.v.x v20, a0
277; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v20, v16
278; ZVFBFWMA-NEXT:    ret
279;
280; ZVFBFMIN-LABEL: vfwmaccbf32_vf_v32f32:
281; ZVFBFMIN:       # %bb.0:
282; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
283; ZVFBFMIN-NEXT:    li a1, 32
284; ZVFBFMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
285; ZVFBFMIN-NEXT:    vmv.v.x v20, a0
286; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
287; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v16
288; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
289; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
290; ZVFBFMIN-NEXT:    ret
; Build the splat of %b: insert into lane 0, then shuffle with an all-zero mask.
291  %b.head = insertelement <32 x bfloat> poison, bfloat %b, i32 0
292  %b.splat = shufflevector <32 x bfloat> %b.head, <32 x bfloat> poison, <32 x i32> zeroinitializer
; Widen both bf16 inputs, then compute %b.ext * %c.ext + %a.
293  %b.ext = fpext <32 x bfloat> %b.splat to <32 x float>
294  %c.ext = fpext <32 x bfloat> %c to <32 x float>
295  %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
296  ret <32 x float> %res
297}
298