; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll (revision 8ce81f17a16b8b689895c7c093d0401a75c09882)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; <2 x half> llvm.vp.fma: masked/unmasked vector-vector and scalar-splat forms.
; ZVFHMIN lowers via widen-to-f32, fma, narrow-back.
declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

; <4 x half> llvm.vp.fma cases (same lowering shapes as v2f16, mf2/m1 LMUL).
declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

; <8 x half> llvm.vp.fma cases (m1 source LMUL, m2 widened LMUL under ZVFHMIN).
declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; ZVFH-NEXT:    vmv.v.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

; <16 x half> llvm.vp.fma cases (m2 source LMUL, m4 widened LMUL under ZVFHMIN).
declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; ZVFH-NEXT:    vmv.v.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v10, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vv_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v12, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

; <2 x float> llvm.vp.fma cases; f32 is legal for both ZVFH and ZVFHMIN, so a
; single CHECK prefix covers all RUN lines.
declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

; <4 x float> llvm.vp.fma cases (m1 LMUL).
declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

; <8 x float> llvm.vp.fma cases (m2 LMUL).
declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

; <16 x float> llvm.vp.fma cases (m4 LMUL).
declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

; <2 x double> llvm.vp.fma cases (e64, m1 LMUL).
declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

658declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)
659
660define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
661; CHECK-LABEL: vfma_vv_v4f64:
662; CHECK:       # %bb.0:
663; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
664; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
665; CHECK-NEXT:    vmv.v.v v8, v10
666; CHECK-NEXT:    ret
667  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
668  ret <4 x double> %v
669}
670
; All-true-mask vector-vector vp.fma on <4 x double>: should lower to a single
; unmasked vfmadd.vv (no v0.t, no result copy) at e64/m2.
define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}
680
; Masked vp.fma with splatted scalar on <4 x double>: the splat should fold to
; a masked vfmadd.vf (fa0 operand, v0.t) at e64/m2.
define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}
692
; Unmasked (all-true) splatted-scalar vp.fma on <4 x double>: expects a single
; unmasked vfmadd.vf at e64/m2.
define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}
704
705declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)
706
; Masked vector-vector vp.fma on <8 x double>: one masked vfmadd.vv at e64/m4
; plus a vmv.v.v to move the result into the v8 register group.
define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
717
; All-true-mask vector-vector vp.fma on <8 x double>: a single unmasked
; vfmadd.vv at e64/m4, result already in v8.
define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}
727
; Masked splatted-scalar vp.fma on <8 x double>: expects one masked vfmadd.vf
; at e64/m4.
define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
739
; Unmasked (all-true) splatted-scalar vp.fma on <8 x double>: a single
; unmasked vfmadd.vf at e64/m4.
define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}
751
752declare <15 x double> @llvm.vp.fma.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)
753
; Non-power-of-two width (<15 x double>), masked: the third vector argument no
; longer fits in registers and is loaded from the stack (vle64.v from a0); the
; fma itself is one masked vfmadd.vv at e64/m8 with EVL in a1.
define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}
766
; Non-power-of-two width (<15 x double>), all-true mask: third operand loaded
; from the stack, then a single unmasked vfmadd.vv at e64/m8.
define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}
778
779declare <16 x double> @llvm.vp.fma.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)
780
; Full m8 width (<16 x double>), masked: third vector argument is passed on the
; stack (vle64.v from a0), then one masked vfmadd.vv at e64/m8 with EVL in a1.
define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}
793
; Full m8 width (<16 x double>), all-true mask: stack-passed third operand and
; a single unmasked vfmadd.vv at e64/m8.
define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}
805
; Masked splatted-scalar vp.fma on <16 x double>: the scalar form avoids the
; stack load needed by the vv variants; one masked vfmadd.vf at e64/m8.
define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}
817
; Unmasked (all-true) splatted-scalar vp.fma on <16 x double>: a single
; unmasked vfmadd.vf at e64/m8.
define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}
829
830declare <32 x double> @llvm.vp.fma.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)
831
; <32 x double> exceeds the maximum m8 register group, so the operation is
; split into two 16-element halves. The lowering allocates 32*vlenb of stack
; for register-group spills, loads the stack-passed halves of %b/%c, splits
; the mask (vslidedown.vi v7, v0, 2) and the EVL (clamped to 16 for the first
; half, evl-16 for the second, with an sltu-based underflow clamp), and emits
; one masked vfmadd.vv per half around the spill/reload traffic.
define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v7, v0, 2
; CHECK-NEXT:    bltu a4, a1, .LBB50_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB50_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 24
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    vmv.v.v v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
922
; Unmasked (all-true) <32 x double> variant: still split into two 16-element
; halves with EVL clamping, but no mask splitting is needed, so only 24*vlenb
; of spill stack is used and both vfmadd.vv instructions are unmasked.
define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    vle64.v v0, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    bltu a4, a1, .LBB51_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB51_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v16
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v8
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}
991