; xref: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

; vp.fmuladd tests for <2 x half>: vector-vector and vector-scalar (splat), masked and unmasked.
declare <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

; vp.fmuladd tests for <4 x half>.
declare <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

; vp.fmuladd tests for <8 x half>.
declare <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

; vp.fmuladd tests for <16 x half>.
declare <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

; vp.fmuladd tests for <2 x float>.
declare <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

; vp.fmuladd tests for <4 x float>.
declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

; vp.fmuladd tests for <8 x float>.
declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

; vp.fmuladd tests for <16 x float>.
declare <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

; vp.fmuladd tests for <2 x double>.
declare <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

; vp.fmuladd tests for <4 x double>.
declare <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

; vp.fmuladd tests for <8 x double>.
declare <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

; vp.fmuladd tests for the non-power-of-two type <15 x double> (third operand
; arrives on the stack, hence the vle64.v load in the expected output).
declare <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)

define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

; vp.fmuladd tests for <16 x double>.
declare <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)

define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

; vp.fmuladd test for <32 x double>: larger than the widest fixed-vector
; register group, so codegen splits into two halves and spills through the
; stack (vs8r.v/vl8r.v spill/reload pairs in the expected output).
declare <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)

define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v7, v0, 2
; CHECK-NEXT:    bltu a4, a1, .LBB50_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB50_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 24
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    vmv.v.v v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

; Unmasked variant (all-true mask via splat (i1 true)). Same 16-element split
; as the masked case, but with no mask to slide down the frame shrinks from
; 32*vlenb to 24*vlenb and the vfmadd.vv instructions carry no v0.t operand.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; hand-edit them; regenerate instead.
define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    vle64.v v0, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    bltu a4, a1, .LBB51_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB51_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v16
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v8
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}
763