; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; VP intrinsic declarations used by the nxv1f32 tests.
declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)
; Masked widening FMA: vp.fpext of both f16 operands feeding vp.fma folds to
; vfwmacc.vv under ZVFH; ZVFHMIN lowers via explicit widening converts.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
37
; Same widening FMA with an all-ones (unmasked) VP mask.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}
59
; Unmasked FMA whose result is vp.merge'd into %c under %m: lowers to a
; tail-undisturbed, masked vfwmacc.vv under ZVFH.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}
83
; FIXME: Support this case?
; Masked FMA followed by a vp.merge under the same mask %m; ZVFHMIN keeps a
; separate vmerge.vvm instead of folding everything into one instruction.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}
110
; Unmasked FMA plus an all-ones vp.merge into %c: tail-undisturbed, unmasked
; vfwmacc.vv under ZVFH.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}
134
; Masked widening FMA with a splatted f16 scalar: folds to vfwmacc.vf under ZVFH.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
160
; Splatted scalar passed as the first vp.fma operand; commuted to vfwmacc.vf.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v11, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
187
; Unmasked widening FMA with a splatted f16 scalar.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}
213
; Scalar-operand FMA merged into %vc under %m: tail-undisturbed, masked
; vfwmacc.vf under ZVFH.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}
241
; Commuted scalar-operand FMA plus vp.merge under %m: still folds to a
; tail-undisturbed, masked vfwmacc.vf under ZVFH.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v8, v11, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}
269
; Scalar-operand FMA with all-ones merge into %vc: tail-undisturbed, unmasked
; vfwmacc.vf under ZVFH.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v9, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}
297
; VP intrinsic declarations used by the nxv2f32 tests.
declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
303
; Masked widening FMA at nxv2f32 (e16/mf2 -> e32/m1).
define <vscale x 2 x float> @vfmacc_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}
325
; Unmasked widening FMA at nxv2f32.
define <vscale x 2 x float> @vfmacc_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}
347
; Masked widening FMA with a splatted f16 scalar at nxv2f32.
define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}
373
; Unmasked widening FMA with a splatted f16 scalar at nxv2f32.
define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}
399
; VP intrinsic declarations used by the nxv4f32 tests.
declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
405
; Masked widening FMA at nxv4f32 (e16/m1 -> e32/m2).
define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}
428
; Unmasked widening FMA at nxv4f32.
define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}
451
; Masked widening FMA with a splatted f16 scalar at nxv4f32.
define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v12, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}
477
; Unmasked widening FMA with a splatted f16 scalar at nxv4f32.
define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v12, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}
503
; VP intrinsic declarations used by the nxv8f32 tests.
declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.select.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
509
; Masked widening FMA at nxv8f32 (e16/m2 -> e32/m4).
define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
532
; Unmasked widening FMA at nxv8f32.
define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}
555
; Masked widening FMA where one multiplicand is a splat of a scalar half.
; ZVFH folds the splat+ext+fma into vfwmacc.vf with the scalar in fa0; ZVFHMIN
; must first materialize the splat (fmv.x.h + vmv.v.x) before widening both
; operands and using vfmadd.vv.
define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v16, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
581
; Unmasked (all-ones mask) variant of vfmacc_vf_nxv8f32: same selection,
; without the v0.t mask operand.
define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v16, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}
607
608declare <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
609declare <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
610declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
611declare <vscale x 16 x float> @llvm.vp.select.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
612declare <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
613
; Largest f16->f32 case (m4 sources, m8 result). ZVFH still gets a single
; vfwmacc.vv, but the ZVFHMIN expansion runs out of registers: the accumulator
; (v16, a full 8-register group) is spilled to the stack around the two
; widening converts and reloaded before the vfmadd.vv, hence the vlenb-based
; frame setup and .cfi_escape expression for the scalable stack offset.
define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}
652
; Unmasked nxv16f32 case: with no mask, v0 is free to hold one widened
; operand, so the ZVFHMIN expansion avoids the stack spill that the masked
; variant needs.
define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 16 x float> %v
}
675
; Masked scalar-splat variant at m4/m8: ZVFH folds to vfwmacc.vf; ZVFHMIN
; splats the half through a GPR (fmv.x.h + vmv.v.x) and widens both operands
; before the e32 vfmadd.vv.
define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v4, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}
701
; Unmasked (all-ones mask) variant of vfmacc_vf_nxv16f32; with no mask the
; ZVFHMIN expansion can use v0/v24 freely and drops the v0.t operands.
define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v0, v16
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 16 x float> %v
}
727
728declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
729declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
730declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
731declare <vscale x 1 x double> @llvm.vp.select.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
732declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
733
; f32->f64 widening FMA: no f16 involved, so ZVFH and ZVFHMIN produce the same
; code (shared CHECK prefix) — a single masked vfwmacc.vv at e32/mf2.
define <vscale x 1 x double> @vfmacc_vv_nxv1f64(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}
746
; Unmasked f32->f64 widening FMA: identical to the masked version minus the
; v0.t operand.
define <vscale x 1 x double> @vfmacc_vv_nxv1f64_unmasked(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}
759
; Scalar-splat f32 multiplicand extended to f64: the splat + vp.fpext + vp.fma
; chain folds to a single masked vfwmacc.vf.
define <vscale x 1 x double> @vfmacc_vf_nxv1f64(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}
774
; Unmasked variant of vfmacc_vf_nxv1f64: vfwmacc.vf without v0.t.
define <vscale x 1 x double> @vfmacc_vf_nxv1f64_unmasked(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}
789
790declare <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
791declare <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
792declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
793declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
794declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
795
; f32->f64 widening FMA at e32/m1 (m2 result): single masked vfwmacc.vv.
define <vscale x 2 x double> @vfmacc_vv_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}
808
; Unmasked nxv2f64 widening FMA: same as above without v0.t.
define <vscale x 2 x double> @vfmacc_vv_nxv2f64_unmasked(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}
821
; Scalar-splat f32 operand widened to nxv2f64: folds to masked vfwmacc.vf.
define <vscale x 2 x double> @vfmacc_vf_nxv2f64(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}
836
; Unmasked variant of vfmacc_vf_nxv2f64: vfwmacc.vf without v0.t.
define <vscale x 2 x double> @vfmacc_vf_nxv2f64_unmasked(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}
851
852declare <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
853declare <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
854declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
855declare <vscale x 4 x double> @llvm.vp.select.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
856declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
857
; f32->f64 widening FMA at e32/m2 (m4 result): single masked vfwmacc.vv.
define <vscale x 4 x double> @vfmacc_vv_nxv4f64(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}
870
; Unmasked nxv4f64 widening FMA: same as above without v0.t.
define <vscale x 4 x double> @vfmacc_vv_nxv4f64_unmasked(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}
883
; Scalar-splat f32 operand widened to nxv4f64: folds to masked vfwmacc.vf.
define <vscale x 4 x double> @vfmacc_vf_nxv4f64(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}
898
; Unmasked variant of vfmacc_vf_nxv4f64: vfwmacc.vf without v0.t.
define <vscale x 4 x double> @vfmacc_vf_nxv4f64_unmasked(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}
913
914declare <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
915declare <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
916declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
917declare <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
918declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
919
; Largest f32->f64 case (e32/m4 sources, m8 result): still a single masked
; vfwmacc.vv — no spill needed since the source operands stay at m4.
define <vscale x 8 x double> @vfmacc_vv_nxv8f64(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
932
; Unmasked nxv8f64 widening FMA: same as above without v0.t.
define <vscale x 8 x double> @vfmacc_vv_nxv8f64_unmasked(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}
945
; Scalar-splat f32 operand widened to nxv8f64: folds to masked vfwmacc.vf.
define <vscale x 8 x double> @vfmacc_vf_nxv8f64(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
960
; Unmasked variant of vfmacc_vf_nxv8f64: vfwmacc.vf without v0.t.
define <vscale x 8 x double> @vfmacc_vf_nxv8f64_unmasked(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}
975
976declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
977
; Double-width extension f16 -> f64: vfwmacc can only widen one step, so each
; operand is first widened f16->f32 with vfwcvt.f.f.v and the remaining
; f32->f64 step is fused into the masked vfwmacc.vv.
define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v11, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}
993
; Unmasked f16 -> f64 double-widening case: same vfwcvt + vfwmacc.vv sequence
; without v0.t.
define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v11, v8
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v11, v8
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}
1009
1010declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
1011
; nxv2 f16 -> f64 double-widening: one explicit vfwcvt.f.f.v per operand
; (f16->f32), then the final f32->f64 widening fused into masked vfwmacc.vv.
define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v12, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}
1027
; Unmasked variant of the nxv2 f16 -> f64 test above: same lowering shape
; (two vfwcvt.f.f.v then one vfwmacc.vv) but with no v0.t mask operands.
define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v12, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}
1043
1044declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
1045
; Masked f16 -> f64 double-widening at nxv4 (e16 sources at LMUL=1, e32
; intermediates at LMUL=2, e64 accumulator at LMUL=4): mask is applied to
; both converts and to the final widening vfwmacc.vv via v0.t.
define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v16, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v10, v16, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}
1061
; Unmasked variant of the nxv4 f16 -> f64 test above: identical instruction
; sequence without v0.t mask operands.
define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
; CHECK-NEXT:    vfwcvt.f.f.v v16, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v10, v16
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}
1077
1078declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
1079
; Masked f16 -> f64 double-widening at nxv8, the largest LMUL case here
; (e16 at m2, e32 intermediates at m4, e64 accumulator at m8): both converts
; and the widening vfwmacc.vv are masked with v0.t.
define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v24, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v12, v24, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
1095
; Unmasked variant of the nxv8 f16 -> f64 test above: identical instruction
; sequence without v0.t mask operands.
define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
; CHECK-NEXT:    vfwcvt.f.f.v v24, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v12, v24
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}
1111
; One extend feeding both multiplicands (squaring): with native zvfh the
; extend is folded into a single masked vfwmacc.vv using v8 twice; with
; zvfhmin the source is widened once with vfwcvt.f.f.v and the squared
; multiply-add is done with a non-widening vfmadd.vv reusing that result.
; (%b is intentionally unused - only %a is squared.)
define <vscale x 1 x float> @vfmacc_squared_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v9, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
1132
; Unmasked variant of the squaring test above: same per-subtarget lowering
; (single vfwmacc.vv under zvfh; vfwcvt.f.f.v + vfmadd.vv under zvfhmin)
; with no v0.t mask operands. (%b is intentionally unused.)
define <vscale x 1 x float> @vfmacc_squared_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v8
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v9, v10
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}
1153