; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.

; fma(%va, %vb, %vc): straight operand order, addend in the last position.
define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v10, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc)
  ret <vscale x 1 x bfloat> %vd
}

; fma(%vb, %vc, %va): %va moved to the addend position to exercise commuting.
define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16_commuted(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1bf16_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x bfloat> %va)
  ret <vscale x 1 x bfloat> %vd
}

; fma(%va, splat(%c), %vb): scalar multiplicand splatted to a vector.
define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb)
  ret <vscale x 1 x bfloat> %vd
}

declare <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>)

; fma(%va, %vc, %vb): multiplicands swapped relative to the plain order.
define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v9, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb)
  ret <vscale x 2 x bfloat> %vd
}

; fma(%vb, splat(%c), %va): splat multiplicand with %va as the addend.
define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va)
  ret <vscale x 2 x bfloat> %vd
}

declare <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>)

; fma(%vb, %va, %vc): multiplicands swapped, addend in the last position.
define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc)
  ret <vscale x 4 x bfloat> %vd
}

; fma(%va, splat(%c), %vb): scalar multiplicand splatted to a vector.
define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb)
  ret <vscale x 4 x bfloat> %vd
}

declare <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

; fma(%vb, %vc, %va): %va moved to the addend position to exercise commuting.
define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v20, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va)
  ret <vscale x 8 x bfloat> %vd
}

; fma(%vb, splat(%c), %va): splat multiplicand with %va as the addend.
define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v20, v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
  ret <vscale x 8 x bfloat> %vd
}

declare <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>)

; fma(%vc, %va, %vb): fully rotated operand order.
define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v16
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v0, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
  ret <vscale x 16 x bfloat> %vd
}

; fma(%va, splat(%c), %vb): scalar multiplicand splatted to a vector.
define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb)
  ret <vscale x 16 x bfloat> %vd
}

declare <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>)

; fma(%vc, %vb, %va) at the widest LMUL: the widened operands exceed the
; register budget, so the checks also cover vector spill/reload to the stack.
define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv32bf16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    addi sp, sp, -16
; ZVFH-NEXT:    .cfi_def_cfa_offset 16
; ZVFH-NEXT:    csrr a1, vlenb
; ZVFH-NEXT:    slli a1, a1, 5
; ZVFH-NEXT:    sub sp, sp, a1
; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vmv8r.v v0, v16
; ZVFH-NEXT:    addi a1, sp, 16
; ZVFH-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFH-NEXT:    vmv8r.v v16, v8
; ZVFH-NEXT:    vl8re16.v v8, (a0)
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v0, v8, v24
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    addi a0, sp, 16
; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v28
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v16, v8, v24
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 5
; ZVFH-NEXT:    add sp, sp, a0
; ZVFH-NEXT:    .cfi_def_cfa sp, 16
; ZVFH-NEXT:    addi sp, sp, 16
; ZVFH-NEXT:    .cfi_def_cfa_offset 0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv32bf16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 5
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v28
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va)
  ret <vscale x 32 x bfloat> %vd
}

; fma(%vb, splat(%c), %va) at the widest LMUL; also covers spill/reload of the
; splatted scalar operand.
define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) {
; ZVFH-LABEL: vfmadd_vf_nxv32bf16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    addi sp, sp, -16
; ZVFH-NEXT:    .cfi_def_cfa_offset 16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 5
; ZVFH-NEXT:    sub sp, sp, a0
; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vmv8r.v v0, v16
; ZVFH-NEXT:    addi a0, sp, 16
; ZVFH-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vmv8r.v v16, v8
; ZVFH-NEXT:    fmv.x.h a0, fa0
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFH-NEXT:    csrr a1, vlenb
; ZVFH-NEXT:    slli a1, a1, 4
; ZVFH-NEXT:    add a1, sp, a1
; ZVFH-NEXT:    addi a1, a1, 16
; ZVFH-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFH-NEXT:    csrr a1, vlenb
; ZVFH-NEXT:    slli a1, a1, 3
; ZVFH-NEXT:    add a1, sp, a1
; ZVFH-NEXT:    addi a1, a1, 16
; ZVFH-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vmv.v.x v24, a0
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v0
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v24, v0
; ZVFH-NEXT:    vmv.v.v v24, v8
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    addi a0, sp, 16
; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v4
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v16, v8, v0
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 5
; ZVFH-NEXT:    add sp, sp, a0
; ZVFH-NEXT:    .cfi_def_cfa sp, 16
; ZVFH-NEXT:    addi sp, sp, 16
; ZVFH-NEXT:    .cfi_def_cfa_offset 0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv32bf16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 4
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v0
; ZVFHMIN-NEXT:    vmv.v.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v4
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va)
  ret <vscale x 32 x bfloat> %vd
}

declare <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)

; fma(%va, %vb, %vc): with Zvfh this selects vfmadd.vv directly; with Zvfhmin
; the operands are widened to f32 first.
define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc)
  ret <vscale x 1 x half> %vd
}

; fma(%vb, %vc, %va): commuted order selects vfmacc.vv under Zvfh.
define <vscale x 1 x half> @vfmadd_vv_nxv1f16_commuted(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv1f16_commuted:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16_commuted:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %vc, <vscale x 1 x half> %va)
  ret <vscale x 1 x half> %vd
}

; fma(%va, splat(%c), %vb): Zvfh keeps the scalar and selects vfmadd.vf;
; Zvfhmin splats and widens it instead.
define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, <vscale x 1 x half> %vb)
  ret <vscale x 1 x half> %vd
}

declare <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)

; fma(%va, %vc, %vb): multiplicands swapped relative to the plain order.
define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb)
  ret <vscale x 2 x half> %vd
}

define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  ; Scalar %c is splatted; fma(vb, splat, va) keeps %va in the addend slot.
  %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %vb, <vscale x 2 x half> %splat, <vscale x 2 x half> %va)
  ret <vscale x 2 x half> %vd
}
709
710declare <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
711
define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  ; fma operands ordered (vb, va, vc): product operands swapped relative to
  ; the straight (va, vb, vc) order to exercise commuting.
  %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc)
  ret <vscale x 4 x half> %vd
}
733
define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) multiplies %va by the splat
  ; with %vb as the addend.
  %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, <vscale x 4 x half> %vb)
  ret <vscale x 4 x half> %vd
}
759
760declare <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
761
define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v12, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    ret
  ; fma operands ordered (vb, vc, va): %va (the return register) is the
  ; addend, an order used to exercise commuting.
  %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va)
  ret <vscale x 8 x half> %vd
}
783
define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT:    ret
  ; Scalar %c is splatted; fma(vb, splat, va) keeps %va in the addend slot.
  %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %splat, <vscale x 8 x half> %va)
  ret <vscale x 8 x half> %vd
}
809
810declare <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>)
811
define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v16, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  ; fma operands ordered (vc, va, vb): %vc leads the product and %vb is the
  ; addend, an order used to exercise commuting.
  %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb)
  ret <vscale x 16 x half> %vd
}
833
define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vmv.v.x v12, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) multiplies %va by the splat
  ; with %vb as the addend.
  %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat, <vscale x 16 x half> %vb)
  ret <vscale x 16 x half> %vd
}
859
860declare <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>)
861
define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vl8re16.v v24, (a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v16, v24
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 5
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  ; fma operands ordered (vc, vb, va). At m8 the min-precision lowering
  ; widens all three operands to e32 and, as the checks above show, spills
  ; intermediate m8 groups to a vlenb-scaled stack area around the split fma.
  %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vc, <vscale x 32 x half> %vb, <vscale x 32 x half> %va)
  ret <vscale x 32 x half> %vd
}
954
define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 4
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v0
; ZVFHMIN-NEXT:    vmv.v.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  ; Scalar %c is splatted at m8; fma(vb, splat, va) keeps %va in the addend
  ; slot. As the checks above show, the min-precision lowering widens to e32
  ; and spills m8 groups to a vlenb-scaled stack area around the split fma.
  %head = insertelement <vscale x 32 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vb, <vscale x 32 x half> %splat, <vscale x 32 x half> %va)
  ret <vscale x 32 x half> %vd
}
1056
1057declare <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>)
1058
define <vscale x 1 x float> @vfmadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  ; Straight operand order (va, vb, vc), the baseline for the commuted cases.
  %vd = call <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc)
  ret <vscale x 1 x float> %vd
}
1068
define <vscale x 1 x float> @vfmadd_vf_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) with %vb as the addend.
  %head = insertelement <vscale x 1 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %splat, <vscale x 1 x float> %vb)
  ret <vscale x 1 x float> %vd
}
1080
1081declare <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
1082
define <vscale x 2 x float> @vfmadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v9
; CHECK-NEXT:    ret
  ; fma operands ordered (va, vc, vb): %vc in the second product slot and %vb
  ; as the addend, one of the orders used to exercise commuting.
  %vd = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vc, <vscale x 2 x float> %vb)
  ret <vscale x 2 x float> %vd
}
1092
define <vscale x 2 x float> @vfmadd_vf_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(vb, splat, va) keeps %va in the addend slot.
  %head = insertelement <vscale x 2 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> %vb, <vscale x 2 x float> %splat, <vscale x 2 x float> %va)
  ret <vscale x 2 x float> %vd
}
1104
1105declare <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
1106
define <vscale x 4 x float> @vfmadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  ; fma operands ordered (vb, va, vc): product operands swapped to exercise
  ; commuting.
  %vd = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> %vb, <vscale x 4 x float> %va, <vscale x 4 x float> %vc)
  ret <vscale x 4 x float> %vd
}
1116
define <vscale x 4 x float> @vfmadd_vf_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) with %vb as the addend.
  %head = insertelement <vscale x 4 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %splat, <vscale x 4 x float> %vb)
  ret <vscale x 4 x float> %vd
}
1128
1129declare <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>)
1130
define <vscale x 8 x float> @vfmadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v12
; CHECK-NEXT:    ret
  ; fma operands ordered (vb, vc, va): %va (the return register) is the
  ; addend, an order used to exercise commuting.
  %vd = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %vc, <vscale x 8 x float> %va)
  ret <vscale x 8 x float> %vd
}
1140
define <vscale x 8 x float> @vfmadd_vf_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(vb, splat, va) keeps %va in the addend slot.
  %head = insertelement <vscale x 8 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %splat, <vscale x 8 x float> %va)
  ret <vscale x 8 x float> %vd
}
1152
1153declare <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>)
1154
define <vscale x 16 x float> @vfmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v24, v16
; CHECK-NEXT:    ret
  ; fma operands ordered (vc, va, vb); at m8 the third vector argument is
  ; passed indirectly and loaded from (a0), as the checks above show.
  %vd = call <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float> %vc, <vscale x 16 x float> %va, <vscale x 16 x float> %vb)
  ret <vscale x 16 x float> %vd
}
1165
define <vscale x 16 x float> @vfmadd_vf_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) with %vb as the addend.
  %head = insertelement <vscale x 16 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %splat, <vscale x 16 x float> %vb)
  ret <vscale x 16 x float> %vd
}
1177
1178declare <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>)
1179
define <vscale x 1 x double> @vfmadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  ; Straight operand order (va, vb, vc), the baseline for the commuted cases.
  %vd = call <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc)
  ret <vscale x 1 x double> %vd
}
1189
define <vscale x 1 x double> @vfmadd_vf_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) with %vb as the addend.
  %head = insertelement <vscale x 1 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %splat, <vscale x 1 x double> %vb)
  ret <vscale x 1 x double> %vd
}
1201
1202declare <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
1203
define <vscale x 2 x double> @vfmadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v10
; CHECK-NEXT:    ret
  ; fma operands ordered (va, vc, vb): %vc in the second product slot and %vb
  ; as the addend, one of the orders used to exercise commuting.
  %vd = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vc, <vscale x 2 x double> %vb)
  ret <vscale x 2 x double> %vd
}
1213
define <vscale x 2 x double> @vfmadd_vf_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(vb, splat, va) keeps %va in the addend slot.
  %head = insertelement <vscale x 2 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> %vb, <vscale x 2 x double> %splat, <vscale x 2 x double> %va)
  ret <vscale x 2 x double> %vd
}
1225
1226declare <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>)
1227
define <vscale x 4 x double> @vfmadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  ; fma operands ordered (vb, va, vc): product operands swapped to exercise
  ; commuting.
  %vd = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> %vb, <vscale x 4 x double> %va, <vscale x 4 x double> %vc)
  ret <vscale x 4 x double> %vd
}
1237
define <vscale x 4 x double> @vfmadd_vf_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(va, splat, vb) with %vb as the addend.
  %head = insertelement <vscale x 4 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %splat, <vscale x 4 x double> %vb)
  ret <vscale x 4 x double> %vd
}
1249
1250declare <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
1251
define <vscale x 8 x double> @vfmadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v24
; CHECK-NEXT:    ret
  ; fma operands ordered (vb, vc, va) with %va as the addend; at m8 the third
  ; vector argument is passed indirectly and loaded from (a0), as checked above.
  %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %va)
  ret <vscale x 8 x double> %vd
}
1262
define <vscale x 8 x double> @vfmadd_vf_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v16
; CHECK-NEXT:    ret
  ; Scalar %c is splatted; fma(vb, splat, va) keeps %va in the addend slot.
  %head = insertelement <vscale x 8 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %splat, <vscale x 8 x double> %va)
  ret <vscale x 8 x double> %vd
}
1274