; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 \
; RUN:     -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 \
; RUN:     -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.
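;
; As a sketch of why the operand orders matter (editorial note, not part of
; the autogenerated checks): vfmadd.vv vd, vs1, vs2 computes
; vd = (vd * vs1) + vs2, whereas vfmacc.vv vd, vs1, vs2 computes
; vd = (vs1 * vs2) + vd. Depending on whether the fma operand tied to the
; destination register is a multiplicand or the addend,
; TwoAddressInstructionPass must commute operands to select the matching
; form, which the permuted fma arguments below exercise.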

define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v10, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x bfloat> %vd
}

define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x bfloat> %vd
}


define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v9, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x bfloat> %vd
}

define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x bfloat> %vd
}


define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x bfloat> %vd
}

define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x bfloat> %vd
}


define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v20, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x bfloat> %vd
}

define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v20, v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x bfloat> %vd
}


define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x bfloat> %vd
}

define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x bfloat> %vd
}


define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v0, v16
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    vl8re16.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x bfloat> %vd
}

define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmv.v.x v24, a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v4
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x bfloat> %vd
}

declare <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)

define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x half> %vd
}

define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x half> %vd
}

declare <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>, metadata, metadata)

define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x half> %vd
}

define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x half> %splat, <vscale x 2 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x half> %vd
}

declare <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>, metadata, metadata)

define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x half> %vd
}

define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x half> %vd
}

declare <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, metadata, metadata)

define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v12, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x half> %vd
}

define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %splat, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x half> %vd
}

declare <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>, metadata, metadata)

define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v16, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 2
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 2
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x half> %vd
}

define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vmv.v.x v12, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x half> %vd
}

declare <vscale x 32 x half> @llvm.experimental.constrained.fma.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>, metadata, metadata)

define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vl8re16.v v24, (a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v16, v24
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 5
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 32 x half> @llvm.experimental.constrained.fma.nxv32f16(<vscale x 32 x half> %vc, <vscale x 32 x half> %vb, <vscale x 32 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x half> %vd
}

define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 32 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x half> @llvm.experimental.constrained.fma.nxv32f16(<vscale x 32 x half> %vb, <vscale x 32 x half> %splat, <vscale x 32 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x half> %vd
}

declare <vscale x 1 x float> @llvm.experimental.constrained.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, metadata, metadata)

define <vscale x 1 x float> @vfmadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x float> @llvm.experimental.constrained.fma.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x float> %vd
}

define <vscale x 1 x float> @vfmadd_vf_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x float> @llvm.experimental.constrained.fma.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %splat, <vscale x 1 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x float> %vd
}

declare <vscale x 2 x float> @llvm.experimental.constrained.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)

define <vscale x 2 x float> @vfmadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x float> @llvm.experimental.constrained.fma.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vc, <vscale x 2 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x float> %vd
}

define <vscale x 2 x float> @vfmadd_vf_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x float> @llvm.experimental.constrained.fma.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x float> %splat, <vscale x 2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x float> %vd
}

declare <vscale x 4 x float> @llvm.experimental.constrained.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, metadata, metadata)

define <vscale x 4 x float> @vfmadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x float> @llvm.experimental.constrained.fma.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x float> %va, <vscale x 4 x float> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x float> %vd
}

define <vscale x 4 x float> @vfmadd_vf_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x float> @llvm.experimental.constrained.fma.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %splat, <vscale x 4 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x float> %vd
}

declare <vscale x 8 x float> @llvm.experimental.constrained.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, metadata, metadata)

define <vscale x 8 x float> @vfmadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x float> @llvm.experimental.constrained.fma.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %vc, <vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x float> %vd
}

define <vscale x 8 x float> @vfmadd_vf_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x float> @llvm.experimental.constrained.fma.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %splat, <vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x float> %vd
}

declare <vscale x 16 x float> @llvm.experimental.constrained.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, metadata, metadata)

define <vscale x 16 x float> @vfmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v24, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x float> @llvm.experimental.constrained.fma.nxv16f32(<vscale x 16 x float> %vc, <vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x float> %vd
}

define <vscale x 16 x float> @vfmadd_vf_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x float> @llvm.experimental.constrained.fma.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %splat, <vscale x 16 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x float> %vd
}

declare <vscale x 1 x double> @llvm.experimental.constrained.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, metadata, metadata)

define <vscale x 1 x double> @vfmadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x double> @llvm.experimental.constrained.fma.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x double> %vd
}

define <vscale x 1 x double> @vfmadd_vf_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x double> @llvm.experimental.constrained.fma.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %splat, <vscale x 1 x double> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x double> %vd
}

declare <vscale x 2 x double> @llvm.experimental.constrained.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, metadata, metadata)

define <vscale x 2 x double> @vfmadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x double> @llvm.experimental.constrained.fma.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vc, <vscale x 2 x double> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x double> %vd
}

define <vscale x 2 x double> @vfmadd_vf_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x double> @llvm.experimental.constrained.fma.nxv2f64(<vscale x 2 x double> %vb, <vscale x 2 x double> %splat, <vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x double> %vd
}

declare <vscale x 4 x double> @llvm.experimental.constrained.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, metadata, metadata)

define <vscale x 4 x double> @vfmadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x double> @llvm.experimental.constrained.fma.nxv4f64(<vscale x 4 x double> %vb, <vscale x 4 x double> %va, <vscale x 4 x double> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x double> %vd
}

define <vscale x 4 x double> @vfmadd_vf_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x double> @llvm.experimental.constrained.fma.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %splat, <vscale x 4 x double> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x double> %vd
}

declare <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, metadata, metadata)

define <vscale x 8 x double> @vfmadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v24
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x double> %vd
}

define <vscale x 8 x double> @vfmadd_vf_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %splat, <vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x double> %vd
}