xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll (revision 8ce81f17a16b8b689895c7c093d0401a75c09882)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
14
declare <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

; Masked vector-vector VP fadd on nxv1bf16: operands are widened to f32
; (vfwcvtbf16), added at e32, then narrowed back (vfncvtbf16).
define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
31
; Unmasked variant: all-true mask is expressed via splat (i1 true), so no v0.t.
define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
46
; Vector-scalar form: the bf16 scalar is splatted through the integer domain
; (fmv.x.h + vmv.v.x) before widening.
define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
65
; Same as vfadd_vf_nxv1bf16 but with the splat as the first fadd operand.
define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
84
; Unmasked vector-scalar form.
define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
103
; Unmasked vector-scalar form with commuted operands.
define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
122
declare <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

; nxv2bf16 masked vv add: e16/mf2 widened to e32/m1.
define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}
139
; nxv2bf16 unmasked vv add.
define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}
154
; nxv2bf16 masked vector-scalar add.
define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}
173
; nxv2bf16 unmasked vector-scalar add.
define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}
192
declare <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

; nxv4bf16 masked vv add: e16/m1 widened to e32/m2.
define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v12, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}
209
; nxv4bf16 unmasked vv add.
define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}
224
; nxv4bf16 masked vector-scalar add.
define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v10, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}
243
; nxv4bf16 unmasked vector-scalar add.
define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v10, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}
262
declare <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

; nxv8bf16 masked vv add: e16/m2 widened to e32/m4.
define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}
279
; nxv8bf16 unmasked vv add.
define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}
294
; nxv8bf16 masked vector-scalar add.
define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v12, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}
313
; nxv8bf16 unmasked vector-scalar add.
define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v12, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}
332
declare <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

; nxv16bf16 masked vv add: e16/m4 widened to e32/m8.
define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}
349
; nxv16bf16 unmasked vv add.
define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}
364
; nxv16bf16 masked vector-scalar add.
define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}
383
; nxv16bf16 unmasked vector-scalar add.
define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}
402
declare <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

; nxv32bf16 masked vv add: the widened f32 form needs 2x m8 registers, so
; codegen splits the operation into an EVL tail half and a low half, with
; register spills to a vlenb-sized stack area.
define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    add a1, a1, a2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 3
; CHECK-NEXT:    add a3, sp, a3
; CHECK-NEXT:    addi a3, a3, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB22_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB22_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
493
; nxv32bf16 unmasked vv add: still split into two halves; an all-ones mask is
; materialized with vmset.m for the tail half.
define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v24
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v24, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB23_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB23_2:
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
546
; nxv32bf16 masked vector-scalar add: scalar splatted via fmv.x.h + vmv.v.x,
; then the same two-half split with stack spills as the vv nxv32 case.
define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    add a1, a1, a2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 3
; CHECK-NEXT:    add a3, sp, a3
; CHECK-NEXT:    addi a3, a3, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v24, v8, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB24_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB24_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
; CHECK-NEXT:    vmv8r.v v24, v16
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
633
; Unmasked (splat-true mask) vp.fadd of nxv32bf16 with a splatted scalar.
; No bf16 vfadd exists, so codegen widens each half to f32 (vfwcvtbf16),
; adds at e32/m8, and narrows back (vfncvtbf16). The EVL is split: the
; high half (evl - vlenb*2, clamped via sltu/and) is handled first under a
; computed mask, then the low half is handled unmasked after the bltu clamp.
634define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
635; CHECK-LABEL: vfadd_vf_nxv32bf16_unmasked:
636; CHECK:       # %bb.0:
637; CHECK-NEXT:    addi sp, sp, -16
638; CHECK-NEXT:    .cfi_def_cfa_offset 16
639; CHECK-NEXT:    csrr a1, vlenb
640; CHECK-NEXT:    slli a1, a1, 3
641; CHECK-NEXT:    sub sp, sp, a1
642; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
643; CHECK-NEXT:    fmv.x.h a1, fa0
644; CHECK-NEXT:    csrr a2, vlenb
645; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
646; CHECK-NEXT:    vmset.m v24
647; CHECK-NEXT:    vmv.v.x v16, a1
648; CHECK-NEXT:    slli a1, a2, 1
649; CHECK-NEXT:    srli a2, a2, 2
650; CHECK-NEXT:    sub a3, a0, a1
651; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
652; CHECK-NEXT:    vslidedown.vx v0, v24, a2
653; CHECK-NEXT:    sltu a2, a0, a3
654; CHECK-NEXT:    addi a2, a2, -1
655; CHECK-NEXT:    and a2, a2, a3
656; CHECK-NEXT:    addi a3, sp, 16
657; CHECK-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
658; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
659; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
660; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
661; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
662; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
663; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
664; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
665; CHECK-NEXT:    bltu a0, a1, .LBB25_2
666; CHECK-NEXT:  # %bb.1:
667; CHECK-NEXT:    mv a0, a1
668; CHECK-NEXT:  .LBB25_2:
669; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
670; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
671; CHECK-NEXT:    addi a0, sp, 16
672; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
673; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
674; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
675; CHECK-NEXT:    vfadd.vv v16, v16, v24
676; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
677; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
678; CHECK-NEXT:    csrr a0, vlenb
679; CHECK-NEXT:    slli a0, a0, 3
680; CHECK-NEXT:    add sp, sp, a0
681; CHECK-NEXT:    .cfi_def_cfa sp, 16
682; CHECK-NEXT:    addi sp, sp, 16
683; CHECK-NEXT:    .cfi_def_cfa_offset 0
684; CHECK-NEXT:    ret
685  %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
686  %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
687  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
688  ret <vscale x 32 x bfloat> %v
689}
690declare <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
691
; Masked vector-vector vp.fadd, nxv1f16: ZVFH emits a single masked
; vfadd.vv; ZVFHMIN widens both operands to f32 (vfwcvt), adds at
; e32/mf2, and narrows back (vfncvt).
692define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
693; ZVFH-LABEL: vfadd_vv_nxv1f16:
694; ZVFH:       # %bb.0:
695; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
696; ZVFH-NEXT:    vfadd.vv v8, v8, v9, v0.t
697; ZVFH-NEXT:    ret
698;
699; ZVFHMIN-LABEL: vfadd_vv_nxv1f16:
700; ZVFHMIN:       # %bb.0:
701; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
702; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
703; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
704; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
705; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
706; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
707; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
708; ZVFHMIN-NEXT:    ret
709  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
710  ret <vscale x 1 x half> %v
711}
712
; Same as above but with an all-true mask: the mask operand (v0.t) drops
; out of every generated instruction.
713define <vscale x 1 x half> @vfadd_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %b, i32 zeroext %evl) {
714; ZVFH-LABEL: vfadd_vv_nxv1f16_unmasked:
715; ZVFH:       # %bb.0:
716; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
717; ZVFH-NEXT:    vfadd.vv v8, v8, v9
718; ZVFH-NEXT:    ret
719;
720; ZVFHMIN-LABEL: vfadd_vv_nxv1f16_unmasked:
721; ZVFHMIN:       # %bb.0:
722; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
723; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
724; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
725; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
726; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
727; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
728; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
729; ZVFHMIN-NEXT:    ret
730  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
731  ret <vscale x 1 x half> %v
732}
733
; Vector-scalar form: ZVFH folds the splat into vfadd.vf; ZVFHMIN has to
; materialize the splat (fmv.x.h + vmv.v.x) before widening and adding.
734define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
735; ZVFH-LABEL: vfadd_vf_nxv1f16:
736; ZVFH:       # %bb.0:
737; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
738; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
739; ZVFH-NEXT:    ret
740;
741; ZVFHMIN-LABEL: vfadd_vf_nxv1f16:
742; ZVFHMIN:       # %bb.0:
743; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
744; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
745; ZVFHMIN-NEXT:    vmv.v.x v9, a1
746; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
747; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
748; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
749; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
750; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
751; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
752; ZVFHMIN-NEXT:    ret
753  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
754  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
755  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
756  ret <vscale x 1 x half> %v
757}
758
; Splat on the LHS of the fadd: commutativity lets ZVFH still use
; vfadd.vf; ZVFHMIN swaps the vfadd.vv operand order instead.
759define <vscale x 1 x half> @vfadd_vf_nxv1f16_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
760; ZVFH-LABEL: vfadd_vf_nxv1f16_commute:
761; ZVFH:       # %bb.0:
762; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
763; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
764; ZVFH-NEXT:    ret
765;
766; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_commute:
767; ZVFHMIN:       # %bb.0:
768; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
769; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
770; ZVFHMIN-NEXT:    vmv.v.x v9, a1
771; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
772; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
773; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
774; ZVFHMIN-NEXT:    vfadd.vv v9, v8, v10, v0.t
775; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
776; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
777; ZVFHMIN-NEXT:    ret
778  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
779  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
780  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
781  ret <vscale x 1 x half> %v
782}
783
; Unmasked vector-scalar form.
784define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) {
785; ZVFH-LABEL: vfadd_vf_nxv1f16_unmasked:
786; ZVFH:       # %bb.0:
787; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
788; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
789; ZVFH-NEXT:    ret
790;
791; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_unmasked:
792; ZVFHMIN:       # %bb.0:
793; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
794; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
795; ZVFHMIN-NEXT:    vmv.v.x v9, a1
796; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
797; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
798; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
799; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
800; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
801; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
802; ZVFHMIN-NEXT:    ret
803  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
804  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
805  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
806  ret <vscale x 1 x half> %v
807}
808
; Unmasked, splat on the LHS.
809define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked_commute(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) {
810; ZVFH-LABEL: vfadd_vf_nxv1f16_unmasked_commute:
811; ZVFH:       # %bb.0:
812; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
813; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
814; ZVFH-NEXT:    ret
815;
816; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_unmasked_commute:
817; ZVFHMIN:       # %bb.0:
818; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
819; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
820; ZVFHMIN-NEXT:    vmv.v.x v9, a1
821; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
822; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
823; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
824; ZVFHMIN-NEXT:    vfadd.vv v9, v8, v10
825; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
826; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
827; ZVFHMIN-NEXT:    ret
828  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
829  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
830  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
831  ret <vscale x 1 x half> %v
832}
833
834declare <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)
835
; nxv2f16 variants: same lowering pattern as nxv1f16 but at e16/mf2,
; widening to e32/m1 on ZVFHMIN.
836define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
837; ZVFH-LABEL: vfadd_vv_nxv2f16:
838; ZVFH:       # %bb.0:
839; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
840; ZVFH-NEXT:    vfadd.vv v8, v8, v9, v0.t
841; ZVFH-NEXT:    ret
842;
843; ZVFHMIN-LABEL: vfadd_vv_nxv2f16:
844; ZVFHMIN:       # %bb.0:
845; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
846; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
847; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
848; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
849; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
850; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
851; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
852; ZVFHMIN-NEXT:    ret
853  %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
854  ret <vscale x 2 x half> %v
855}
856
; All-true mask: unmasked instruction forms.
857define <vscale x 2 x half> @vfadd_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %b, i32 zeroext %evl) {
858; ZVFH-LABEL: vfadd_vv_nxv2f16_unmasked:
859; ZVFH:       # %bb.0:
860; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
861; ZVFH-NEXT:    vfadd.vv v8, v8, v9
862; ZVFH-NEXT:    ret
863;
864; ZVFHMIN-LABEL: vfadd_vv_nxv2f16_unmasked:
865; ZVFHMIN:       # %bb.0:
866; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
867; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
868; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
869; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
870; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
871; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
872; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
873; ZVFHMIN-NEXT:    ret
874  %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
875  ret <vscale x 2 x half> %v
876}
877
; Vector-scalar (splat) form, masked.
878define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
879; ZVFH-LABEL: vfadd_vf_nxv2f16:
880; ZVFH:       # %bb.0:
881; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
882; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
883; ZVFH-NEXT:    ret
884;
885; ZVFHMIN-LABEL: vfadd_vf_nxv2f16:
886; ZVFHMIN:       # %bb.0:
887; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
888; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
889; ZVFHMIN-NEXT:    vmv.v.x v9, a1
890; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
891; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
892; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
893; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
894; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
895; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
896; ZVFHMIN-NEXT:    ret
897  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
898  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
899  %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
900  ret <vscale x 2 x half> %v
901}
902
; Vector-scalar (splat) form, unmasked.
903define <vscale x 2 x half> @vfadd_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, half %b, i32 zeroext %evl) {
904; ZVFH-LABEL: vfadd_vf_nxv2f16_unmasked:
905; ZVFH:       # %bb.0:
906; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
907; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
908; ZVFH-NEXT:    ret
909;
910; ZVFHMIN-LABEL: vfadd_vf_nxv2f16_unmasked:
911; ZVFHMIN:       # %bb.0:
912; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
913; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
914; ZVFHMIN-NEXT:    vmv.v.x v9, a1
915; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
916; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
917; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
918; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
919; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
920; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
921; ZVFHMIN-NEXT:    ret
922  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
923  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
924  %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
925  ret <vscale x 2 x half> %v
926}
927
928declare <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32)
929
; nxv4f16 variants: e16/m1 operands, widened to e32/m2 on ZVFHMIN, so
; the f32 temporaries occupy register groups (v10/v12).
930define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
931; ZVFH-LABEL: vfadd_vv_nxv4f16:
932; ZVFH:       # %bb.0:
933; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
934; ZVFH-NEXT:    vfadd.vv v8, v8, v9, v0.t
935; ZVFH-NEXT:    ret
936;
937; ZVFHMIN-LABEL: vfadd_vv_nxv4f16:
938; ZVFHMIN:       # %bb.0:
939; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
940; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
941; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
942; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
943; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10, v0.t
944; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
945; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
946; ZVFHMIN-NEXT:    ret
947  %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
948  ret <vscale x 4 x half> %v
949}
950
; All-true mask: unmasked instruction forms.
951define <vscale x 4 x half> @vfadd_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %b, i32 zeroext %evl) {
952; ZVFH-LABEL: vfadd_vv_nxv4f16_unmasked:
953; ZVFH:       # %bb.0:
954; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
955; ZVFH-NEXT:    vfadd.vv v8, v8, v9
956; ZVFH-NEXT:    ret
957;
958; ZVFHMIN-LABEL: vfadd_vv_nxv4f16_unmasked:
959; ZVFHMIN:       # %bb.0:
960; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
961; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
962; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
963; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
964; ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
965; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
966; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
967; ZVFHMIN-NEXT:    ret
968  %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
969  ret <vscale x 4 x half> %v
970}
971
; Vector-scalar (splat) form, masked.
972define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
973; ZVFH-LABEL: vfadd_vf_nxv4f16:
974; ZVFH:       # %bb.0:
975; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
976; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
977; ZVFH-NEXT:    ret
978;
979; ZVFHMIN-LABEL: vfadd_vf_nxv4f16:
980; ZVFHMIN:       # %bb.0:
981; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
982; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
983; ZVFHMIN-NEXT:    vmv.v.x v9, a1
984; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
985; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
986; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
987; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12, v0.t
988; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
989; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
990; ZVFHMIN-NEXT:    ret
991  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
992  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
993  %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
994  ret <vscale x 4 x half> %v
995}
996
; Vector-scalar (splat) form, unmasked.
997define <vscale x 4 x half> @vfadd_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, half %b, i32 zeroext %evl) {
998; ZVFH-LABEL: vfadd_vf_nxv4f16_unmasked:
999; ZVFH:       # %bb.0:
1000; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1001; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
1002; ZVFH-NEXT:    ret
1003;
1004; ZVFHMIN-LABEL: vfadd_vf_nxv4f16_unmasked:
1005; ZVFHMIN:       # %bb.0:
1006; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1007; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1008; ZVFHMIN-NEXT:    vmv.v.x v9, a1
1009; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
1010; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
1011; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
1012; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
1013; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
1014; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
1015; ZVFHMIN-NEXT:    ret
1016  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
1017  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
1018  %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1019  ret <vscale x 4 x half> %v
1020}
1021
1022declare <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32)
1023
; nxv8f16 variants: e16/m2 operands, widened to e32/m4 on ZVFHMIN.
1024define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1025; ZVFH-LABEL: vfadd_vv_nxv8f16:
1026; ZVFH:       # %bb.0:
1027; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1028; ZVFH-NEXT:    vfadd.vv v8, v8, v10, v0.t
1029; ZVFH-NEXT:    ret
1030;
1031; ZVFHMIN-LABEL: vfadd_vv_nxv8f16:
1032; ZVFHMIN:       # %bb.0:
1033; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1034; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10, v0.t
1035; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
1036; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1037; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12, v0.t
1038; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1039; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
1040; ZVFHMIN-NEXT:    ret
1041  %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
1042  ret <vscale x 8 x half> %v
1043}
1044
; All-true mask: unmasked instruction forms.
1045define <vscale x 8 x half> @vfadd_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %b, i32 zeroext %evl) {
1046; ZVFH-LABEL: vfadd_vv_nxv8f16_unmasked:
1047; ZVFH:       # %bb.0:
1048; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1049; ZVFH-NEXT:    vfadd.vv v8, v8, v10
1050; ZVFH-NEXT:    ret
1051;
1052; ZVFHMIN-LABEL: vfadd_vv_nxv8f16_unmasked:
1053; ZVFHMIN:       # %bb.0:
1054; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1055; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
1056; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
1057; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1058; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12
1059; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1060; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
1061; ZVFHMIN-NEXT:    ret
1062  %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1063  ret <vscale x 8 x half> %v
1064}
1065
; Vector-scalar (splat) form, masked.
1066define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1067; ZVFH-LABEL: vfadd_vf_nxv8f16:
1068; ZVFH:       # %bb.0:
1069; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1070; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1071; ZVFH-NEXT:    ret
1072;
1073; ZVFHMIN-LABEL: vfadd_vf_nxv8f16:
1074; ZVFHMIN:       # %bb.0:
1075; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1076; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1077; ZVFHMIN-NEXT:    vmv.v.x v10, a1
1078; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
1079; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
1080; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1081; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v16, v0.t
1082; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1083; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
1084; ZVFHMIN-NEXT:    ret
1085  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
1086  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
1087  %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
1088  ret <vscale x 8 x half> %v
1089}
1090
; Vector-scalar (splat) form, unmasked.
1091define <vscale x 8 x half> @vfadd_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, half %b, i32 zeroext %evl) {
1092; ZVFH-LABEL: vfadd_vf_nxv8f16_unmasked:
1093; ZVFH:       # %bb.0:
1094; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1095; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
1096; ZVFH-NEXT:    ret
1097;
1098; ZVFHMIN-LABEL: vfadd_vf_nxv8f16_unmasked:
1099; ZVFHMIN:       # %bb.0:
1100; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1101; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1102; ZVFHMIN-NEXT:    vmv.v.x v10, a1
1103; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
1104; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
1105; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
1106; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v16
1107; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1108; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
1109; ZVFHMIN-NEXT:    ret
1110  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
1111  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
1112  %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1113  ret <vscale x 8 x half> %v
1114}
1115
1116declare <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32)
1117
; nxv16f16 variants: e16/m4 operands; the ZVFHMIN widened form uses
; e32/m8, the largest register grouping, but still fits without spilling.
1118define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1119; ZVFH-LABEL: vfadd_vv_nxv16f16:
1120; ZVFH:       # %bb.0:
1121; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1122; ZVFH-NEXT:    vfadd.vv v8, v8, v12, v0.t
1123; ZVFH-NEXT:    ret
1124;
1125; ZVFHMIN-LABEL: vfadd_vv_nxv16f16:
1126; ZVFHMIN:       # %bb.0:
1127; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1128; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
1129; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
1130; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1131; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16, v0.t
1132; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1133; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
1134; ZVFHMIN-NEXT:    ret
1135  %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
1136  ret <vscale x 16 x half> %v
1137}
1138
; All-true mask: unmasked instruction forms.
1139define <vscale x 16 x half> @vfadd_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %b, i32 zeroext %evl) {
1140; ZVFH-LABEL: vfadd_vv_nxv16f16_unmasked:
1141; ZVFH:       # %bb.0:
1142; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1143; ZVFH-NEXT:    vfadd.vv v8, v8, v12
1144; ZVFH-NEXT:    ret
1145;
1146; ZVFHMIN-LABEL: vfadd_vv_nxv16f16_unmasked:
1147; ZVFHMIN:       # %bb.0:
1148; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1149; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
1150; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
1151; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1152; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
1153; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1154; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
1155; ZVFHMIN-NEXT:    ret
1156  %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1157  ret <vscale x 16 x half> %v
1158}
1159
; Vector-scalar (splat) form, masked.
1160define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1161; ZVFH-LABEL: vfadd_vf_nxv16f16:
1162; ZVFH:       # %bb.0:
1163; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1164; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1165; ZVFH-NEXT:    ret
1166;
1167; ZVFHMIN-LABEL: vfadd_vf_nxv16f16:
1168; ZVFHMIN:       # %bb.0:
1169; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1170; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1171; ZVFHMIN-NEXT:    vmv.v.x v12, a1
1172; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
1173; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
1174; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1175; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24, v0.t
1176; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1177; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
1178; ZVFHMIN-NEXT:    ret
1179  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
1180  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
1181  %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
1182  ret <vscale x 16 x half> %v
1183}
1184
; Vector-scalar (splat) form, unmasked.
1185define <vscale x 16 x half> @vfadd_vf_nxv16f16_unmasked(<vscale x 16 x half> %va, half %b, i32 zeroext %evl) {
1186; ZVFH-LABEL: vfadd_vf_nxv16f16_unmasked:
1187; ZVFH:       # %bb.0:
1188; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1189; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
1190; ZVFH-NEXT:    ret
1191;
1192; ZVFHMIN-LABEL: vfadd_vf_nxv16f16_unmasked:
1193; ZVFHMIN:       # %bb.0:
1194; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1195; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1196; ZVFHMIN-NEXT:    vmv.v.x v12, a1
1197; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
1198; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
1199; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1200; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24
1201; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1202; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
1203; ZVFHMIN-NEXT:    ret
1204  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
1205  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
1206  %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1207  ret <vscale x 16 x half> %v
1208}
1209
1210declare <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32)
1211
; Masked vp.fadd of nxv32f16. On ZVFHMIN the f32-widened operands need
; 2x the register space, so the EVL is split in two halves (sub/sltu/and
; computes the clamped high-half length, bltu clamps the low half), the
; mask is saved in v7 and slid down for the high half, and 24*vlenb of
; stack is used for three 8-register spill slots.
1212define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1213; ZVFH-LABEL: vfadd_vv_nxv32f16:
1214; ZVFH:       # %bb.0:
1215; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
1216; ZVFH-NEXT:    vfadd.vv v8, v8, v16, v0.t
1217; ZVFH-NEXT:    ret
1218;
1219; ZVFHMIN-LABEL: vfadd_vv_nxv32f16:
1220; ZVFHMIN:       # %bb.0:
1221; ZVFHMIN-NEXT:    addi sp, sp, -16
1222; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
1223; ZVFHMIN-NEXT:    csrr a1, vlenb
1224; ZVFHMIN-NEXT:    slli a1, a1, 3
1225; ZVFHMIN-NEXT:    mv a2, a1
1226; ZVFHMIN-NEXT:    slli a1, a1, 1
1227; ZVFHMIN-NEXT:    add a1, a1, a2
1228; ZVFHMIN-NEXT:    sub sp, sp, a1
1229; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1230; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
1231; ZVFHMIN-NEXT:    vmv1r.v v7, v0
1232; ZVFHMIN-NEXT:    csrr a1, vlenb
1233; ZVFHMIN-NEXT:    slli a1, a1, 4
1234; ZVFHMIN-NEXT:    add a1, sp, a1
1235; ZVFHMIN-NEXT:    addi a1, a1, 16
1236; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
1237; ZVFHMIN-NEXT:    csrr a2, vlenb
1238; ZVFHMIN-NEXT:    slli a1, a2, 1
1239; ZVFHMIN-NEXT:    srli a2, a2, 2
1240; ZVFHMIN-NEXT:    sub a3, a0, a1
1241; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
1242; ZVFHMIN-NEXT:    sltu a2, a0, a3
1243; ZVFHMIN-NEXT:    addi a2, a2, -1
1244; ZVFHMIN-NEXT:    and a2, a2, a3
1245; ZVFHMIN-NEXT:    vmv4r.v v8, v16
1246; ZVFHMIN-NEXT:    csrr a3, vlenb
1247; ZVFHMIN-NEXT:    slli a3, a3, 3
1248; ZVFHMIN-NEXT:    add a3, sp, a3
1249; ZVFHMIN-NEXT:    addi a3, a3, 16
1250; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
1251; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
1252; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
1253; ZVFHMIN-NEXT:    addi a2, sp, 16
1254; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
1255; ZVFHMIN-NEXT:    csrr a2, vlenb
1256; ZVFHMIN-NEXT:    slli a2, a2, 4
1257; ZVFHMIN-NEXT:    add a2, sp, a2
1258; ZVFHMIN-NEXT:    addi a2, a2, 16
1259; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1260; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
1261; ZVFHMIN-NEXT:    addi a2, sp, 16
1262; ZVFHMIN-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1263; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1264; ZVFHMIN-NEXT:    vfadd.vv v16, v8, v16, v0.t
1265; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1266; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
1267; ZVFHMIN-NEXT:    bltu a0, a1, .LBB48_2
1268; ZVFHMIN-NEXT:  # %bb.1:
1269; ZVFHMIN-NEXT:    mv a0, a1
1270; ZVFHMIN-NEXT:  .LBB48_2:
1271; ZVFHMIN-NEXT:    vmv1r.v v0, v7
1272; ZVFHMIN-NEXT:    csrr a1, vlenb
1273; ZVFHMIN-NEXT:    slli a1, a1, 3
1274; ZVFHMIN-NEXT:    add a1, sp, a1
1275; ZVFHMIN-NEXT:    addi a1, a1, 16
1276; ZVFHMIN-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
1277; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1278; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
1279; ZVFHMIN-NEXT:    addi a0, sp, 16
1280; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
1281; ZVFHMIN-NEXT:    csrr a0, vlenb
1282; ZVFHMIN-NEXT:    slli a0, a0, 4
1283; ZVFHMIN-NEXT:    add a0, sp, a0
1284; ZVFHMIN-NEXT:    addi a0, a0, 16
1285; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1286; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16, v0.t
1287; ZVFHMIN-NEXT:    addi a0, sp, 16
1288; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1289; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1290; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16, v0.t
1291; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1292; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
1293; ZVFHMIN-NEXT:    csrr a0, vlenb
1294; ZVFHMIN-NEXT:    slli a0, a0, 3
1295; ZVFHMIN-NEXT:    mv a1, a0
1296; ZVFHMIN-NEXT:    slli a0, a0, 1
1297; ZVFHMIN-NEXT:    add a0, a0, a1
1298; ZVFHMIN-NEXT:    add sp, sp, a0
1299; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
1300; ZVFHMIN-NEXT:    addi sp, sp, 16
1301; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
1302; ZVFHMIN-NEXT:    ret
1303  %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
1304  ret <vscale x 32 x half> %v
1305}
1306
; Unmasked m8 f16 vp.fadd. With zvfh the ZVFH run is a single vsetvli +
; vfadd.vv; without it the ZVFHMIN run widens each f16 half-vector to f32
; (vfwcvt.f.f.v), adds at e32/m8, and narrows back (vfncvt.f.f.w). Since an
; m8 f16 vector widens to two m8 f32 vectors, the EVL is split in two: the
; high half uses evl - vlmax/2 clamped to zero by the sltu/addi/and
; sequence, the low half uses min(evl, vlmax/2) via the bltu; one operand
; is spilled to an 8*vlenb stack slot in between.
1307define <vscale x 32 x half> @vfadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, i32 zeroext %evl) {
1308; ZVFH-LABEL: vfadd_vv_nxv32f16_unmasked:
1309; ZVFH:       # %bb.0:
1310; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
1311; ZVFH-NEXT:    vfadd.vv v8, v8, v16
1312; ZVFH-NEXT:    ret
1313;
1314; ZVFHMIN-LABEL: vfadd_vv_nxv32f16_unmasked:
1315; ZVFHMIN:       # %bb.0:
1316; ZVFHMIN-NEXT:    addi sp, sp, -16
1317; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
1318; ZVFHMIN-NEXT:    csrr a1, vlenb
1319; ZVFHMIN-NEXT:    slli a1, a1, 3
1320; ZVFHMIN-NEXT:    sub sp, sp, a1
1321; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1322; ZVFHMIN-NEXT:    csrr a2, vlenb
1323; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
1324; ZVFHMIN-NEXT:    vmset.m v24
1325; ZVFHMIN-NEXT:    slli a1, a2, 1
1326; ZVFHMIN-NEXT:    srli a2, a2, 2
1327; ZVFHMIN-NEXT:    sub a3, a0, a1
1328; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
1329; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
1330; ZVFHMIN-NEXT:    sltu a2, a0, a3
1331; ZVFHMIN-NEXT:    addi a2, a2, -1
1332; ZVFHMIN-NEXT:    and a2, a2, a3
1333; ZVFHMIN-NEXT:    addi a3, sp, 16
1334; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
1335; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
1336; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
1337; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
1338; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1339; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24, v0.t
1340; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1341; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
1342; ZVFHMIN-NEXT:    bltu a0, a1, .LBB49_2
1343; ZVFHMIN-NEXT:  # %bb.1:
1344; ZVFHMIN-NEXT:    mv a0, a1
1345; ZVFHMIN-NEXT:  .LBB49_2:
1346; ZVFHMIN-NEXT:    addi a1, sp, 16
1347; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
1348; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1349; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
1350; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
1351; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1352; ZVFHMIN-NEXT:    vfadd.vv v16, v24, v16
1353; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1354; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
1355; ZVFHMIN-NEXT:    csrr a0, vlenb
1356; ZVFHMIN-NEXT:    slli a0, a0, 3
1357; ZVFHMIN-NEXT:    add sp, sp, a0
1358; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
1359; ZVFHMIN-NEXT:    addi sp, sp, 16
1360; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
1361; ZVFHMIN-NEXT:    ret
1362  %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
1363  ret <vscale x 32 x half> %v
1364}
1365
; Masked f16 vector-scalar form: the scalar half is moved to a GPR
; (fmv.x.h) and splatted with vmv.v.x, then the same widen/add/narrow
; split-EVL lowering runs under the mask; the mask in v0 is shifted down
; (vslidedown.vx) for the high half and 24*vlenb of stack is used to keep
; the live m8 register groups spilled.
1366define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1367; ZVFH-LABEL: vfadd_vf_nxv32f16:
1368; ZVFH:       # %bb.0:
1369; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
1370; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1371; ZVFH-NEXT:    ret
1372;
1373; ZVFHMIN-LABEL: vfadd_vf_nxv32f16:
1374; ZVFHMIN:       # %bb.0:
1375; ZVFHMIN-NEXT:    addi sp, sp, -16
1376; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
1377; ZVFHMIN-NEXT:    csrr a1, vlenb
1378; ZVFHMIN-NEXT:    slli a1, a1, 3
1379; ZVFHMIN-NEXT:    mv a2, a1
1380; ZVFHMIN-NEXT:    slli a1, a1, 1
1381; ZVFHMIN-NEXT:    add a1, a1, a2
1382; ZVFHMIN-NEXT:    sub sp, sp, a1
1383; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1384; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
1385; ZVFHMIN-NEXT:    vmv1r.v v7, v0
1386; ZVFHMIN-NEXT:    vmv8r.v v16, v8
1387; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1388; ZVFHMIN-NEXT:    csrr a2, vlenb
1389; ZVFHMIN-NEXT:    vmv.v.x v8, a1
1390; ZVFHMIN-NEXT:    slli a1, a2, 1
1391; ZVFHMIN-NEXT:    srli a2, a2, 2
1392; ZVFHMIN-NEXT:    sub a3, a0, a1
1393; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
1394; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
1395; ZVFHMIN-NEXT:    sltu a2, a0, a3
1396; ZVFHMIN-NEXT:    addi a2, a2, -1
1397; ZVFHMIN-NEXT:    and a2, a2, a3
1398; ZVFHMIN-NEXT:    csrr a3, vlenb
1399; ZVFHMIN-NEXT:    slli a3, a3, 3
1400; ZVFHMIN-NEXT:    add a3, sp, a3
1401; ZVFHMIN-NEXT:    addi a3, a3, 16
1402; ZVFHMIN-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
1403; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
1404; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
1405; ZVFHMIN-NEXT:    vmv4r.v v8, v16
1406; ZVFHMIN-NEXT:    csrr a2, vlenb
1407; ZVFHMIN-NEXT:    slli a2, a2, 4
1408; ZVFHMIN-NEXT:    add a2, sp, a2
1409; ZVFHMIN-NEXT:    addi a2, a2, 16
1410; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
1411; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
1412; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1413; ZVFHMIN-NEXT:    vfadd.vv v24, v8, v24, v0.t
1414; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1415; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
1416; ZVFHMIN-NEXT:    bltu a0, a1, .LBB50_2
1417; ZVFHMIN-NEXT:  # %bb.1:
1418; ZVFHMIN-NEXT:    mv a0, a1
1419; ZVFHMIN-NEXT:  .LBB50_2:
1420; ZVFHMIN-NEXT:    vmv1r.v v0, v7
1421; ZVFHMIN-NEXT:    csrr a1, vlenb
1422; ZVFHMIN-NEXT:    slli a1, a1, 4
1423; ZVFHMIN-NEXT:    add a1, sp, a1
1424; ZVFHMIN-NEXT:    addi a1, a1, 16
1425; ZVFHMIN-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
1426; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1427; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
1428; ZVFHMIN-NEXT:    addi a0, sp, 16
1429; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
1430; ZVFHMIN-NEXT:    csrr a0, vlenb
1431; ZVFHMIN-NEXT:    slli a0, a0, 3
1432; ZVFHMIN-NEXT:    add a0, sp, a0
1433; ZVFHMIN-NEXT:    addi a0, a0, 16
1434; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
1435; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24, v0.t
1436; ZVFHMIN-NEXT:    vmv8r.v v24, v16
1437; ZVFHMIN-NEXT:    addi a0, sp, 16
1438; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1439; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1440; ZVFHMIN-NEXT:    vfadd.vv v24, v16, v24, v0.t
1441; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1442; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
1443; ZVFHMIN-NEXT:    csrr a0, vlenb
1444; ZVFHMIN-NEXT:    slli a0, a0, 3
1445; ZVFHMIN-NEXT:    mv a1, a0
1446; ZVFHMIN-NEXT:    slli a0, a0, 1
1447; ZVFHMIN-NEXT:    add a0, a0, a1
1448; ZVFHMIN-NEXT:    add sp, sp, a0
1449; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
1450; ZVFHMIN-NEXT:    addi sp, sp, 16
1451; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
1452; ZVFHMIN-NEXT:    ret
1453  %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
1454  %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
1455  %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
1456  ret <vscale x 32 x half> %v
1457}
1458
; Unmasked vector-scalar form: like the masked variant but the mask is an
; all-ones vmset.m, so the second (low) half is processed fully unmasked
; and only 8*vlenb of spill space is needed.
1459define <vscale x 32 x half> @vfadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, half %b, i32 zeroext %evl) {
1460; ZVFH-LABEL: vfadd_vf_nxv32f16_unmasked:
1461; ZVFH:       # %bb.0:
1462; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
1463; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
1464; ZVFH-NEXT:    ret
1465;
1466; ZVFHMIN-LABEL: vfadd_vf_nxv32f16_unmasked:
1467; ZVFHMIN:       # %bb.0:
1468; ZVFHMIN-NEXT:    addi sp, sp, -16
1469; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
1470; ZVFHMIN-NEXT:    csrr a1, vlenb
1471; ZVFHMIN-NEXT:    slli a1, a1, 3
1472; ZVFHMIN-NEXT:    sub sp, sp, a1
1473; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1474; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1475; ZVFHMIN-NEXT:    csrr a2, vlenb
1476; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
1477; ZVFHMIN-NEXT:    vmset.m v24
1478; ZVFHMIN-NEXT:    vmv.v.x v16, a1
1479; ZVFHMIN-NEXT:    slli a1, a2, 1
1480; ZVFHMIN-NEXT:    srli a2, a2, 2
1481; ZVFHMIN-NEXT:    sub a3, a0, a1
1482; ZVFHMIN-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
1483; ZVFHMIN-NEXT:    vslidedown.vx v0, v24, a2
1484; ZVFHMIN-NEXT:    sltu a2, a0, a3
1485; ZVFHMIN-NEXT:    addi a2, a2, -1
1486; ZVFHMIN-NEXT:    and a2, a2, a3
1487; ZVFHMIN-NEXT:    addi a3, sp, 16
1488; ZVFHMIN-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
1489; ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
1490; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20, v0.t
1491; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
1492; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1493; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24, v0.t
1494; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1495; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
1496; ZVFHMIN-NEXT:    bltu a0, a1, .LBB51_2
1497; ZVFHMIN-NEXT:  # %bb.1:
1498; ZVFHMIN-NEXT:    mv a0, a1
1499; ZVFHMIN-NEXT:  .LBB51_2:
1500; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1501; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
1502; ZVFHMIN-NEXT:    addi a0, sp, 16
1503; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
1504; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
1505; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
1506; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24
1507; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
1508; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
1509; ZVFHMIN-NEXT:    csrr a0, vlenb
1510; ZVFHMIN-NEXT:    slli a0, a0, 3
1511; ZVFHMIN-NEXT:    add sp, sp, a0
1512; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
1513; ZVFHMIN-NEXT:    addi sp, sp, 16
1514; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
1515; ZVFHMIN-NEXT:    ret
1516  %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
1517  %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
1518  %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
1519  ret <vscale x 32 x half> %v
1520}
1521
; vp.fadd tests for <vscale x 1 x float>: vector-vector and vector-scalar
; (scalar splat via insertelement + shufflevector) forms, each masked (%m)
; and unmasked (all-ones splat mask). f32 is natively supported on both run
; lines, so each lowers to one vsetvli (EVL in a0 as AVL, SEW=e32,
; LMUL=mf2) plus a single vfadd.
1522declare <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
1523
1524define <vscale x 1 x float> @vfadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1525; CHECK-LABEL: vfadd_vv_nxv1f32:
1526; CHECK:       # %bb.0:
1527; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1528; CHECK-NEXT:    vfadd.vv v8, v8, v9, v0.t
1529; CHECK-NEXT:    ret
1530  %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
1531  ret <vscale x 1 x float> %v
1532}
1533
1534define <vscale x 1 x float> @vfadd_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, <vscale x 1 x float> %b, i32 zeroext %evl) {
1535; CHECK-LABEL: vfadd_vv_nxv1f32_unmasked:
1536; CHECK:       # %bb.0:
1537; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1538; CHECK-NEXT:    vfadd.vv v8, v8, v9
1539; CHECK-NEXT:    ret
1540  %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1541  ret <vscale x 1 x float> %v
1542}
1543
1544define <vscale x 1 x float> @vfadd_vf_nxv1f32(<vscale x 1 x float> %va, float %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1545; CHECK-LABEL: vfadd_vf_nxv1f32:
1546; CHECK:       # %bb.0:
1547; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1548; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1549; CHECK-NEXT:    ret
1550  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
1551  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1552  %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
1553  ret <vscale x 1 x float> %v
1554}
1555
1556define <vscale x 1 x float> @vfadd_vf_nxv1f32_unmasked(<vscale x 1 x float> %va, float %b, i32 zeroext %evl) {
1557; CHECK-LABEL: vfadd_vf_nxv1f32_unmasked:
1558; CHECK:       # %bb.0:
1559; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1560; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1561; CHECK-NEXT:    ret
1562  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
1563  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1564  %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1565  ret <vscale x 1 x float> %v
1566}
1567
; vp.fadd tests for <vscale x 2 x float>: vv and vf (splat) forms, masked
; and unmasked. Same shape as the nxv1f32 group but at SEW=e32, LMUL=m1.
1568declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
1569
1570define <vscale x 2 x float> @vfadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1571; CHECK-LABEL: vfadd_vv_nxv2f32:
1572; CHECK:       # %bb.0:
1573; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1574; CHECK-NEXT:    vfadd.vv v8, v8, v9, v0.t
1575; CHECK-NEXT:    ret
1576  %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
1577  ret <vscale x 2 x float> %v
1578}
1579
1580define <vscale x 2 x float> @vfadd_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, <vscale x 2 x float> %b, i32 zeroext %evl) {
1581; CHECK-LABEL: vfadd_vv_nxv2f32_unmasked:
1582; CHECK:       # %bb.0:
1583; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1584; CHECK-NEXT:    vfadd.vv v8, v8, v9
1585; CHECK-NEXT:    ret
1586  %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1587  ret <vscale x 2 x float> %v
1588}
1589
1590define <vscale x 2 x float> @vfadd_vf_nxv2f32(<vscale x 2 x float> %va, float %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1591; CHECK-LABEL: vfadd_vf_nxv2f32:
1592; CHECK:       # %bb.0:
1593; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1594; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1595; CHECK-NEXT:    ret
1596  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
1597  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1598  %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
1599  ret <vscale x 2 x float> %v
1600}
1601
1602define <vscale x 2 x float> @vfadd_vf_nxv2f32_unmasked(<vscale x 2 x float> %va, float %b, i32 zeroext %evl) {
1603; CHECK-LABEL: vfadd_vf_nxv2f32_unmasked:
1604; CHECK:       # %bb.0:
1605; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1606; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1607; CHECK-NEXT:    ret
1608  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
1609  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1610  %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1611  ret <vscale x 2 x float> %v
1612}
1613
; vp.fadd tests for <vscale x 4 x float>: vv and vf (splat) forms, masked
; and unmasked, at SEW=e32, LMUL=m2 (second source in v10).
1614declare <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
1615
1616define <vscale x 4 x float> @vfadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1617; CHECK-LABEL: vfadd_vv_nxv4f32:
1618; CHECK:       # %bb.0:
1619; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1620; CHECK-NEXT:    vfadd.vv v8, v8, v10, v0.t
1621; CHECK-NEXT:    ret
1622  %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
1623  ret <vscale x 4 x float> %v
1624}
1625
1626define <vscale x 4 x float> @vfadd_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, <vscale x 4 x float> %b, i32 zeroext %evl) {
1627; CHECK-LABEL: vfadd_vv_nxv4f32_unmasked:
1628; CHECK:       # %bb.0:
1629; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1630; CHECK-NEXT:    vfadd.vv v8, v8, v10
1631; CHECK-NEXT:    ret
1632  %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1633  ret <vscale x 4 x float> %v
1634}
1635
1636define <vscale x 4 x float> @vfadd_vf_nxv4f32(<vscale x 4 x float> %va, float %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1637; CHECK-LABEL: vfadd_vf_nxv4f32:
1638; CHECK:       # %bb.0:
1639; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1640; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1641; CHECK-NEXT:    ret
1642  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
1643  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1644  %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
1645  ret <vscale x 4 x float> %v
1646}
1647
1648define <vscale x 4 x float> @vfadd_vf_nxv4f32_unmasked(<vscale x 4 x float> %va, float %b, i32 zeroext %evl) {
1649; CHECK-LABEL: vfadd_vf_nxv4f32_unmasked:
1650; CHECK:       # %bb.0:
1651; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1652; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1653; CHECK-NEXT:    ret
1654  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
1655  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1656  %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1657  ret <vscale x 4 x float> %v
1658}
1659
; vp.fadd tests for <vscale x 8 x float>: vv and vf (splat) forms, masked
; and unmasked, at SEW=e32, LMUL=m4 (second source in v12).
1660declare <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
1661
1662define <vscale x 8 x float> @vfadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1663; CHECK-LABEL: vfadd_vv_nxv8f32:
1664; CHECK:       # %bb.0:
1665; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1666; CHECK-NEXT:    vfadd.vv v8, v8, v12, v0.t
1667; CHECK-NEXT:    ret
1668  %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
1669  ret <vscale x 8 x float> %v
1670}
1671
1672define <vscale x 8 x float> @vfadd_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, <vscale x 8 x float> %b, i32 zeroext %evl) {
1673; CHECK-LABEL: vfadd_vv_nxv8f32_unmasked:
1674; CHECK:       # %bb.0:
1675; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1676; CHECK-NEXT:    vfadd.vv v8, v8, v12
1677; CHECK-NEXT:    ret
1678  %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1679  ret <vscale x 8 x float> %v
1680}
1681
1682define <vscale x 8 x float> @vfadd_vf_nxv8f32(<vscale x 8 x float> %va, float %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1683; CHECK-LABEL: vfadd_vf_nxv8f32:
1684; CHECK:       # %bb.0:
1685; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1686; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1687; CHECK-NEXT:    ret
1688  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
1689  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
1690  %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
1691  ret <vscale x 8 x float> %v
1692}
1693
1694define <vscale x 8 x float> @vfadd_vf_nxv8f32_unmasked(<vscale x 8 x float> %va, float %b, i32 zeroext %evl) {
1695; CHECK-LABEL: vfadd_vf_nxv8f32_unmasked:
1696; CHECK:       # %bb.0:
1697; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1698; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1699; CHECK-NEXT:    ret
1700  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
1701  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
1702  %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1703  ret <vscale x 8 x float> %v
1704}
1705
; vp.fadd tests for <vscale x 16 x float>: vv and vf (splat) forms, masked
; and unmasked, at SEW=e32, LMUL=m8 (second source in v16) — the largest
; f32 type that still needs no register-group splitting.
1706declare <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
1707
1708define <vscale x 16 x float> @vfadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1709; CHECK-LABEL: vfadd_vv_nxv16f32:
1710; CHECK:       # %bb.0:
1711; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
1712; CHECK-NEXT:    vfadd.vv v8, v8, v16, v0.t
1713; CHECK-NEXT:    ret
1714  %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
1715  ret <vscale x 16 x float> %v
1716}
1717
1718define <vscale x 16 x float> @vfadd_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, <vscale x 16 x float> %b, i32 zeroext %evl) {
1719; CHECK-LABEL: vfadd_vv_nxv16f32_unmasked:
1720; CHECK:       # %bb.0:
1721; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
1722; CHECK-NEXT:    vfadd.vv v8, v8, v16
1723; CHECK-NEXT:    ret
1724  %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1725  ret <vscale x 16 x float> %v
1726}
1727
1728define <vscale x 16 x float> @vfadd_vf_nxv16f32(<vscale x 16 x float> %va, float %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1729; CHECK-LABEL: vfadd_vf_nxv16f32:
1730; CHECK:       # %bb.0:
1731; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
1732; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1733; CHECK-NEXT:    ret
1734  %elt.head = insertelement <vscale x 16 x float> poison, float %b, i32 0
1735  %vb = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
1736  %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> %m, i32 %evl)
1737  ret <vscale x 16 x float> %v
1738}
1739
1740define <vscale x 16 x float> @vfadd_vf_nxv16f32_unmasked(<vscale x 16 x float> %va, float %b, i32 zeroext %evl) {
1741; CHECK-LABEL: vfadd_vf_nxv16f32_unmasked:
1742; CHECK:       # %bb.0:
1743; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
1744; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1745; CHECK-NEXT:    ret
1746  %elt.head = insertelement <vscale x 16 x float> poison, float %b, i32 0
1747  %vb = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
1748  %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1749  ret <vscale x 16 x float> %v
1750}
1751
; vp.fadd tests for <vscale x 1 x double>: vv and vf (splat) forms, masked
; and unmasked. Both run lines have +d, so f64 lowers directly at SEW=e64,
; LMUL=m1.
1752declare <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
1753
1754define <vscale x 1 x double> @vfadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1755; CHECK-LABEL: vfadd_vv_nxv1f64:
1756; CHECK:       # %bb.0:
1757; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1758; CHECK-NEXT:    vfadd.vv v8, v8, v9, v0.t
1759; CHECK-NEXT:    ret
1760  %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl)
1761  ret <vscale x 1 x double> %v
1762}
1763
1764define <vscale x 1 x double> @vfadd_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, <vscale x 1 x double> %b, i32 zeroext %evl) {
1765; CHECK-LABEL: vfadd_vv_nxv1f64_unmasked:
1766; CHECK:       # %bb.0:
1767; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1768; CHECK-NEXT:    vfadd.vv v8, v8, v9
1769; CHECK-NEXT:    ret
1770  %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1771  ret <vscale x 1 x double> %v
1772}
1773
1774define <vscale x 1 x double> @vfadd_vf_nxv1f64(<vscale x 1 x double> %va, double %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1775; CHECK-LABEL: vfadd_vf_nxv1f64:
1776; CHECK:       # %bb.0:
1777; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1778; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1779; CHECK-NEXT:    ret
1780  %elt.head = insertelement <vscale x 1 x double> poison, double %b, i32 0
1781  %vb = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
1782  %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl)
1783  ret <vscale x 1 x double> %v
1784}
1785
1786define <vscale x 1 x double> @vfadd_vf_nxv1f64_unmasked(<vscale x 1 x double> %va, double %b, i32 zeroext %evl) {
1787; CHECK-LABEL: vfadd_vf_nxv1f64_unmasked:
1788; CHECK:       # %bb.0:
1789; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1790; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1791; CHECK-NEXT:    ret
1792  %elt.head = insertelement <vscale x 1 x double> poison, double %b, i32 0
1793  %vb = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
1794  %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1795  ret <vscale x 1 x double> %v
1796}
1797
; vp.fadd tests for <vscale x 2 x double>: vv and vf (splat) forms, masked
; and unmasked, at SEW=e64, LMUL=m2 (second source in v10).
1798declare <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
1799
1800define <vscale x 2 x double> @vfadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1801; CHECK-LABEL: vfadd_vv_nxv2f64:
1802; CHECK:       # %bb.0:
1803; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1804; CHECK-NEXT:    vfadd.vv v8, v8, v10, v0.t
1805; CHECK-NEXT:    ret
1806  %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl)
1807  ret <vscale x 2 x double> %v
1808}
1809
1810define <vscale x 2 x double> @vfadd_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, <vscale x 2 x double> %b, i32 zeroext %evl) {
1811; CHECK-LABEL: vfadd_vv_nxv2f64_unmasked:
1812; CHECK:       # %bb.0:
1813; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1814; CHECK-NEXT:    vfadd.vv v8, v8, v10
1815; CHECK-NEXT:    ret
1816  %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1817  ret <vscale x 2 x double> %v
1818}
1819
1820define <vscale x 2 x double> @vfadd_vf_nxv2f64(<vscale x 2 x double> %va, double %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1821; CHECK-LABEL: vfadd_vf_nxv2f64:
1822; CHECK:       # %bb.0:
1823; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1824; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1825; CHECK-NEXT:    ret
1826  %elt.head = insertelement <vscale x 2 x double> poison, double %b, i32 0
1827  %vb = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
1828  %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl)
1829  ret <vscale x 2 x double> %v
1830}
1831
1832define <vscale x 2 x double> @vfadd_vf_nxv2f64_unmasked(<vscale x 2 x double> %va, double %b, i32 zeroext %evl) {
1833; CHECK-LABEL: vfadd_vf_nxv2f64_unmasked:
1834; CHECK:       # %bb.0:
1835; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1836; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1837; CHECK-NEXT:    ret
1838  %elt.head = insertelement <vscale x 2 x double> poison, double %b, i32 0
1839  %vb = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
1840  %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1841  ret <vscale x 2 x double> %v
1842}
1843
; vp.fadd tests for <vscale x 4 x double>: vv and vf (splat) forms, masked
; and unmasked, at SEW=e64, LMUL=m4 (second source in v12).
1844declare <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
1845
1846define <vscale x 4 x double> @vfadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1847; CHECK-LABEL: vfadd_vv_nxv4f64:
1848; CHECK:       # %bb.0:
1849; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1850; CHECK-NEXT:    vfadd.vv v8, v8, v12, v0.t
1851; CHECK-NEXT:    ret
1852  %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl)
1853  ret <vscale x 4 x double> %v
1854}
1855
1856define <vscale x 4 x double> @vfadd_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, <vscale x 4 x double> %b, i32 zeroext %evl) {
1857; CHECK-LABEL: vfadd_vv_nxv4f64_unmasked:
1858; CHECK:       # %bb.0:
1859; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1860; CHECK-NEXT:    vfadd.vv v8, v8, v12
1861; CHECK-NEXT:    ret
1862  %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1863  ret <vscale x 4 x double> %v
1864}
1865
1866define <vscale x 4 x double> @vfadd_vf_nxv4f64(<vscale x 4 x double> %va, double %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1867; CHECK-LABEL: vfadd_vf_nxv4f64:
1868; CHECK:       # %bb.0:
1869; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1870; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1871; CHECK-NEXT:    ret
1872  %elt.head = insertelement <vscale x 4 x double> poison, double %b, i32 0
1873  %vb = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
1874  %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl)
1875  ret <vscale x 4 x double> %v
1876}
1877
1878define <vscale x 4 x double> @vfadd_vf_nxv4f64_unmasked(<vscale x 4 x double> %va, double %b, i32 zeroext %evl) {
1879; CHECK-LABEL: vfadd_vf_nxv4f64_unmasked:
1880; CHECK:       # %bb.0:
1881; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1882; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1883; CHECK-NEXT:    ret
1884  %elt.head = insertelement <vscale x 4 x double> poison, double %b, i32 0
1885  %vb = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
1886  %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1887  ret <vscale x 4 x double> %v
1888}
1889
; Non-power-of-two element count: <vscale x 7 x double> still lowers with
; the containing register group (SEW=e64, LMUL=m8); only the masked
; vector-vector form is exercised for this type.
1890declare <vscale x 7 x double> @llvm.vp.fadd.nxv7f64(<vscale x 7 x double>, <vscale x 7 x double>, <vscale x 7 x i1>, i32)
1891
1892define <vscale x 7 x double> @vfadd_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x double> %b, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1893; CHECK-LABEL: vfadd_vv_nxv7f64:
1894; CHECK:       # %bb.0:
1895; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1896; CHECK-NEXT:    vfadd.vv v8, v8, v16, v0.t
1897; CHECK-NEXT:    ret
1898  %v = call <vscale x 7 x double> @llvm.vp.fadd.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x double> %b, <vscale x 7 x i1> %m, i32 %evl)
1899  ret <vscale x 7 x double> %v
1900}
1901
1902declare <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
1903
; Masked vector-vector VP fadd, nxv8f64: a single masked vfadd.vv at e64/m8,
; with the EVL (%evl in a0) driving vsetvli.
1904define <vscale x 8 x double> @vfadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1905; CHECK-LABEL: vfadd_vv_nxv8f64:
1906; CHECK:       # %bb.0:
1907; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1908; CHECK-NEXT:    vfadd.vv v8, v8, v16, v0.t
1909; CHECK-NEXT:    ret
1910  %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl)
1911  ret <vscale x 8 x double> %v
1912}
1913
; Unmasked vector-vector VP fadd, nxv8f64: mask is splat(i1 true), so the
; emitted vfadd.vv carries no v0.t operand.
1914define <vscale x 8 x double> @vfadd_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %b, i32 zeroext %evl) {
1915; CHECK-LABEL: vfadd_vv_nxv8f64_unmasked:
1916; CHECK:       # %bb.0:
1917; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1918; CHECK-NEXT:    vfadd.vv v8, v8, v16
1919; CHECK-NEXT:    ret
1920  %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1921  ret <vscale x 8 x double> %v
1922}
1923
; Masked vector-scalar VP fadd, nxv8f64: the splat of %b is folded into the
; fa0 scalar operand of vfadd.vf, with the v0.t mask from %m.
1924define <vscale x 8 x double> @vfadd_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1925; CHECK-LABEL: vfadd_vf_nxv8f64:
1926; CHECK:       # %bb.0:
1927; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1928; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
1929; CHECK-NEXT:    ret
1930  %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
1931  %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer  ; splat %b across all lanes
1932  %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
1933  ret <vscale x 8 x double> %v
1934}
1935
; Unmasked vector-scalar VP fadd, nxv8f64: mask is splat(i1 true), so the
; emitted vfadd.vf carries no v0.t operand; the splat of %b still folds into
; the fa0 scalar operand.
1936define <vscale x 8 x double> @vfadd_vf_nxv8f64_unmasked(<vscale x 8 x double> %va, double %b, i32 zeroext %evl) {
1937; CHECK-LABEL: vfadd_vf_nxv8f64_unmasked:
1938; CHECK:       # %bb.0:
1939; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1940; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1941; CHECK-NEXT:    ret
1942  %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
1943  %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer  ; splat %b across all lanes
1944  %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1945  ret <vscale x 8 x double> %v
1946}
1947