; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll (revision 8ce81f17a16b8b689895c7c093d0401a75c09882)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,ZVFHMIN

; Masked and all-ones-mask (unmasked) llvm.vp.sqrt on <vscale x 1 x bfloat>.
; bf16 has no native vfsqrt, so lowering widens to f32, sqrts, and narrows back.
declare <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

; Masked and all-ones-mask (unmasked) llvm.vp.sqrt on <vscale x 2 x bfloat>.
declare <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

; Masked and all-ones-mask (unmasked) llvm.vp.sqrt on <vscale x 4 x bfloat>.
declare <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

; Masked and all-ones-mask (unmasked) llvm.vp.sqrt on <vscale x 8 x bfloat>.
declare <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

; Masked and all-ones-mask (unmasked) llvm.vp.sqrt on <vscale x 16 x bfloat>.
declare <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

; llvm.vp.sqrt on <vscale x 32 x bfloat>: widening to f32 would need LMUL 16,
; so the lowering splits the vector in half (high half first) and processes
; each half at m4/m8 with its own EVL/mask slice.
declare <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    sltu a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    and a3, a4, a3
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v16
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    sltu a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v16, a2
; CHECK-NEXT:    and a3, a4, a3
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
; llvm.vp.sqrt on <vscale x 1 x half>: ZVFH uses native e16 vfsqrt;
; ZVFHMIN widens to f32, sqrts, and narrows back.
declare <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfsqrt_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

; llvm.vp.sqrt on <vscale x 2 x half>: native vfsqrt under ZVFH, widen/narrow under ZVFHMIN.
declare <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfsqrt_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfsqrt_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}

; llvm.vp.sqrt on <vscale x 4 x half>: native vfsqrt under ZVFH, widen/narrow under ZVFHMIN.
declare <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vfsqrt_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vfsqrt_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}

; llvm.vp.sqrt on <vscale x 8 x half>: native vfsqrt under ZVFH, widen/narrow under ZVFHMIN.
declare <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vfsqrt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vfsqrt_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}

; llvm.vp.sqrt on <vscale x 16 x half>: native vfsqrt under ZVFH, widen/narrow under ZVFHMIN.
declare <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x half> @vfsqrt_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x half> %v
}

define <vscale x 16 x half> @vfsqrt_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x half> %v
}

; llvm.vp.sqrt on <vscale x 32 x half>: ZVFH handles the full m8 vector natively;
; ZVFHMIN must split the vector in half (high half first) because widening to
; f32 would exceed the maximum LMUL.
declare <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)

define <vscale x 32 x half> @vfsqrt_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v16, v0
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    sltu a4, a0, a3
; ZVFHMIN-NEXT:    addi a4, a4, -1
; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
; ZVFHMIN-NEXT:    and a3, a4, a3
; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB22_2:
; ZVFHMIN-NEXT:    vmv1r.v v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x half> %v
}

define <vscale x 32 x half> @vfsqrt_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv32f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT:    vmset.m v16
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    sltu a4, a0, a3
; ZVFHMIN-NEXT:    addi a4, a4, -1
; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
; ZVFHMIN-NEXT:    and a3, a4, a3
; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB23_2:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x half> %v
}

; llvm.vp.sqrt on <vscale x 1 x float>: lowers directly to a single e32 vfsqrt.
declare <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vfsqrt_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfsqrt_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x float> %v
}

; llvm.vp.sqrt on <vscale x 2 x float>: single e32/m1 vfsqrt.
declare <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfsqrt_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfsqrt_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %v
}

; llvm.vp.sqrt on <vscale x 4 x float>: single e32/m2 vfsqrt.
declare <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfsqrt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfsqrt_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %v
}

; llvm.vp.sqrt on <vscale x 8 x float>: single e32/m4 vfsqrt.
declare <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfsqrt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfsqrt_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %v
}

; llvm.vp.sqrt on <vscale x 16 x float>: single e32/m8 vfsqrt.
declare <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfsqrt_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfsqrt_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x float> %v
}

; llvm.vp.sqrt on <vscale x 1 x double>: single e64/m1 vfsqrt.
declare <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfsqrt_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfsqrt_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x double> %v
}

; llvm.vp.sqrt on <vscale x 2 x double>: single e64/m2 vfsqrt.
declare <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfsqrt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfsqrt_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)

; Masked nxv4f64: a single masked vfsqrt.v (v0.t) at SEW=64, LMUL=4.
define <vscale x 4 x double> @vfsqrt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}
695
; Unmasked nxv4f64: splat-true mask folds away; unmasked vfsqrt.v at
; SEW=64, LMUL=4.
define <vscale x 4 x double> @vfsqrt_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}
705
declare <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)

; Masked nxv7f64: a non-power-of-two element count; the CHECK lines show it
; is handled with a single masked vfsqrt.v at e64/m8 (the same code shape as
; the nxv8f64 case), with no splitting.
define <vscale x 7 x double> @vfsqrt_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x double> %v
}
717
; Unmasked nxv7f64: non-power-of-two count, splat-true mask; emitted as one
; unmasked vfsqrt.v at e64/m8.
define <vscale x 7 x double> @vfsqrt_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv7f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x double> %v
}
727
declare <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)

; Masked nxv8f64: the largest single-register-group case — one masked
; vfsqrt.v (v0.t) at SEW=64, LMUL=8.
define <vscale x 8 x double> @vfsqrt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
739
; Unmasked nxv8f64: splat-true mask folds away; unmasked vfsqrt.v at
; SEW=64, LMUL=8.
define <vscale x 8 x double> @vfsqrt_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}
749
; Test splitting.
declare <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

; Masked nxv16f64 does not fit in one register group, so the op is split
; into two e64/m8 halves (register groups v8 and v16). In the emitted code:
;   - the mask is saved (v0 -> v24) and slid down by vlenb/8 (srli a2, a1, 3)
;     to form the high-half mask;
;   - the high-half EVL is computed branchlessly as
;     (%evl >= vlenb) ? %evl - vlenb : 0 via the sub/sltu/addi/and sequence
;     (vlenb equals the element count of one e64/m8 group);
;   - the low-half AVL is clamped to vlenb with the bltu/mv sequence.
define <vscale x 16 x double> @vfsqrt_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v24, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a2, a1, 3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
778
; Unmasked nxv16f64 split: same two-half e64/m8 split as the masked case,
; but with no mask bookkeeping — only the high-half EVL clamp
; ((%evl >= vlenb) ? %evl - vlenb : 0 via sub/sltu/addi/and) and the
; low-half AVL clamp to vlenb (bltu/mv) remain.
define <vscale x 16 x double> @vfsqrt_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}
799