; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

declare <2 x half> @llvm.vp.sqrt.v2f16(<2 x half>, <2 x i1>, i32)

define <2 x half> @vfsqrt_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfsqrt_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.sqrt.v4f16(<4 x half>, <4 x i1>, i32)

define <4 x half> @vfsqrt_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfsqrt_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.sqrt.v8f16(<8 x half>, <8 x i1>, i32)

define <8 x half> @vfsqrt_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfsqrt_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.sqrt.v16f16(<16 x half>, <16 x i1>, i32)

define <16 x half> @vfsqrt_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfsqrt_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_v16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_v16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

declare <2 x float> @llvm.vp.sqrt.v2f32(<2 x float>, <2 x i1>, i32)

define <2 x float> @vfsqrt_vv_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfsqrt_vv_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32)

define <4 x float> @vfsqrt_vv_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfsqrt_vv_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.sqrt.v8f32(<8 x float>, <8 x i1>, i32)

define <8 x float> @vfsqrt_vv_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfsqrt_vv_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.sqrt.v16f32(<16 x float>, <16 x i1>, i32)

define <16 x float> @vfsqrt_vv_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfsqrt_vv_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.sqrt.v2f64(<2 x double>, <2 x i1>, i32)

define <2 x double> @vfsqrt_vv_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfsqrt_vv_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.sqrt.v4f64(<4 x double>, <4 x i1>, i32)

define <4 x double> @vfsqrt_vv_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfsqrt_vv_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.sqrt.v8f64(<8 x double>, <8 x i1>, i32)

define <8 x double> @vfsqrt_vv_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfsqrt_vv_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

declare <15 x double> @llvm.vp.sqrt.v15f64(<15 x double>, <15 x i1>, i32)

define <15 x double> @vfsqrt_vv_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vfsqrt_vv_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.sqrt.v16f64(<16 x double>, <16 x i1>, i32)

define <16 x double> @vfsqrt_vv_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfsqrt_vv_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32)

define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 2
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB26_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB26_2:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    addi a1, a0, -16
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB27_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB27_2:
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    addi a1, a0, -16
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}