xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
3; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
4; RUN:     --check-prefixes=CHECK,ZVFH
5; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
6; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
7; RUN:     --check-prefixes=CHECK,ZVFH
8; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
9; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
10; RUN:     --check-prefixes=CHECK,ZVFHMIN
11; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
12; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
13; RUN:     --check-prefixes=CHECK,ZVFHMIN
14
; Test: llvm.sqrt on <vscale x 1 x bfloat>.
; The assertions use the shared CHECK prefix, which every RUN line in this
; file enables, so the same sequence is expected for all four configurations:
; a widening convert (vfwcvtbf16.f.f.v) issued at e16/mf4, vfsqrt.v at
; e32/mf2, then a narrowing convert (vfncvtbf16.f.f.w) back at e16/mf4.
15define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) {
16; CHECK-LABEL: vfsqrt_nxv1bf16:
17; CHECK:       # %bb.0:
18; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
19; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
20; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
21; CHECK-NEXT:    vfsqrt.v v9, v9
22; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
23; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
24; CHECK-NEXT:    ret
25  %r = call <vscale x 1 x bfloat> @llvm.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v)
26  ret <vscale x 1 x bfloat> %r
27}
28
; Test: llvm.sqrt on <vscale x 2 x bfloat>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; widening convert (vfwcvtbf16.f.f.v) at e16/mf2, vfsqrt.v at e32/m1, then
; narrowing convert (vfncvtbf16.f.f.w) back at e16/mf2.
29define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) {
30; CHECK-LABEL: vfsqrt_nxv2bf16:
31; CHECK:       # %bb.0:
32; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
33; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
34; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
35; CHECK-NEXT:    vfsqrt.v v9, v9
36; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
37; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
38; CHECK-NEXT:    ret
39  %r = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v)
40  ret <vscale x 2 x bfloat> %r
41}
42
; Test: llvm.sqrt on <vscale x 4 x bfloat>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; widening convert (vfwcvtbf16.f.f.v) at e16/m1 into the v10 group, vfsqrt.v
; at e32/m2, then narrowing convert (vfncvtbf16.f.f.w) back at e16/m1.
43define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) {
44; CHECK-LABEL: vfsqrt_nxv4bf16:
45; CHECK:       # %bb.0:
46; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
47; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
48; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
49; CHECK-NEXT:    vfsqrt.v v10, v10
50; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
51; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
52; CHECK-NEXT:    ret
53  %r = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v)
54  ret <vscale x 4 x bfloat> %r
55}
56
; Test: llvm.sqrt on <vscale x 8 x bfloat>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; widening convert (vfwcvtbf16.f.f.v) at e16/m2 into the v12 group, vfsqrt.v
; at e32/m4, then narrowing convert (vfncvtbf16.f.f.w) back at e16/m2.
57define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) {
58; CHECK-LABEL: vfsqrt_nxv8bf16:
59; CHECK:       # %bb.0:
60; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
61; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
62; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
63; CHECK-NEXT:    vfsqrt.v v12, v12
64; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
65; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
66; CHECK-NEXT:    ret
67  %r = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v)
68  ret <vscale x 8 x bfloat> %r
69}
70
; Test: llvm.sqrt on <vscale x 16 x bfloat>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; widening convert (vfwcvtbf16.f.f.v) at e16/m4 into the v16 group, vfsqrt.v
; at e32/m8, then narrowing convert (vfncvtbf16.f.f.w) back at e16/m4.
71define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) {
72; CHECK-LABEL: vfsqrt_nxv16bf16:
73; CHECK:       # %bb.0:
74; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
75; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
76; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
77; CHECK-NEXT:    vfsqrt.v v16, v16
78; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
79; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
80; CHECK-NEXT:    ret
81  %r = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v)
82  ret <vscale x 16 x bfloat> %r
83}
84
; Test: llvm.sqrt on <vscale x 32 x bfloat>.
; The assertions use the shared CHECK prefix (enabled by every RUN line).
; Unlike the smaller bf16 cases, the expected code handles the operand as two
; parts: the v8 and v12 groups are each widened at e16/m4 (into v16 and v24),
; each part gets its own vfsqrt.v at e32/m8, and the results are narrowed back
; at e16/m4 into v8 and v12 respectively.
85define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) {
86; CHECK-LABEL: vfsqrt_nxv32bf16:
87; CHECK:       # %bb.0:
88; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
89; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
90; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
91; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
92; CHECK-NEXT:    vfsqrt.v v16, v16
93; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
94; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
95; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
96; CHECK-NEXT:    vfsqrt.v v16, v24
97; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
98; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
99; CHECK-NEXT:    ret
100  %r = call <vscale x 32 x bfloat> @llvm.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v)
101  ret <vscale x 32 x bfloat> %r
102}
103
; Test: llvm.sqrt on <vscale x 1 x half>.
; Expectations differ by configuration. Under the ZVFH prefix (RUN lines
; with +zvfh) a single vfsqrt.v at e16/mf4 is expected. Under the ZVFHMIN
; prefix (RUN lines with +zfhmin,+zvfhmin) the expected sequence is a widening
; convert (vfwcvt.f.f.v) at e16/mf4, vfsqrt.v at e32/mf2, and a narrowing
; convert (vfncvt.f.f.w) back at e16/mf4.
104declare <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half>)
105
106define <vscale x 1 x half> @vfsqrt_nxv1f16(<vscale x 1 x half> %v) {
107; ZVFH-LABEL: vfsqrt_nxv1f16:
108; ZVFH:       # %bb.0:
109; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
110; ZVFH-NEXT:    vfsqrt.v v8, v8
111; ZVFH-NEXT:    ret
112;
113; ZVFHMIN-LABEL: vfsqrt_nxv1f16:
114; ZVFHMIN:       # %bb.0:
115; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
116; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
117; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
118; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
119; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
120; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
121; ZVFHMIN-NEXT:    ret
122  %r = call <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half> %v)
123  ret <vscale x 1 x half> %r
124}
125
; Test: llvm.sqrt on <vscale x 2 x half>.
; Under the ZVFH prefix (RUN lines with +zvfh) a single vfsqrt.v at e16/mf2
; is expected. Under the ZVFHMIN prefix (RUN lines with +zfhmin,+zvfhmin) the
; expected sequence is a widening convert (vfwcvt.f.f.v) at e16/mf2, vfsqrt.v
; at e32/m1, and a narrowing convert (vfncvt.f.f.w) back at e16/mf2.
126declare <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half>)
127
128define <vscale x 2 x half> @vfsqrt_nxv2f16(<vscale x 2 x half> %v) {
129; ZVFH-LABEL: vfsqrt_nxv2f16:
130; ZVFH:       # %bb.0:
131; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
132; ZVFH-NEXT:    vfsqrt.v v8, v8
133; ZVFH-NEXT:    ret
134;
135; ZVFHMIN-LABEL: vfsqrt_nxv2f16:
136; ZVFHMIN:       # %bb.0:
137; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
138; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
139; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
140; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
141; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
142; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
143; ZVFHMIN-NEXT:    ret
144  %r = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %v)
145  ret <vscale x 2 x half> %r
146}
147
; Test: llvm.sqrt on <vscale x 4 x half>.
; Under the ZVFH prefix (RUN lines with +zvfh) a single vfsqrt.v at e16/m1
; is expected. Under the ZVFHMIN prefix (RUN lines with +zfhmin,+zvfhmin) the
; expected sequence is a widening convert (vfwcvt.f.f.v) at e16/m1 into the
; v10 group, vfsqrt.v at e32/m2, and a narrowing convert (vfncvt.f.f.w) back
; at e16/m1.
148declare <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half>)
149
150define <vscale x 4 x half> @vfsqrt_nxv4f16(<vscale x 4 x half> %v) {
151; ZVFH-LABEL: vfsqrt_nxv4f16:
152; ZVFH:       # %bb.0:
153; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
154; ZVFH-NEXT:    vfsqrt.v v8, v8
155; ZVFH-NEXT:    ret
156;
157; ZVFHMIN-LABEL: vfsqrt_nxv4f16:
158; ZVFHMIN:       # %bb.0:
159; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
160; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
161; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
162; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
163; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
164; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
165; ZVFHMIN-NEXT:    ret
166  %r = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %v)
167  ret <vscale x 4 x half> %r
168}
169
; Test: llvm.sqrt on <vscale x 8 x half>.
; Under the ZVFH prefix (RUN lines with +zvfh) a single vfsqrt.v at e16/m2
; is expected. Under the ZVFHMIN prefix (RUN lines with +zfhmin,+zvfhmin) the
; expected sequence is a widening convert (vfwcvt.f.f.v) at e16/m2 into the
; v12 group, vfsqrt.v at e32/m4, and a narrowing convert (vfncvt.f.f.w) back
; at e16/m2.
170declare <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half>)
171
172define <vscale x 8 x half> @vfsqrt_nxv8f16(<vscale x 8 x half> %v) {
173; ZVFH-LABEL: vfsqrt_nxv8f16:
174; ZVFH:       # %bb.0:
175; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
176; ZVFH-NEXT:    vfsqrt.v v8, v8
177; ZVFH-NEXT:    ret
178;
179; ZVFHMIN-LABEL: vfsqrt_nxv8f16:
180; ZVFHMIN:       # %bb.0:
181; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
182; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
183; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
184; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
185; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
186; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
187; ZVFHMIN-NEXT:    ret
188  %r = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %v)
189  ret <vscale x 8 x half> %r
190}
191
; Test: llvm.sqrt on <vscale x 16 x half>.
; Under the ZVFH prefix (RUN lines with +zvfh) a single vfsqrt.v at e16/m4
; is expected. Under the ZVFHMIN prefix (RUN lines with +zfhmin,+zvfhmin) the
; expected sequence is a widening convert (vfwcvt.f.f.v) at e16/m4 into the
; v16 group, vfsqrt.v at e32/m8, and a narrowing convert (vfncvt.f.f.w) back
; at e16/m4.
192declare <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half>)
193
194define <vscale x 16 x half> @vfsqrt_nxv16f16(<vscale x 16 x half> %v) {
195; ZVFH-LABEL: vfsqrt_nxv16f16:
196; ZVFH:       # %bb.0:
197; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
198; ZVFH-NEXT:    vfsqrt.v v8, v8
199; ZVFH-NEXT:    ret
200;
201; ZVFHMIN-LABEL: vfsqrt_nxv16f16:
202; ZVFHMIN:       # %bb.0:
203; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
204; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
205; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
206; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
207; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
208; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
209; ZVFHMIN-NEXT:    ret
210  %r = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> %v)
211  ret <vscale x 16 x half> %r
212}
213
; Test: llvm.sqrt on <vscale x 32 x half>.
; Under the ZVFH prefix (RUN lines with +zvfh) a single vfsqrt.v at e16/m8
; is expected. Under the ZVFHMIN prefix (RUN lines with +zfhmin,+zvfhmin) the
; expected code handles the operand as two parts: the v8 and v12 groups are
; each widened at e16/m4 (into v16 and v24), each part gets its own vfsqrt.v
; at e32/m8, and the results are narrowed back at e16/m4 into v8 and v12.
214declare <vscale x 32 x half> @llvm.sqrt.nxv32f16(<vscale x 32 x half>)
215
216define <vscale x 32 x half> @vfsqrt_nxv32f16(<vscale x 32 x half> %v) {
217; ZVFH-LABEL: vfsqrt_nxv32f16:
218; ZVFH:       # %bb.0:
219; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
220; ZVFH-NEXT:    vfsqrt.v v8, v8
221; ZVFH-NEXT:    ret
222;
223; ZVFHMIN-LABEL: vfsqrt_nxv32f16:
224; ZVFHMIN:       # %bb.0:
225; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
226; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
227; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
228; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
229; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
230; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
231; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
232; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
233; ZVFHMIN-NEXT:    vfsqrt.v v16, v24
234; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
235; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
236; ZVFHMIN-NEXT:    ret
237  %r = call <vscale x 32 x half> @llvm.sqrt.nxv32f16(<vscale x 32 x half> %v)
238  ret <vscale x 32 x half> %r
239}
240
; Test: llvm.sqrt on <vscale x 1 x float>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e32/mf2.
241declare <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float>)
242
243define <vscale x 1 x float> @vfsqrt_nxv1f32(<vscale x 1 x float> %v) {
244; CHECK-LABEL: vfsqrt_nxv1f32:
245; CHECK:       # %bb.0:
246; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
247; CHECK-NEXT:    vfsqrt.v v8, v8
248; CHECK-NEXT:    ret
249  %r = call <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float> %v)
250  ret <vscale x 1 x float> %r
251}
252
; Test: llvm.sqrt on <vscale x 2 x float>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e32/m1.
253declare <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float>)
254
255define <vscale x 2 x float> @vfsqrt_nxv2f32(<vscale x 2 x float> %v) {
256; CHECK-LABEL: vfsqrt_nxv2f32:
257; CHECK:       # %bb.0:
258; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
259; CHECK-NEXT:    vfsqrt.v v8, v8
260; CHECK-NEXT:    ret
261  %r = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> %v)
262  ret <vscale x 2 x float> %r
263}
264
; Test: llvm.sqrt on <vscale x 4 x float>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e32/m2.
265declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
266
267define <vscale x 4 x float> @vfsqrt_nxv4f32(<vscale x 4 x float> %v) {
268; CHECK-LABEL: vfsqrt_nxv4f32:
269; CHECK:       # %bb.0:
270; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
271; CHECK-NEXT:    vfsqrt.v v8, v8
272; CHECK-NEXT:    ret
273  %r = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %v)
274  ret <vscale x 4 x float> %r
275}
276
; Test: llvm.sqrt on <vscale x 8 x float>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e32/m4.
277declare <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float>)
278
279define <vscale x 8 x float> @vfsqrt_nxv8f32(<vscale x 8 x float> %v) {
280; CHECK-LABEL: vfsqrt_nxv8f32:
281; CHECK:       # %bb.0:
282; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
283; CHECK-NEXT:    vfsqrt.v v8, v8
284; CHECK-NEXT:    ret
285  %r = call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> %v)
286  ret <vscale x 8 x float> %r
287}
288
; Test: llvm.sqrt on <vscale x 16 x float>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e32/m8.
289declare <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float>)
290
291define <vscale x 16 x float> @vfsqrt_nxv16f32(<vscale x 16 x float> %v) {
292; CHECK-LABEL: vfsqrt_nxv16f32:
293; CHECK:       # %bb.0:
294; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
295; CHECK-NEXT:    vfsqrt.v v8, v8
296; CHECK-NEXT:    ret
297  %r = call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> %v)
298  ret <vscale x 16 x float> %r
299}
300
; Test: llvm.sqrt on <vscale x 1 x double>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e64/m1.
301declare <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double>)
302
303define <vscale x 1 x double> @vfsqrt_nxv1f64(<vscale x 1 x double> %v) {
304; CHECK-LABEL: vfsqrt_nxv1f64:
305; CHECK:       # %bb.0:
306; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
307; CHECK-NEXT:    vfsqrt.v v8, v8
308; CHECK-NEXT:    ret
309  %r = call <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double> %v)
310  ret <vscale x 1 x double> %r
311}
312
; Test: llvm.sqrt on <vscale x 2 x double>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e64/m2.
313declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
314
315define <vscale x 2 x double> @vfsqrt_nxv2f64(<vscale x 2 x double> %v) {
316; CHECK-LABEL: vfsqrt_nxv2f64:
317; CHECK:       # %bb.0:
318; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
319; CHECK-NEXT:    vfsqrt.v v8, v8
320; CHECK-NEXT:    ret
321  %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %v)
322  ret <vscale x 2 x double> %r
323}
324
; Test: llvm.sqrt on <vscale x 4 x double>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e64/m4.
325declare <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double>)
326
327define <vscale x 4 x double> @vfsqrt_nxv4f64(<vscale x 4 x double> %v) {
328; CHECK-LABEL: vfsqrt_nxv4f64:
329; CHECK:       # %bb.0:
330; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
331; CHECK-NEXT:    vfsqrt.v v8, v8
332; CHECK-NEXT:    ret
333  %r = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> %v)
334  ret <vscale x 4 x double> %r
335}
336
; Test: llvm.sqrt on <vscale x 8 x double>.
; The assertions use the shared CHECK prefix (enabled by every RUN line):
; a single vfsqrt.v operating in place on v8 at e64/m8.
337declare <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double>)
338
339define <vscale x 8 x double> @vfsqrt_nxv8f64(<vscale x 8 x double> %v) {
340; CHECK-LABEL: vfsqrt_nxv8f64:
341; CHECK:       # %bb.0:
342; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
343; CHECK-NEXT:    vfsqrt.v v8, v8
344; CHECK-NEXT:    ret
345  %r = call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> %v)
346  ret <vscale x 8 x double> %r
347}
348