; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
; We have to assume that errno can be set, so we have to make a libcall in that case.
; But it's better for perf to check up front that the argument is valid (non-negative)
; than to check the result of sqrtss/sqrtsd for NaN afterwards.
; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test
; for that), but we're checking the final asm to make sure that comes out as expected too.
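;
; As a rough sketch (illustrative pseudo-IR only; the pass's exact block/value names,
; attributes, and phi placement differ), each sqrt call below effectively becomes:
;
;     %ok = fcmp oge float %val, 0.000000e+00    ; is the argument valid (non-negative)?
;     br i1 %ok, label %.split, label %call.sqrt
;   call.sqrt:                                   ; invalid input: call libm so errno can be set
;     %slow = tail call float @sqrtf(float %val)
;     ret float %slow
;   .split:                                      ; valid input: the same call, re-marked as
;     %fast = tail call float @sqrtf(float %val) ; errno-free, so it lowers to sqrtss/sqrtsd
;     ret float %fast
;
; Since both paths just return, the backend emits the libcall branch as the tail call
; seen in the checks below.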

define float @f(float %val) nounwind {
; SSE-LABEL: f:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    jb sqrtf # TAILCALL
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    jb sqrtf # TAILCALL
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = tail call float @sqrtf(float %val)
  ret float %res
}

define double @d(double %val) nounwind {
; SSE-LABEL: d:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    jb sqrt # TAILCALL
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: d:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    jb sqrt # TAILCALL
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = tail call double @sqrt(double %val)
  ret double %res
}

define double @minsize(double %x, double %y) minsize {
; SSE-LABEL: minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm0
; SSE-NEXT:    mulsd %xmm1, %xmm1
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t3 = fmul fast double %x, %x
  %t4 = fmul fast double %y, %y
  %t5 = fadd fast double %t3, %t4
  %t6 = tail call fast double @llvm.sqrt.f64(double %t5)
  ret double %t6
}

; Avoiding a partial-register update may be handled by register allocation
; rather than by adding a dependency-breaking instruction.
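;
; For context (a hedged sketch, not checked by this test): sqrtsd/vsqrtsd only write
; the low 64 bits of the destination, so merging into a register whose old value is
; dead creates a false dependence. Without minsize the backend would typically break
; that dependence by zeroing the destination first, e.g. something like:
;     xorps %xmm0, %xmm0
;     sqrtsd %xmm1, %xmm0
; Under minsize we expect no extra instruction; for AVX the merge operand can simply
; be the input register (xmm1 below), which is a real dependence anyway.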

define double @partial_dep_minsize(double %x, double %y) minsize {
; SSE-LABEL: partial_dep_minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: partial_dep_minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm0
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t6 = tail call fast double @llvm.sqrt.f64(double %y)
  %t = fadd fast double %t6, %y
  ret double %t
}

declare dso_local float @sqrtf(float)
declare dso_local double @sqrt(double)
declare dso_local double @llvm.sqrt.f64(double)