; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll (revision 00a4e248dc65d3a60fd900b342d4ba410bf70af0)
; Runs llc for the amdgcn triple on this file and verifies the output with
; FileCheck using the SI and FUNC check prefixes (variable scoping enabled).
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,FUNC %s

; Target-specific reciprocal intrinsics exercised by the kernels below.
declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

; Square-root intrinsics (target-specific and generic) whose results are fed
; into the rcp intrinsics by the *_rsq_rcp_pat_* kernels.
declare double @llvm.amdgcn.sqrt.f64(double) #0
declare float @llvm.amdgcn.sqrt.f32(float) #0
declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; llvm.amdgcn.rcp.f32 applied to an undef operand.
; Expected output: the constant 0x7fc00000 (captured as NAN) is moved into a
; VGPR and that same register is stored, with no other mention of it between
; the move and the store.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
; SI-NOT: [[NAN]]
; SI: buffer_store_dword [[NAN]]
define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; llvm.amdgcn.rcp.f32 applied to the constant 2.0.
; Expected output: no v_rcp_f32 before a v_mov_b32 that materializes 0.5,
; i.e. the reciprocal is computed at compile time.
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; llvm.amdgcn.rcp.f32 applied to the constant 10.0.
; Expected output: no v_rcp_f32 before a v_mov_b32 of the literal 0x3dcccccd
; (the f32 bit pattern closest to 0.1), i.e. the reciprocal is folded.
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; fdiv 1.0, %src carrying !fpmath !0, compiled with attribute group #1.
; Expected output: a single v_rcp_f32_e32 reading an SGPR operand whose result
; register is stored without being referenced in between.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src, !fpmath !0
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; fdiv 1.0, %src carrying !fpmath !0, compiled with attribute group #4.
; Expected output: a single v_rcp_f32_e32 reading an SGPR operand whose result
; register is stored without being referenced in between.
; NOTE(review): despite the "safe" prefix in the name, this kernel uses
; attribute group #4, which sets "unsafe-fp-math"="true" - confirm the naming
; is intentional.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src, !fpmath !0
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; fdiv 1.0, %src with no !fpmath metadata, compiled with attribute group #3.
; Expected output: v_div_scale_f32 appears in the generated code.
; NOTE(review): despite the "unsafe" prefix in the name, this kernel uses
; attribute group #3, which sets "unsafe-fp-math"="false" - confirm the naming
; is intentional.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; rcp of a generic llvm.sqrt.f32 result, both calls marked contract, compiled
; with attribute group #1.
; Expected output, in order: v_mul_f32, v_rsq_f32, v_mul_f32, five v_fma_f32,
; and finally a separate v_rcp_f32.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_mul_f32
; SI: v_rsq_f32
; SI: v_mul_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_rcp_f32
define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #1 {
  %sqrt = call contract float @llvm.sqrt.f32(float %src)
  %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; rcp of a target-specific llvm.amdgcn.sqrt.f32 result, both calls marked
; contract, compiled with attribute group #1.
; Expected output: v_sqrt_f32_e32 followed by v_rcp_f32_e32.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32(ptr addrspace(1) %out, float %src) #1 {
  %sqrt = call contract float @llvm.amdgcn.sqrt.f32(float %src)
  %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; Same shape as safe_rsq_rcp_pat_amdgcn_sqrt_f32, except that only the rcp
; call is marked contract; the llvm.amdgcn.sqrt.f32 call carries no flags.
; Expected output: v_sqrt_f32_e32 followed by v_rcp_f32_e32.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrspace(1) %out, float %src) #1 {
  %sqrt = call float @llvm.amdgcn.sqrt.f32(float %src)
  %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; rcp of a generic llvm.sqrt.f32 result with no per-call fast-math flags,
; compiled with attribute group #2.
; Expected output: v_sqrt_f32_e32 followed by v_rcp_f32_e32.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; llvm.amdgcn.rcp.f64 on a kernel argument, compiled with attribute group #1.
; Expected output: v_rcp_f64_e32 reading an SGPR pair, with its result register
; pair stored by buffer_store_dwordx2 and not referenced in between.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; llvm.amdgcn.rcp.f64 on a kernel argument, compiled with attribute group #2.
; Expected output is the same as for rcp_f64: v_rcp_f64_e32 reading an SGPR
; pair, result stored directly by buffer_store_dwordx2.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; fdiv double 1.0, %src compiled with attribute group #1.
; Expected output: v_div_scale_f64 appears in the generated code.
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; fdiv double 1.0, %src compiled with attribute group #2.
; Expected output: v_rcp_f64 followed by six v_fma_f64 instructions.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; rcp of a generic llvm.sqrt.f64 result, compiled with attribute group #1.
; Expected output, in order: v_rsq_f64, two v_mul_f64, six v_fma_f64, and
; finally a separate v_rcp_f64. A negative check for v_rsq_f64_e32 precedes
; the first positive match.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_rsq_f64
; SI: v_mul_f64
; SI: v_mul_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; rcp of a target-specific llvm.amdgcn.sqrt.f64 result, compiled with
; attribute group #1.
; Expected output: no v_rsq_f64_e32 before v_sqrt_f64, which is then followed
; by v_rcp_f64.
; FUNC-LABEL: {{^}}safe_amdgcn_sqrt_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
  %sqrt = call double @llvm.amdgcn.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; rcp of a generic llvm.sqrt.f64 result, compiled with attribute group #2.
; Expected output, in order: v_rsq_f64, two v_mul_f64, six v_fma_f64, a
; separate v_rcp_f64, and then the buffer_store_dwordx2 of the result.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_rsq_f64
; SI: v_mul_f64
; SI: v_mul_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_rcp_f64
; SI: buffer_store_dwordx2
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; rcp of a target-specific llvm.amdgcn.sqrt.f64 result, compiled with
; attribute group #2.
; Expected output: v_sqrt_f64_e32 reading an SGPR pair, v_rcp_f64_e32 consuming
; exactly that sqrt result, and the rcp result stored by buffer_store_dwordx2.
; FUNC-LABEL: {{^}}unsafe_amdgcn_sqrt_rsq_rcp_pat_f64:
; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
  %sqrt = call double @llvm.amdgcn.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; Attribute groups referenced by the declarations and kernels in this file.
;   #0 - intrinsic declarations.
;   #1 - "unsafe-fp-math" off, f32 denormal mode preserve-sign.
;   #2 - "unsafe-fp-math" on,  f32 denormal mode preserve-sign.
;   #3 - "unsafe-fp-math" off, f32 denormal mode ieee.
;   #4 - "unsafe-fp-math" on,  f32 denormal mode ieee.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }

; Operand of the !fpmath attachments on the f32 fdiv instructions above: 2.5
; (per the LLVM LangRef, the maximum permitted relative error in ULPs).
!0 = !{float 2.500000e+00}
