xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll (revision b356aa3e2da7d1792412783e2c6247538ead75e8)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s
5
; Baseline case: llvm.floor.f64 applied directly to a double argument in a
; function that carries no attribute group (so "amdgpu-ieee" is not overridden).
; The tahiti checks expect floor to be expanded into v_fract_f64, a v_min_f64
; clamp against the constant pair 0x3fefffff:0xffffffff, a v_cmp_o_f64 plus
; v_cndmask_b32 select, and a final v_add_f64 with a negated second operand.
; The bonaire and fiji checks expect a single v_floor_f64 instruction.
; NOTE(review): presumably the v_cmp_o/v_cndmask pair is the NaN guard that the
; nnan variants of this test drop -- confirm the select operand order against
; the ISA documentation before treating these autogenerated checks as a
; correctness reference.
6define double @v_floor_f64_ieee(double %x) {
7; GFX6-LABEL: v_floor_f64_ieee:
8; GFX6:       ; %bb.0:
9; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
11; GFX6-NEXT:    v_mov_b32_e32 v4, -1
12; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
13; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
14; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
15; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
16; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
17; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
18; GFX6-NEXT:    s_setpc_b64 s[30:31]
19;
20; GFX78-LABEL: v_floor_f64_ieee:
21; GFX78:       ; %bb.0:
22; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
24; GFX78-NEXT:    s_setpc_b64 s[30:31]
25  %result = call double @llvm.floor.f64(double %x)
26  ret double %result
27}
28
; Same as the baseline floor test, but the intrinsic call carries the nnan
; fast-math flag. The tahiti checks for this function contain no v_cmp_o_f64 /
; v_cndmask_b32 instructions: the expected sequence goes straight from the
; v_min_f64 clamp to the final v_add_f64. The bonaire and fiji checks still
; expect a single v_floor_f64.
29define double @v_floor_f64_ieee_nnan(double %x) {
30; GFX6-LABEL: v_floor_f64_ieee_nnan:
31; GFX6:       ; %bb.0:
32; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
34; GFX6-NEXT:    v_mov_b32_e32 v4, -1
35; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
36; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
37; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
38; GFX6-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX78-LABEL: v_floor_f64_ieee_nnan:
41; GFX78:       ; %bb.0:
42; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
44; GFX78-NEXT:    s_setpc_b64 s[30:31]
45  %result = call nnan double @llvm.floor.f64(double %x)
46  ret double %result
47}
48
; Floor of a negated argument (fneg feeding llvm.floor.f64). The checks expect
; the negation to appear as a source modifier rather than a separate
; instruction: -v[0:1] on v_fract_f64 and v_add_f64 for tahiti, and on
; v_floor_f64 for bonaire/fiji. In the tahiti sequence the v_cmp_o_f64 and
; v_cndmask_b32 instructions read v[0:1] / v0 / v1 without the modifier.
49define double @v_floor_f64_ieee_fneg(double %x) {
50; GFX6-LABEL: v_floor_f64_ieee_fneg:
51; GFX6:       ; %bb.0:
52; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -v[0:1]
54; GFX6-NEXT:    v_mov_b32_e32 v4, -1
55; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
56; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
57; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
58; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
59; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
60; GFX6-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
61; GFX6-NEXT:    s_setpc_b64 s[30:31]
62;
63; GFX78-LABEL: v_floor_f64_ieee_fneg:
64; GFX78:       ; %bb.0:
65; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -v[0:1]
67; GFX78-NEXT:    s_setpc_b64 s[30:31]
68  %neg.x = fneg double %x
69  %result = call double @llvm.floor.f64(double %neg.x)
70  ret double %result
71}
72
; Baseline floor test repeated with attribute group #1, which this file defines
; as "amdgpu-ieee"="false". The expected instruction sequences are the same as
; for v_floor_f64_ieee: the fract/min/compare/select/add expansion for tahiti
; and a single v_floor_f64 for bonaire/fiji.
73define double @v_floor_f64_nonieee(double %x) #1 {
74; GFX6-LABEL: v_floor_f64_nonieee:
75; GFX6:       ; %bb.0:
76; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
78; GFX6-NEXT:    v_mov_b32_e32 v4, -1
79; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
80; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
81; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
82; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
83; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
84; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
85; GFX6-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX78-LABEL: v_floor_f64_nonieee:
88; GFX78:       ; %bb.0:
89; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
91; GFX78-NEXT:    s_setpc_b64 s[30:31]
92  %result = call double @llvm.floor.f64(double %x)
93  ret double %result
94}
95
; nnan-flagged floor call in a function using attribute group #1
; ("amdgpu-ieee"="false"). As in the nnan test without that attribute, the
; tahiti checks contain no v_cmp_o_f64 / v_cndmask_b32 instructions; the
; bonaire and fiji checks expect a single v_floor_f64.
96define double @v_floor_f64_nonieee_nnan(double %x) #1 {
97; GFX6-LABEL: v_floor_f64_nonieee_nnan:
98; GFX6:       ; %bb.0:
99; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
101; GFX6-NEXT:    v_mov_b32_e32 v4, -1
102; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
103; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
104; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
105; GFX6-NEXT:    s_setpc_b64 s[30:31]
106;
107; GFX78-LABEL: v_floor_f64_nonieee_nnan:
108; GFX78:       ; %bb.0:
109; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
111; GFX78-NEXT:    s_setpc_b64 s[30:31]
112  %result = call nnan double @llvm.floor.f64(double %x)
113  ret double %result
114}
115
; Floor of a negated argument in a function using attribute group #1
; ("amdgpu-ieee"="false"). The checks expect the negation as a -v[0:1] source
; modifier on v_fract_f64 / v_add_f64 (tahiti) and on v_floor_f64
; (bonaire/fiji), matching the fneg test that has no attribute group.
; Note: this function is spelled "non_ieee" while the two sibling tests with
; the same attribute are spelled "nonieee"; the name is left as-is because the
; LABEL checks are keyed on it.
116define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
117; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
118; GFX6:       ; %bb.0:
119; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -v[0:1]
121; GFX6-NEXT:    v_mov_b32_e32 v4, -1
122; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
123; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
124; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
125; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
126; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
127; GFX6-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
128; GFX6-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
131; GFX78:       ; %bb.0:
132; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -v[0:1]
134; GFX78-NEXT:    s_setpc_b64 s[30:31]
135  %neg.x = fneg double %x
136  %result = call double @llvm.floor.f64(double %neg.x)
137  ret double %result
138}
139
; Floor of llvm.fabs.f64(%x). The checks expect the absolute value to appear as
; a |v[0:1]| source modifier: on v_fract_f64 and v_add_f64 for tahiti, and on
; v_floor_f64 for bonaire/fiji. No separate instruction for the fabs appears in
; the expected output.
140define double @v_floor_f64_fabs(double %x) {
141; GFX6-LABEL: v_floor_f64_fabs:
142; GFX6:       ; %bb.0:
143; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX6-NEXT:    v_fract_f64_e64 v[2:3], |v[0:1]|
145; GFX6-NEXT:    v_mov_b32_e32 v4, -1
146; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
147; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
148; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
149; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
150; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
151; GFX6-NEXT:    v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
152; GFX6-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX78-LABEL: v_floor_f64_fabs:
155; GFX78:       ; %bb.0:
156; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX78-NEXT:    v_floor_f64_e64 v[0:1], |v[0:1]|
158; GFX78-NEXT:    s_setpc_b64 s[30:31]
159  %abs.x = call double @llvm.fabs.f64(double %x)
160  %result = call double @llvm.floor.f64(double %abs.x)
161  ret double %result
162}
163
; Floor of fneg(fabs(%x)). The checks expect both operations to be combined
; into a single -|v[0:1]| source modifier: on v_fract_f64 and v_add_f64 for
; tahiti, and on v_floor_f64 for bonaire/fiji.
164define double @v_floor_f64_fneg_fabs(double %x) {
165; GFX6-LABEL: v_floor_f64_fneg_fabs:
166; GFX6:       ; %bb.0:
167; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -|v[0:1]|
169; GFX6-NEXT:    v_mov_b32_e32 v4, -1
170; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
171; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
172; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
173; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
174; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
175; GFX6-NEXT:    v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
176; GFX6-NEXT:    s_setpc_b64 s[30:31]
177;
178; GFX78-LABEL: v_floor_f64_fneg_fabs:
179; GFX78:       ; %bb.0:
180; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -|v[0:1]|
182; GFX78-NEXT:    s_setpc_b64 s[30:31]
183  %abs.x = call double @llvm.fabs.f64(double %x)
184  %neg.abs.x = fneg double %abs.x
185  %result = call double @llvm.floor.f64(double %neg.abs.x)
186  ret double %result
187}
188
; amdgpu_ps variant of the baseline floor test: the double argument is marked
; inreg and the result is bitcast to <2 x float> for the return. The checks
; expect the input to be read from s[2:3]. For tahiti the expected expansion
; also copies s2/s3 into v2/v3 with v_mov_b32 before the v_cndmask_b32 pair;
; for bonaire/fiji a single v_floor_f64 reads s[2:3] directly. Both sequences
; end with the "return to shader part epilog" marker instead of s_setpc_b64.
189define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
190; GFX6-LABEL: s_floor_f64:
191; GFX6:       ; %bb.0:
192; GFX6-NEXT:    v_fract_f64_e32 v[0:1], s[2:3]
193; GFX6-NEXT:    v_mov_b32_e32 v2, -1
194; GFX6-NEXT:    v_mov_b32_e32 v3, 0x3fefffff
195; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
196; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
197; GFX6-NEXT:    v_mov_b32_e32 v2, s2
198; GFX6-NEXT:    v_mov_b32_e32 v3, s3
199; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
200; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
201; GFX6-NEXT:    v_add_f64 v[0:1], s[2:3], -v[0:1]
202; GFX6-NEXT:    ; return to shader part epilog
203;
204; GFX78-LABEL: s_floor_f64:
205; GFX78:       ; %bb.0:
206; GFX78-NEXT:    v_floor_f64_e32 v[0:1], s[2:3]
207; GFX78-NEXT:    ; return to shader part epilog
208  %result = call double @llvm.floor.f64(double %x)
209  %cast = bitcast double %result to <2 x float>
210  ret <2 x float> %cast
211}
212
; amdgpu_ps / inreg variant of the fneg floor test; the result is bitcast to
; <2 x float> for the return. The checks expect the negation as a -s[2:3]
; source modifier on v_fract_f64 and v_add_f64 (tahiti) and on v_floor_f64
; (bonaire/fiji). In the tahiti sequence the v_cmp_o_f64 and the v_mov_b32
; copies read s[2:3] / s2 / s3 without the modifier.
213define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
214; GFX6-LABEL: s_floor_f64_fneg:
215; GFX6:       ; %bb.0:
216; GFX6-NEXT:    v_fract_f64_e64 v[0:1], -s[2:3]
217; GFX6-NEXT:    v_mov_b32_e32 v2, -1
218; GFX6-NEXT:    v_mov_b32_e32 v3, 0x3fefffff
219; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
220; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
221; GFX6-NEXT:    v_mov_b32_e32 v2, s2
222; GFX6-NEXT:    v_mov_b32_e32 v3, s3
223; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
224; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
225; GFX6-NEXT:    v_add_f64 v[0:1], -s[2:3], -v[0:1]
226; GFX6-NEXT:    ; return to shader part epilog
227;
228; GFX78-LABEL: s_floor_f64_fneg:
229; GFX78:       ; %bb.0:
230; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -s[2:3]
231; GFX78-NEXT:    ; return to shader part epilog
232  %neg.x = fneg double %x
233  %result = call double @llvm.floor.f64(double %neg.x)
234  %cast = bitcast double %result to <2 x float>
235  ret <2 x float> %cast
236}
237
; amdgpu_ps / inreg variant of the fabs floor test; the result is bitcast to
; <2 x float> for the return. The checks expect the absolute value as a
; |s[2:3]| source modifier on v_fract_f64 and v_add_f64 (tahiti) and on
; v_floor_f64 (bonaire/fiji).
238define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
239; GFX6-LABEL: s_floor_f64_fabs:
240; GFX6:       ; %bb.0:
241; GFX6-NEXT:    v_fract_f64_e64 v[0:1], |s[2:3]|
242; GFX6-NEXT:    v_mov_b32_e32 v2, -1
243; GFX6-NEXT:    v_mov_b32_e32 v3, 0x3fefffff
244; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
245; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
246; GFX6-NEXT:    v_mov_b32_e32 v2, s2
247; GFX6-NEXT:    v_mov_b32_e32 v3, s3
248; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
249; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
250; GFX6-NEXT:    v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
251; GFX6-NEXT:    ; return to shader part epilog
252;
253; GFX78-LABEL: s_floor_f64_fabs:
254; GFX78:       ; %bb.0:
255; GFX78-NEXT:    v_floor_f64_e64 v[0:1], |s[2:3]|
256; GFX78-NEXT:    ; return to shader part epilog
257  %abs.x = call double @llvm.fabs.f64(double %x)
258  %result = call double @llvm.floor.f64(double %abs.x)
259  %cast = bitcast double %result to <2 x float>
260  ret <2 x float> %cast
261}
262
; amdgpu_ps / inreg variant of the fneg(fabs(x)) floor test; the result is
; bitcast to <2 x float> for the return. The checks expect both operations to
; be combined into a single -|s[2:3]| source modifier on v_fract_f64 and
; v_add_f64 (tahiti) and on v_floor_f64 (bonaire/fiji).
263define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
264; GFX6-LABEL: s_floor_f64_fneg_fabs:
265; GFX6:       ; %bb.0:
266; GFX6-NEXT:    v_fract_f64_e64 v[0:1], -|s[2:3]|
267; GFX6-NEXT:    v_mov_b32_e32 v2, -1
268; GFX6-NEXT:    v_mov_b32_e32 v3, 0x3fefffff
269; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
270; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
271; GFX6-NEXT:    v_mov_b32_e32 v2, s2
272; GFX6-NEXT:    v_mov_b32_e32 v3, s3
273; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
274; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
275; GFX6-NEXT:    v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
276; GFX6-NEXT:    ; return to shader part epilog
277;
278; GFX78-LABEL: s_floor_f64_fneg_fabs:
279; GFX78:       ; %bb.0:
280; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -|s[2:3]|
281; GFX78-NEXT:    ; return to shader part epilog
282  %abs.x = call double @llvm.fabs.f64(double %x)
283  %neg.abs.x = fneg double %abs.x
284  %result = call double @llvm.floor.f64(double %neg.abs.x)
285  %cast = bitcast double %result to <2 x float>
286  ret <2 x float> %cast
287}
288
; Declarations of the two intrinsics called by the tests in this file; both
; use attribute group #0.
289declare double @llvm.floor.f64(double) #0
290declare double @llvm.fabs.f64(double) #0
291
; Attribute group #0 is attached to the intrinsic declarations above.
292attributes #0 = { nounwind readnone speculatable willreturn }
; Attribute group #1 sets "amdgpu-ieee" to "false"; it is attached to the three
; test functions whose names contain "nonieee" / "non_ieee".
293attributes #1 = { "amdgpu-ieee"="false" }
294