xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fract-match.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2
3; Test with buggy fract, shouldn't match
4; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,GFX6-IR %s
5
6; Working fract, but no f16
7; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=kaveri -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,GFX7-IR %s
8
9; Working fract and f16 support
10; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,IR-LEGALF16 %s
11; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,IR-LEGALF16 %s
12
13; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
14; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
15; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
16; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
17; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
18
19; Test patterns to match v_fract_* instructions.
20
21; Expansion as it appears in the library with the extra output for
22; floor. We can fold the nan check into the instruction, but the
23; inf check must remain.
24define float @safe_math_fract_f32(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
25; GFX6-IR-LABEL: define float @safe_math_fract_f32
26; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
27; GFX6-IR-NEXT:  entry:
28; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
29; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
30; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
31; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
32; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
33; GFX6-IR-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
34; GFX6-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
35; GFX6-IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
36; GFX6-IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
37; GFX6-IR-NEXT:    ret float [[COND6]]
38;
39; IR-FRACT-LABEL: define float @safe_math_fract_f32
40; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
41; IR-FRACT-NEXT:  entry:
42; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
43; IR-FRACT-NEXT:    [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
44; IR-FRACT-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
45; IR-FRACT-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
46; IR-FRACT-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
47; IR-FRACT-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
48; IR-FRACT-NEXT:    ret float [[COND6]]
49;
50; GFX6-LABEL: safe_math_fract_f32:
51; GFX6:       ; %bb.0: ; %entry
52; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX6-NEXT:    v_floor_f32_e32 v3, v0
54; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
55; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
56; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
57; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
58; GFX6-NEXT:    s_mov_b32 s6, 0
59; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
60; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
61; GFX6-NEXT:    s_mov_b32 s7, 0xf000
62; GFX6-NEXT:    s_mov_b32 s4, s6
63; GFX6-NEXT:    s_mov_b32 s5, s6
64; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
65; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
66; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
67; GFX6-NEXT:    s_setpc_b64 s[30:31]
68;
69; GFX7-LABEL: safe_math_fract_f32:
70; GFX7:       ; %bb.0: ; %entry
71; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
73; GFX7-NEXT:    s_mov_b32 s6, 0
74; GFX7-NEXT:    v_fract_f32_e32 v4, v0
75; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
76; GFX7-NEXT:    s_mov_b32 s7, 0xf000
77; GFX7-NEXT:    s_mov_b32 s4, s6
78; GFX7-NEXT:    s_mov_b32 s5, s6
79; GFX7-NEXT:    v_floor_f32_e32 v3, v0
80; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
81; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
82; GFX7-NEXT:    s_waitcnt vmcnt(0)
83; GFX7-NEXT:    s_setpc_b64 s[30:31]
84;
85; GFX8-LABEL: safe_math_fract_f32:
86; GFX8:       ; %bb.0: ; %entry
87; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
89; GFX8-NEXT:    v_fract_f32_e32 v4, v0
90; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
91; GFX8-NEXT:    v_floor_f32_e32 v3, v0
92; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
93; GFX8-NEXT:    global_store_dword v[1:2], v3, off
94; GFX8-NEXT:    s_waitcnt vmcnt(0)
95; GFX8-NEXT:    s_setpc_b64 s[30:31]
96;
97; GFX11-LABEL: safe_math_fract_f32:
98; GFX11:       ; %bb.0: ; %entry
99; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX11-NEXT:    v_fract_f32_e32 v3, v0
101; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
102; GFX11-NEXT:    v_floor_f32_e32 v4, v0
103; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
104; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
105; GFX11-NEXT:    global_store_b32 v[1:2], v4, off
106; GFX11-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX12-LABEL: safe_math_fract_f32:
109; GFX12:       ; %bb.0: ; %entry
110; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
111; GFX12-NEXT:    s_wait_expcnt 0x0
112; GFX12-NEXT:    s_wait_samplecnt 0x0
113; GFX12-NEXT:    s_wait_bvhcnt 0x0
114; GFX12-NEXT:    s_wait_kmcnt 0x0
115; GFX12-NEXT:    v_fract_f32_e32 v3, v0
116; GFX12-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
117; GFX12-NEXT:    v_floor_f32_e32 v4, v0
118; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
119; GFX12-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
120; GFX12-NEXT:    global_store_b32 v[1:2], v4, off
121; GFX12-NEXT:    s_setpc_b64 s[30:31]
122entry:
123  %floor = tail call float @llvm.floor.f32(float %x)
124  %sub = fsub float %x, %floor
  ; 0x3FEFFFFFE0000000 is the largest float below 1.0; it shows up as the
  ; 0x3f7fffff literal in the tahiti ISA checks above.
125  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ; NaN inputs are returned unchanged. In the kaveri/gfx900/gfx1100 opt runs the
  ; minnum and this select are replaced by one llvm.amdgcn.fract.f32 call.
126  %uno = fcmp uno float %x, 0.000000e+00
127  %cond = select i1 %uno, float %x, float %min
  ; Infinite inputs yield 0.0. This check is still present in every expected
  ; output, both IR and ISA.
128  %fabs = tail call float @llvm.fabs.f32(float %x)
129  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
130  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
  ; Second use of %floor: the floor call stays in the output after the fold.
131  store float %floor, ptr addrspace(1) %ip, align 4
132  ret float %cond6
133}
134
; Same expansion as safe_math_fract_f32 minus the infinity check: floor, fsub,
; minnum against the largest float below 1.0, and a select that returns NaN
; inputs unchanged. The tahiti opt run keeps the expansion; the other opt runs
; replace the minnum/select with one llvm.amdgcn.fract.f32 call. floor is
; retained everywhere because it is also stored through %ip.
135define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
136; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check
137; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
138; GFX6-IR-NEXT:  entry:
139; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
140; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
141; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
142; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
143; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
144; GFX6-IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
145; GFX6-IR-NEXT:    ret float [[COND]]
146;
147; IR-FRACT-LABEL: define float @safe_math_fract_f32_noinf_check
148; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
149; IR-FRACT-NEXT:  entry:
150; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
151; IR-FRACT-NEXT:    [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
152; IR-FRACT-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
153; IR-FRACT-NEXT:    ret float [[COND]]
154;
155; GFX6-LABEL: safe_math_fract_f32_noinf_check:
156; GFX6:       ; %bb.0: ; %entry
157; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158; GFX6-NEXT:    v_floor_f32_e32 v3, v0
159; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
160; GFX6-NEXT:    s_mov_b32 s6, 0
161; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
162; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
163; GFX6-NEXT:    s_mov_b32 s7, 0xf000
164; GFX6-NEXT:    s_mov_b32 s4, s6
165; GFX6-NEXT:    s_mov_b32 s5, s6
166; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
167; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
168; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
169; GFX6-NEXT:    s_setpc_b64 s[30:31]
170;
171; GFX7-LABEL: safe_math_fract_f32_noinf_check:
172; GFX7:       ; %bb.0: ; %entry
173; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174; GFX7-NEXT:    s_mov_b32 s6, 0
175; GFX7-NEXT:    s_mov_b32 s7, 0xf000
176; GFX7-NEXT:    s_mov_b32 s4, s6
177; GFX7-NEXT:    s_mov_b32 s5, s6
178; GFX7-NEXT:    v_floor_f32_e32 v3, v0
179; GFX7-NEXT:    v_fract_f32_e32 v0, v0
180; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
181; GFX7-NEXT:    s_waitcnt vmcnt(0)
182; GFX7-NEXT:    s_setpc_b64 s[30:31]
183;
184; GFX8-LABEL: safe_math_fract_f32_noinf_check:
185; GFX8:       ; %bb.0: ; %entry
186; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX8-NEXT:    v_floor_f32_e32 v3, v0
188; GFX8-NEXT:    v_fract_f32_e32 v0, v0
189; GFX8-NEXT:    global_store_dword v[1:2], v3, off
190; GFX8-NEXT:    s_waitcnt vmcnt(0)
191; GFX8-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX11-LABEL: safe_math_fract_f32_noinf_check:
194; GFX11:       ; %bb.0: ; %entry
195; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX11-NEXT:    v_floor_f32_e32 v3, v0
197; GFX11-NEXT:    v_fract_f32_e32 v0, v0
198; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
199; GFX11-NEXT:    s_setpc_b64 s[30:31]
200;
201; GFX12-LABEL: safe_math_fract_f32_noinf_check:
202; GFX12:       ; %bb.0: ; %entry
203; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
204; GFX12-NEXT:    s_wait_expcnt 0x0
205; GFX12-NEXT:    s_wait_samplecnt 0x0
206; GFX12-NEXT:    s_wait_bvhcnt 0x0
207; GFX12-NEXT:    s_wait_kmcnt 0x0
208; GFX12-NEXT:    v_floor_f32_e32 v3, v0
209; GFX12-NEXT:    v_fract_f32_e32 v0, v0
210; GFX12-NEXT:    global_store_b32 v[1:2], v3, off
211; GFX12-NEXT:    s_setpc_b64 s[30:31]
212entry:
213  %floor = tail call float @llvm.floor.f32(float %x)
214  %sub = fsub float %x, %floor
215  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
216  %uno = fcmp uno float %x, 0.000000e+00
217  %cond = select i1 %uno, float %x, float %min
218  store float %floor, ptr addrspace(1) %ip, align 4
219  ret float %cond
220}
221
222; Cannot match fract without a nan check or no-nans.
223define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
224; IR-LABEL: define float @no_nan_check_math_fract_f32
225; IR-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
226; IR-NEXT:  entry:
227; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
228; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
229; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
230; IR-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
231; IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
232; IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[MIN]]
233; IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
234; IR-NEXT:    ret float [[COND6]]
235;
236; GFX6-LABEL: no_nan_check_math_fract_f32:
237; GFX6:       ; %bb.0: ; %entry
238; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239; GFX6-NEXT:    v_floor_f32_e32 v3, v0
240; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
241; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
242; GFX6-NEXT:    s_mov_b32 s6, 0
243; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
244; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
245; GFX6-NEXT:    s_mov_b32 s7, 0xf000
246; GFX6-NEXT:    s_mov_b32 s4, s6
247; GFX6-NEXT:    s_mov_b32 s5, s6
248; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
249; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
250; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
251; GFX6-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX7-LABEL: no_nan_check_math_fract_f32:
254; GFX7:       ; %bb.0: ; %entry
255; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX7-NEXT:    v_floor_f32_e32 v3, v0
257; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
258; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
259; GFX7-NEXT:    s_mov_b32 s6, 0
260; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
261; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
262; GFX7-NEXT:    s_mov_b32 s7, 0xf000
263; GFX7-NEXT:    s_mov_b32 s4, s6
264; GFX7-NEXT:    s_mov_b32 s5, s6
265; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
266; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
267; GFX7-NEXT:    s_waitcnt vmcnt(0)
268; GFX7-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX8-LABEL: no_nan_check_math_fract_f32:
271; GFX8:       ; %bb.0: ; %entry
272; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX8-NEXT:    v_floor_f32_e32 v3, v0
274; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
275; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
276; GFX8-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
277; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
278; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
279; GFX8-NEXT:    global_store_dword v[1:2], v3, off
280; GFX8-NEXT:    s_waitcnt vmcnt(0)
281; GFX8-NEXT:    s_setpc_b64 s[30:31]
282;
283; GFX11-LABEL: no_nan_check_math_fract_f32:
284; GFX11:       ; %bb.0: ; %entry
285; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286; GFX11-NEXT:    v_floor_f32_e32 v3, v0
287; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
288; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
289; GFX11-NEXT:    v_sub_f32_e32 v4, v0, v3
290; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
291; GFX11-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
292; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
293; GFX11-NEXT:    s_setpc_b64 s[30:31]
294;
295; GFX12-LABEL: no_nan_check_math_fract_f32:
296; GFX12:       ; %bb.0: ; %entry
297; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
298; GFX12-NEXT:    s_wait_expcnt 0x0
299; GFX12-NEXT:    s_wait_samplecnt 0x0
300; GFX12-NEXT:    s_wait_bvhcnt 0x0
301; GFX12-NEXT:    s_wait_kmcnt 0x0
302; GFX12-NEXT:    v_floor_f32_e32 v3, v0
303; GFX12-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
304; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
305; GFX12-NEXT:    v_sub_f32_e32 v4, v0, v3
306; GFX12-NEXT:    global_store_b32 v[1:2], v3, off
307; GFX12-NEXT:    v_min_num_f32_e32 v4, 0x3f7fffff, v4
308; GFX12-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
309; GFX12-NEXT:    s_setpc_b64 s[30:31]
310entry:
  ; Negative test: there is no uno select here, and %x carries neither nnan nor
  ; nofpclass(nan). Every opt run leaves the expansion unchanged and every llc
  ; run emits floor/sub/min rather than v_fract.
311  %floor = tail call float @llvm.floor.f32(float %x)
312  %sub = fsub float %x, %floor
313  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ; Only the infinity check is present; the minnum result feeds it directly.
314  %fabs = tail call float @llvm.fabs.f32(float %x)
315  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
316  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %min
317  store float %floor, ptr addrspace(1) %ip, align 4
318  ret float %cond6
319}
320
; Minimal fract pattern (floor, fsub, minnum) with no NaN select; the argument
; is nofpclass(nan) instead. The tahiti opt run leaves it alone; the other opt
; runs fold it to a single nnan llvm.amdgcn.fract.f32 call and drop floor/fsub.
321define float @basic_fract_f32_nonans(float nofpclass(nan) %x) {
322; GFX6-IR-LABEL: define float @basic_fract_f32_nonans
323; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
324; GFX6-IR-NEXT:  entry:
325; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
326; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
327; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
328; GFX6-IR-NEXT:    ret float [[MIN]]
329;
330; IR-FRACT-LABEL: define float @basic_fract_f32_nonans
331; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
332; IR-FRACT-NEXT:  entry:
333; IR-FRACT-NEXT:    [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
334; IR-FRACT-NEXT:    ret float [[MIN]]
335;
336; GFX6-LABEL: basic_fract_f32_nonans:
337; GFX6:       ; %bb.0: ; %entry
338; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GFX6-NEXT:    v_floor_f32_e32 v1, v0
340; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
341; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
342; GFX6-NEXT:    s_setpc_b64 s[30:31]
343;
344; GFX7-LABEL: basic_fract_f32_nonans:
345; GFX7:       ; %bb.0: ; %entry
346; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX7-NEXT:    v_fract_f32_e32 v0, v0
348; GFX7-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX8-LABEL: basic_fract_f32_nonans:
351; GFX8:       ; %bb.0: ; %entry
352; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX8-NEXT:    v_fract_f32_e32 v0, v0
354; GFX8-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX11-LABEL: basic_fract_f32_nonans:
357; GFX11:       ; %bb.0: ; %entry
358; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359; GFX11-NEXT:    v_fract_f32_e32 v0, v0
360; GFX11-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX12-LABEL: basic_fract_f32_nonans:
363; GFX12:       ; %bb.0: ; %entry
364; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
365; GFX12-NEXT:    s_wait_expcnt 0x0
366; GFX12-NEXT:    s_wait_samplecnt 0x0
367; GFX12-NEXT:    s_wait_bvhcnt 0x0
368; GFX12-NEXT:    s_wait_kmcnt 0x0
369; GFX12-NEXT:    v_fract_f32_e32 v0, v0
370; GFX12-NEXT:    s_setpc_b64 s[30:31]
371entry:
372  %floor = tail call float @llvm.floor.f32(float %x)
373  %sub = fsub float %x, %floor
374  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
375  ret float %min
376}
377
; Same minimal pattern, but the only annotation is nsz on the minnum call and
; nothing says %x is not NaN. Every opt run leaves the expansion unchanged and
; every llc run emits floor/sub/min rather than v_fract.
378define float @basic_fract_f32_flags_minnum(float %x) {
379; IR-LABEL: define float @basic_fract_f32_flags_minnum
380; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
381; IR-NEXT:  entry:
382; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
383; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
384; IR-NEXT:    [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
385; IR-NEXT:    ret float [[MIN]]
386;
387; GFX6-LABEL: basic_fract_f32_flags_minnum:
388; GFX6:       ; %bb.0: ; %entry
389; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; GFX6-NEXT:    v_floor_f32_e32 v1, v0
391; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
392; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
393; GFX6-NEXT:    s_setpc_b64 s[30:31]
394;
395; GFX7-LABEL: basic_fract_f32_flags_minnum:
396; GFX7:       ; %bb.0: ; %entry
397; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398; GFX7-NEXT:    v_floor_f32_e32 v1, v0
399; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
400; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
401; GFX7-NEXT:    s_setpc_b64 s[30:31]
402;
403; GFX8-LABEL: basic_fract_f32_flags_minnum:
404; GFX8:       ; %bb.0: ; %entry
405; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GFX8-NEXT:    v_floor_f32_e32 v1, v0
407; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
408; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
409; GFX8-NEXT:    s_setpc_b64 s[30:31]
410;
411; GFX11-LABEL: basic_fract_f32_flags_minnum:
412; GFX11:       ; %bb.0: ; %entry
413; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; GFX11-NEXT:    v_floor_f32_e32 v1, v0
415; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
416; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
417; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
418; GFX11-NEXT:    s_setpc_b64 s[30:31]
419;
420; GFX12-LABEL: basic_fract_f32_flags_minnum:
421; GFX12:       ; %bb.0: ; %entry
422; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
423; GFX12-NEXT:    s_wait_expcnt 0x0
424; GFX12-NEXT:    s_wait_samplecnt 0x0
425; GFX12-NEXT:    s_wait_bvhcnt 0x0
426; GFX12-NEXT:    s_wait_kmcnt 0x0
427; GFX12-NEXT:    v_floor_f32_e32 v1, v0
428; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
429; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
430; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7fffff, v0
431; GFX12-NEXT:    s_setpc_b64 s[30:31]
432entry:
433  %floor = tail call float @llvm.floor.f32(float %x)
434  %sub = fsub float %x, %floor
435  %min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
436  ret float %min
437}
438
; nofpclass(nan) argument with nsz on the fsub. The fract-capable opt runs
; (kaveri, gfx900, gfx1100) still form llvm.amdgcn.fract.f32; the emitted call
; carries nnan only, so the nsz flag from the fsub is not propagated. The
; tahiti opt run keeps the expansion, nsz included.
439define float @basic_fract_f32_flags_fsub(float nofpclass(nan) %x) {
440; GFX6-IR-LABEL: define float @basic_fract_f32_flags_fsub
441; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
442; GFX6-IR-NEXT:  entry:
443; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
444; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub nsz float [[X]], [[FLOOR]]
445; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
446; GFX6-IR-NEXT:    ret float [[MIN]]
447;
448; IR-FRACT-LABEL: define float @basic_fract_f32_flags_fsub
449; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
450; IR-FRACT-NEXT:  entry:
451; IR-FRACT-NEXT:    [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
452; IR-FRACT-NEXT:    ret float [[MIN]]
453;
454; GFX6-LABEL: basic_fract_f32_flags_fsub:
455; GFX6:       ; %bb.0: ; %entry
456; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457; GFX6-NEXT:    v_floor_f32_e32 v1, v0
458; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
459; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
460; GFX6-NEXT:    s_setpc_b64 s[30:31]
461;
462; GFX7-LABEL: basic_fract_f32_flags_fsub:
463; GFX7:       ; %bb.0: ; %entry
464; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465; GFX7-NEXT:    v_fract_f32_e32 v0, v0
466; GFX7-NEXT:    s_setpc_b64 s[30:31]
467;
468; GFX8-LABEL: basic_fract_f32_flags_fsub:
469; GFX8:       ; %bb.0: ; %entry
470; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471; GFX8-NEXT:    v_fract_f32_e32 v0, v0
472; GFX8-NEXT:    s_setpc_b64 s[30:31]
473;
474; GFX11-LABEL: basic_fract_f32_flags_fsub:
475; GFX11:       ; %bb.0: ; %entry
476; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477; GFX11-NEXT:    v_fract_f32_e32 v0, v0
478; GFX11-NEXT:    s_setpc_b64 s[30:31]
479;
480; GFX12-LABEL: basic_fract_f32_flags_fsub:
481; GFX12:       ; %bb.0: ; %entry
482; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
483; GFX12-NEXT:    s_wait_expcnt 0x0
484; GFX12-NEXT:    s_wait_samplecnt 0x0
485; GFX12-NEXT:    s_wait_bvhcnt 0x0
486; GFX12-NEXT:    s_wait_kmcnt 0x0
487; GFX12-NEXT:    v_fract_f32_e32 v0, v0
488; GFX12-NEXT:    s_setpc_b64 s[30:31]
489entry:
490  %floor = tail call float @llvm.floor.f32(float %x)
491  %sub = fsub nsz float %x, %floor
492  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
493  ret float %min
494}
495
; <2 x float> variant of basic_fract_f32_nonans. The expected output has no
; vector form of the intrinsic: the fract-capable opt runs extract each lane,
; call llvm.amdgcn.fract.f32 per element, and rebuild the vector with
; insertelement. The tahiti opt run keeps the vector floor/fsub/minnum.
496define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) {
497; GFX6-IR-LABEL: define <2 x float> @basic_fract_v2f32_nonans
498; GFX6-IR-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
499; GFX6-IR-NEXT:  entry:
500; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
501; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
502; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000))
503; GFX6-IR-NEXT:    ret <2 x float> [[MIN]]
504;
505; IR-FRACT-LABEL: define <2 x float> @basic_fract_v2f32_nonans
506; IR-FRACT-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
507; IR-FRACT-NEXT:  entry:
508; IR-FRACT-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
509; IR-FRACT-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
510; IR-FRACT-NEXT:    [[TMP2:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP0]])
511; IR-FRACT-NEXT:    [[TMP3:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP1]])
512; IR-FRACT-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
513; IR-FRACT-NEXT:    [[MIN:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
514; IR-FRACT-NEXT:    ret <2 x float> [[MIN]]
515;
516; GFX6-LABEL: basic_fract_v2f32_nonans:
517; GFX6:       ; %bb.0: ; %entry
518; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX6-NEXT:    v_floor_f32_e32 v2, v0
520; GFX6-NEXT:    v_floor_f32_e32 v3, v1
521; GFX6-NEXT:    v_sub_f32_e32 v1, v1, v3
522; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v2
523; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
524; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
525; GFX6-NEXT:    s_setpc_b64 s[30:31]
526;
527; GFX7-LABEL: basic_fract_v2f32_nonans:
528; GFX7:       ; %bb.0: ; %entry
529; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530; GFX7-NEXT:    v_fract_f32_e32 v0, v0
531; GFX7-NEXT:    v_fract_f32_e32 v1, v1
532; GFX7-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX8-LABEL: basic_fract_v2f32_nonans:
535; GFX8:       ; %bb.0: ; %entry
536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX8-NEXT:    v_fract_f32_e32 v0, v0
538; GFX8-NEXT:    v_fract_f32_e32 v1, v1
539; GFX8-NEXT:    s_setpc_b64 s[30:31]
540;
541; GFX11-LABEL: basic_fract_v2f32_nonans:
542; GFX11:       ; %bb.0: ; %entry
543; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX11-NEXT:    v_fract_f32_e32 v0, v0
545; GFX11-NEXT:    v_fract_f32_e32 v1, v1
546; GFX11-NEXT:    s_setpc_b64 s[30:31]
547;
548; GFX12-LABEL: basic_fract_v2f32_nonans:
549; GFX12:       ; %bb.0: ; %entry
550; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
551; GFX12-NEXT:    s_wait_expcnt 0x0
552; GFX12-NEXT:    s_wait_samplecnt 0x0
553; GFX12-NEXT:    s_wait_bvhcnt 0x0
554; GFX12-NEXT:    s_wait_kmcnt 0x0
555; GFX12-NEXT:    v_fract_f32_e32 v0, v0
556; GFX12-NEXT:    v_fract_f32_e32 v1, v1
557; GFX12-NEXT:    s_setpc_b64 s[30:31]
558entry:
559  %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
560  %sub = fsub <2 x float> %x, %floor
561  %min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
562  ret <2 x float> %min
563}
564
; The fsub result has a second use (it is stored through %ptr). The
; fract-capable opt runs still replace the minnum with llvm.amdgcn.fract.f32,
; while floor and fsub stay behind to feed the store. The tahiti opt run keeps
; the whole expansion.
565define float @basic_fract_f32_multi_use_fsub_nonans(float nofpclass(nan) %x, ptr addrspace(1) %ptr) {
566; GFX6-IR-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
567; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
568; GFX6-IR-NEXT:  entry:
569; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
570; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
571; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
572; GFX6-IR-NEXT:    store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
573; GFX6-IR-NEXT:    ret float [[MIN]]
574;
575; IR-FRACT-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
576; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
577; IR-FRACT-NEXT:  entry:
578; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
579; IR-FRACT-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
580; IR-FRACT-NEXT:    [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
581; IR-FRACT-NEXT:    store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
582; IR-FRACT-NEXT:    ret float [[MIN]]
583;
584; GFX6-LABEL: basic_fract_f32_multi_use_fsub_nonans:
585; GFX6:       ; %bb.0: ; %entry
586; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
587; GFX6-NEXT:    v_floor_f32_e32 v3, v0
588; GFX6-NEXT:    s_mov_b32 s6, 0
589; GFX6-NEXT:    v_sub_f32_e32 v3, v0, v3
590; GFX6-NEXT:    s_mov_b32 s7, 0xf000
591; GFX6-NEXT:    s_mov_b32 s4, s6
592; GFX6-NEXT:    s_mov_b32 s5, s6
593; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v3
594; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
595; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
596; GFX6-NEXT:    s_setpc_b64 s[30:31]
597;
598; GFX7-LABEL: basic_fract_f32_multi_use_fsub_nonans:
599; GFX7:       ; %bb.0: ; %entry
600; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601; GFX7-NEXT:    s_mov_b32 s6, 0
602; GFX7-NEXT:    v_floor_f32_e32 v3, v0
603; GFX7-NEXT:    s_mov_b32 s7, 0xf000
604; GFX7-NEXT:    s_mov_b32 s4, s6
605; GFX7-NEXT:    s_mov_b32 s5, s6
606; GFX7-NEXT:    v_sub_f32_e32 v3, v0, v3
607; GFX7-NEXT:    v_fract_f32_e32 v0, v0
608; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
609; GFX7-NEXT:    s_waitcnt vmcnt(0)
610; GFX7-NEXT:    s_setpc_b64 s[30:31]
611;
612; GFX8-LABEL: basic_fract_f32_multi_use_fsub_nonans:
613; GFX8:       ; %bb.0: ; %entry
614; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615; GFX8-NEXT:    v_floor_f32_e32 v3, v0
616; GFX8-NEXT:    v_sub_f32_e32 v3, v0, v3
617; GFX8-NEXT:    v_fract_f32_e32 v0, v0
618; GFX8-NEXT:    global_store_dword v[1:2], v3, off
619; GFX8-NEXT:    s_waitcnt vmcnt(0)
620; GFX8-NEXT:    s_setpc_b64 s[30:31]
621;
622; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans:
623; GFX11:       ; %bb.0: ; %entry
624; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625; GFX11-NEXT:    v_floor_f32_e32 v3, v0
626; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
627; GFX11-NEXT:    v_sub_f32_e32 v3, v0, v3
628; GFX11-NEXT:    v_fract_f32_e32 v0, v0
629; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
630; GFX11-NEXT:    s_setpc_b64 s[30:31]
631;
632; GFX12-LABEL: basic_fract_f32_multi_use_fsub_nonans:
633; GFX12:       ; %bb.0: ; %entry
634; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
635; GFX12-NEXT:    s_wait_expcnt 0x0
636; GFX12-NEXT:    s_wait_samplecnt 0x0
637; GFX12-NEXT:    s_wait_bvhcnt 0x0
638; GFX12-NEXT:    s_wait_kmcnt 0x0
639; GFX12-NEXT:    v_floor_f32_e32 v3, v0
640; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
641; GFX12-NEXT:    v_sub_f32_e32 v3, v0, v3
642; GFX12-NEXT:    v_fract_f32_e32 v0, v0
643; GFX12-NEXT:    global_store_b32 v[1:2], v3, off
644; GFX12-NEXT:    s_setpc_b64 s[30:31]
645entry:
646  %floor = tail call float @llvm.floor.f32(float %x)
647  %sub = fsub float %x, %floor
648  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
649  store float %sub, ptr addrspace(1) %ptr
650  ret float %min
651}
652
; No nofpclass(nan) on the argument; the only no-NaN information is the nnan
; flag on the minnum call. The fract-capable opt runs fold to an nnan
; llvm.amdgcn.fract.f32 call; the tahiti opt run keeps the expansion.
653define float @nnan_minnum_fract_f32(float %x) {
654; GFX6-IR-LABEL: define float @nnan_minnum_fract_f32
655; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
656; GFX6-IR-NEXT:  entry:
657; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
658; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
659; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call nnan float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
660; GFX6-IR-NEXT:    ret float [[MIN]]
661;
662; IR-FRACT-LABEL: define float @nnan_minnum_fract_f32
663; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
664; IR-FRACT-NEXT:  entry:
665; IR-FRACT-NEXT:    [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
666; IR-FRACT-NEXT:    ret float [[MIN]]
667;
668; GFX6-LABEL: nnan_minnum_fract_f32:
669; GFX6:       ; %bb.0: ; %entry
670; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671; GFX6-NEXT:    v_floor_f32_e32 v1, v0
672; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
673; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
674; GFX6-NEXT:    s_setpc_b64 s[30:31]
675;
676; GFX7-LABEL: nnan_minnum_fract_f32:
677; GFX7:       ; %bb.0: ; %entry
678; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679; GFX7-NEXT:    v_fract_f32_e32 v0, v0
680; GFX7-NEXT:    s_setpc_b64 s[30:31]
681;
682; GFX8-LABEL: nnan_minnum_fract_f32:
683; GFX8:       ; %bb.0: ; %entry
684; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685; GFX8-NEXT:    v_fract_f32_e32 v0, v0
686; GFX8-NEXT:    s_setpc_b64 s[30:31]
687;
688; GFX11-LABEL: nnan_minnum_fract_f32:
689; GFX11:       ; %bb.0: ; %entry
690; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; GFX11-NEXT:    v_fract_f32_e32 v0, v0
692; GFX11-NEXT:    s_setpc_b64 s[30:31]
693;
694; GFX12-LABEL: nnan_minnum_fract_f32:
695; GFX12:       ; %bb.0: ; %entry
696; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
697; GFX12-NEXT:    s_wait_expcnt 0x0
698; GFX12-NEXT:    s_wait_samplecnt 0x0
699; GFX12-NEXT:    s_wait_bvhcnt 0x0
700; GFX12-NEXT:    s_wait_kmcnt 0x0
701; GFX12-NEXT:    v_fract_f32_e32 v0, v0
702; GFX12-NEXT:    s_setpc_b64 s[30:31]
703entry:
704  %floor = tail call float @llvm.floor.f32(float %x)
705  %sub = fsub float %x, %floor
706  %min = tail call nnan float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
707  ret float %min
708}
709
; The nnan flag sits on the fsub only. The shared IR checks expect the
; floor/fsub/minnum sequence to be left untouched on every opt run, and all
; llc runs keep the floor/sub/min expansion.
710; TODO: Could match if we checked isKnownNeverNaN on the minnum src
711; instead of the pattern input source.
712define float @nnan_fsub_fract_f32(float %x) {
713; IR-LABEL: define float @nnan_fsub_fract_f32
714; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
715; IR-NEXT:  entry:
716; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
717; IR-NEXT:    [[SUB:%.*]] = fsub nnan float [[X]], [[FLOOR]]
718; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
719; IR-NEXT:    ret float [[MIN]]
720;
721; GFX6-LABEL: nnan_fsub_fract_f32:
722; GFX6:       ; %bb.0: ; %entry
723; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724; GFX6-NEXT:    v_floor_f32_e32 v1, v0
725; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
726; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
727; GFX6-NEXT:    s_setpc_b64 s[30:31]
728;
729; GFX7-LABEL: nnan_fsub_fract_f32:
730; GFX7:       ; %bb.0: ; %entry
731; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732; GFX7-NEXT:    v_floor_f32_e32 v1, v0
733; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
734; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
735; GFX7-NEXT:    s_setpc_b64 s[30:31]
736;
737; GFX8-LABEL: nnan_fsub_fract_f32:
738; GFX8:       ; %bb.0: ; %entry
739; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740; GFX8-NEXT:    v_floor_f32_e32 v1, v0
741; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
742; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
743; GFX8-NEXT:    s_setpc_b64 s[30:31]
744;
745; GFX11-LABEL: nnan_fsub_fract_f32:
746; GFX11:       ; %bb.0: ; %entry
747; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748; GFX11-NEXT:    v_floor_f32_e32 v1, v0
749; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
750; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
751; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
752; GFX11-NEXT:    s_setpc_b64 s[30:31]
753;
754; GFX12-LABEL: nnan_fsub_fract_f32:
755; GFX12:       ; %bb.0: ; %entry
756; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
757; GFX12-NEXT:    s_wait_expcnt 0x0
758; GFX12-NEXT:    s_wait_samplecnt 0x0
759; GFX12-NEXT:    s_wait_bvhcnt 0x0
760; GFX12-NEXT:    s_wait_kmcnt 0x0
761; GFX12-NEXT:    v_floor_f32_e32 v1, v0
762; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
763; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
764; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7fffff, v0
765; GFX12-NEXT:    s_setpc_b64 s[30:31]
766entry:
767  %floor = tail call float @llvm.floor.f32(float %x)
768  %sub = fsub nnan float %x, %floor
769  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
770  ret float %min
771}
772
; The nnan flag sits on the floor call only. The shared IR checks expect no
; change on any opt run, and every llc run keeps the floor/sub/min expansion.
773define float @nnan_floor_fract_f32(float %x) {
774; IR-LABEL: define float @nnan_floor_fract_f32
775; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
776; IR-NEXT:  entry:
777; IR-NEXT:    [[FLOOR:%.*]] = tail call nnan float @llvm.floor.f32(float [[X]])
778; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
779; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
780; IR-NEXT:    ret float [[MIN]]
781;
782; GFX6-LABEL: nnan_floor_fract_f32:
783; GFX6:       ; %bb.0: ; %entry
784; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785; GFX6-NEXT:    v_floor_f32_e32 v1, v0
786; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
787; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
788; GFX6-NEXT:    s_setpc_b64 s[30:31]
789;
790; GFX7-LABEL: nnan_floor_fract_f32:
791; GFX7:       ; %bb.0: ; %entry
792; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793; GFX7-NEXT:    v_floor_f32_e32 v1, v0
794; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
795; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
796; GFX7-NEXT:    s_setpc_b64 s[30:31]
797;
798; GFX8-LABEL: nnan_floor_fract_f32:
799; GFX8:       ; %bb.0: ; %entry
800; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801; GFX8-NEXT:    v_floor_f32_e32 v1, v0
802; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
803; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
804; GFX8-NEXT:    s_setpc_b64 s[30:31]
805;
806; GFX11-LABEL: nnan_floor_fract_f32:
807; GFX11:       ; %bb.0: ; %entry
808; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809; GFX11-NEXT:    v_floor_f32_e32 v1, v0
810; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
811; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
812; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
813; GFX11-NEXT:    s_setpc_b64 s[30:31]
814;
815; GFX12-LABEL: nnan_floor_fract_f32:
816; GFX12:       ; %bb.0: ; %entry
817; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
818; GFX12-NEXT:    s_wait_expcnt 0x0
819; GFX12-NEXT:    s_wait_samplecnt 0x0
820; GFX12-NEXT:    s_wait_bvhcnt 0x0
821; GFX12-NEXT:    s_wait_kmcnt 0x0
822; GFX12-NEXT:    v_floor_f32_e32 v1, v0
823; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
824; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
825; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7fffff, v0
826; GFX12-NEXT:    s_setpc_b64 s[30:31]
827entry:
828  %floor = tail call nnan float @llvm.floor.f32(float %x)
829  %sub = fsub float %x, %floor
830  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
831  ret float %min
832}
833
; No fast-math flags on any instruction; the no-NaN fact comes from the
; nofpclass(nan) attribute on the argument. Under the IR-FRACT prefix the
; sequence is expected to become an nnan llvm.amdgcn.fract.f32 call; under
; the GFX6-IR prefix (tahiti run) it is expected to stay unchanged.
834define float @nnan_src_fract_f32(float nofpclass(nan) %x) {
835; GFX6-IR-LABEL: define float @nnan_src_fract_f32
836; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
837; GFX6-IR-NEXT:  entry:
838; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
839; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
840; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
841; GFX6-IR-NEXT:    ret float [[MIN]]
842;
843; IR-FRACT-LABEL: define float @nnan_src_fract_f32
844; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
845; IR-FRACT-NEXT:  entry:
846; IR-FRACT-NEXT:    [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
847; IR-FRACT-NEXT:    ret float [[MIN]]
848;
849; GFX6-LABEL: nnan_src_fract_f32:
850; GFX6:       ; %bb.0: ; %entry
851; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
852; GFX6-NEXT:    v_floor_f32_e32 v1, v0
853; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
854; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
855; GFX6-NEXT:    s_setpc_b64 s[30:31]
856;
857; GFX7-LABEL: nnan_src_fract_f32:
858; GFX7:       ; %bb.0: ; %entry
859; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860; GFX7-NEXT:    v_fract_f32_e32 v0, v0
861; GFX7-NEXT:    s_setpc_b64 s[30:31]
862;
863; GFX8-LABEL: nnan_src_fract_f32:
864; GFX8:       ; %bb.0: ; %entry
865; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866; GFX8-NEXT:    v_fract_f32_e32 v0, v0
867; GFX8-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX11-LABEL: nnan_src_fract_f32:
870; GFX11:       ; %bb.0: ; %entry
871; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872; GFX11-NEXT:    v_fract_f32_e32 v0, v0
873; GFX11-NEXT:    s_setpc_b64 s[30:31]
874;
875; GFX12-LABEL: nnan_src_fract_f32:
876; GFX12:       ; %bb.0: ; %entry
877; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
878; GFX12-NEXT:    s_wait_expcnt 0x0
879; GFX12-NEXT:    s_wait_samplecnt 0x0
880; GFX12-NEXT:    s_wait_bvhcnt 0x0
881; GFX12-NEXT:    s_wait_kmcnt 0x0
882; GFX12-NEXT:    v_fract_f32_e32 v0, v0
883; GFX12-NEXT:    s_setpc_b64 s[30:31]
884entry:
885  %floor = tail call float @llvm.floor.f32(float %x)
886  %sub = fsub float %x, %floor
887  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
888  ret float %min
889}
890
891; Negative test
; The minnum clamp constant is 0x3FEFFFFFC0000000 (0x3f7ffffe in the llc
; checks) instead of the 0x3FEFFFFFE0000000 (0x3f7fffff) used by the matching
; tests, so the sequence must be left alone on every target.
892define float @not_fract_f32_wrong_const(float nofpclass(nan) %x) {
893; IR-LABEL: define float @not_fract_f32_wrong_const
894; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
895; IR-NEXT:  entry:
896; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
897; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
898; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFC0000000)
899; IR-NEXT:    ret float [[MIN]]
900;
901; GFX6-LABEL: not_fract_f32_wrong_const:
902; GFX6:       ; %bb.0: ; %entry
903; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904; GFX6-NEXT:    v_floor_f32_e32 v1, v0
905; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
906; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7ffffe, v0
907; GFX6-NEXT:    s_setpc_b64 s[30:31]
908;
909; GFX7-LABEL: not_fract_f32_wrong_const:
910; GFX7:       ; %bb.0: ; %entry
911; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912; GFX7-NEXT:    v_floor_f32_e32 v1, v0
913; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
914; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7ffffe, v0
915; GFX7-NEXT:    s_setpc_b64 s[30:31]
916;
917; GFX8-LABEL: not_fract_f32_wrong_const:
918; GFX8:       ; %bb.0: ; %entry
919; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
920; GFX8-NEXT:    v_floor_f32_e32 v1, v0
921; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
922; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7ffffe, v0
923; GFX8-NEXT:    s_setpc_b64 s[30:31]
924;
925; GFX11-LABEL: not_fract_f32_wrong_const:
926; GFX11:       ; %bb.0: ; %entry
927; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928; GFX11-NEXT:    v_floor_f32_e32 v1, v0
929; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
930; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
931; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7ffffe, v0
932; GFX11-NEXT:    s_setpc_b64 s[30:31]
933;
934; GFX12-LABEL: not_fract_f32_wrong_const:
935; GFX12:       ; %bb.0: ; %entry
936; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
937; GFX12-NEXT:    s_wait_expcnt 0x0
938; GFX12-NEXT:    s_wait_samplecnt 0x0
939; GFX12-NEXT:    s_wait_bvhcnt 0x0
940; GFX12-NEXT:    s_wait_kmcnt 0x0
941; GFX12-NEXT:    v_floor_f32_e32 v1, v0
942; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
943; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
944; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7ffffe, v0
945; GFX12-NEXT:    s_setpc_b64 s[30:31]
946entry:
947  %floor = tail call float @llvm.floor.f32(float %x)
948  %sub = fsub float %x, %floor
949  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFC0000000)
950  ret float %min
951}
952
953; Negative test
; The fsub operands are swapped (floor(x) - x rather than x - floor(x)), so
; the sequence must be left alone on every target.
954define float @not_fract_f32_swapped_fsub(float nofpclass(nan) %x) {
955; IR-LABEL: define float @not_fract_f32_swapped_fsub
956; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
957; IR-NEXT:  entry:
958; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
959; IR-NEXT:    [[SUB:%.*]] = fsub float [[FLOOR]], [[X]]
960; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
961; IR-NEXT:    ret float [[MIN]]
962;
963; GFX6-LABEL: not_fract_f32_swapped_fsub:
964; GFX6:       ; %bb.0: ; %entry
965; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966; GFX6-NEXT:    v_floor_f32_e32 v1, v0
967; GFX6-NEXT:    v_sub_f32_e32 v0, v1, v0
968; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
969; GFX6-NEXT:    s_setpc_b64 s[30:31]
970;
971; GFX7-LABEL: not_fract_f32_swapped_fsub:
972; GFX7:       ; %bb.0: ; %entry
973; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974; GFX7-NEXT:    v_floor_f32_e32 v1, v0
975; GFX7-NEXT:    v_sub_f32_e32 v0, v1, v0
976; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
977; GFX7-NEXT:    s_setpc_b64 s[30:31]
978;
979; GFX8-LABEL: not_fract_f32_swapped_fsub:
980; GFX8:       ; %bb.0: ; %entry
981; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982; GFX8-NEXT:    v_floor_f32_e32 v1, v0
983; GFX8-NEXT:    v_sub_f32_e32 v0, v1, v0
984; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
985; GFX8-NEXT:    s_setpc_b64 s[30:31]
986;
987; GFX11-LABEL: not_fract_f32_swapped_fsub:
988; GFX11:       ; %bb.0: ; %entry
989; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
990; GFX11-NEXT:    v_floor_f32_e32 v1, v0
991; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
992; GFX11-NEXT:    v_sub_f32_e32 v0, v1, v0
993; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
994; GFX11-NEXT:    s_setpc_b64 s[30:31]
995;
996; GFX12-LABEL: not_fract_f32_swapped_fsub:
997; GFX12:       ; %bb.0: ; %entry
998; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
999; GFX12-NEXT:    s_wait_expcnt 0x0
1000; GFX12-NEXT:    s_wait_samplecnt 0x0
1001; GFX12-NEXT:    s_wait_bvhcnt 0x0
1002; GFX12-NEXT:    s_wait_kmcnt 0x0
1003; GFX12-NEXT:    v_floor_f32_e32 v1, v0
1004; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1005; GFX12-NEXT:    v_sub_f32_e32 v0, v1, v0
1006; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7fffff, v0
1007; GFX12-NEXT:    s_setpc_b64 s[30:31]
1008entry:
1009  %floor = tail call float @llvm.floor.f32(float %x)
1010  %sub = fsub float %floor, %x
1011  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1012  ret float %min
1013}
1014
1015; Negative test
; The rounding call is llvm.trunc.f32, not llvm.floor.f32 (the value is still
; named %floor), so the sequence must be left alone on every target.
1016define float @not_fract_f32_not_floor(float nofpclass(nan) %x) {
1017; IR-LABEL: define float @not_fract_f32_not_floor
1018; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1019; IR-NEXT:  entry:
1020; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.trunc.f32(float [[X]])
1021; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1022; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1023; IR-NEXT:    ret float [[MIN]]
1024;
1025; GFX6-LABEL: not_fract_f32_not_floor:
1026; GFX6:       ; %bb.0: ; %entry
1027; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1028; GFX6-NEXT:    v_trunc_f32_e32 v1, v0
1029; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
1030; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1031; GFX6-NEXT:    s_setpc_b64 s[30:31]
1032;
1033; GFX7-LABEL: not_fract_f32_not_floor:
1034; GFX7:       ; %bb.0: ; %entry
1035; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036; GFX7-NEXT:    v_trunc_f32_e32 v1, v0
1037; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
1038; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1039; GFX7-NEXT:    s_setpc_b64 s[30:31]
1040;
1041; GFX8-LABEL: not_fract_f32_not_floor:
1042; GFX8:       ; %bb.0: ; %entry
1043; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044; GFX8-NEXT:    v_trunc_f32_e32 v1, v0
1045; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
1046; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1047; GFX8-NEXT:    s_setpc_b64 s[30:31]
1048;
1049; GFX11-LABEL: not_fract_f32_not_floor:
1050; GFX11:       ; %bb.0: ; %entry
1051; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1052; GFX11-NEXT:    v_trunc_f32_e32 v1, v0
1053; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1054; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
1055; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1056; GFX11-NEXT:    s_setpc_b64 s[30:31]
1057;
1058; GFX12-LABEL: not_fract_f32_not_floor:
1059; GFX12:       ; %bb.0: ; %entry
1060; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1061; GFX12-NEXT:    s_wait_expcnt 0x0
1062; GFX12-NEXT:    s_wait_samplecnt 0x0
1063; GFX12-NEXT:    s_wait_bvhcnt 0x0
1064; GFX12-NEXT:    s_wait_kmcnt 0x0
1065; GFX12-NEXT:    v_trunc_f32_e32 v1, v0
1066; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1067; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
1068; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7fffff, v0
1069; GFX12-NEXT:    s_setpc_b64 s[30:31]
1070entry:
1071  %floor = tail call float @llvm.trunc.f32(float %x)
1072  %sub = fsub float %x, %floor
1073  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1074  ret float %min
1075}
1076
1077; Negative test
; The floor is taken of %y while the fsub subtracts it from %x, so the two
; sources differ and the sequence must be left alone on every target.
1078define float @not_fract_f32_different_floor(float %x, float %y) {
1079; IR-LABEL: define float @not_fract_f32_different_floor
1080; IR-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
1081; IR-NEXT:  entry:
1082; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[Y]])
1083; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1084; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1085; IR-NEXT:    ret float [[MIN]]
1086;
1087; GFX6-LABEL: not_fract_f32_different_floor:
1088; GFX6:       ; %bb.0: ; %entry
1089; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090; GFX6-NEXT:    v_floor_f32_e32 v1, v1
1091; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
1092; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1093; GFX6-NEXT:    s_setpc_b64 s[30:31]
1094;
1095; GFX7-LABEL: not_fract_f32_different_floor:
1096; GFX7:       ; %bb.0: ; %entry
1097; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1098; GFX7-NEXT:    v_floor_f32_e32 v1, v1
1099; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
1100; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1101; GFX7-NEXT:    s_setpc_b64 s[30:31]
1102;
1103; GFX8-LABEL: not_fract_f32_different_floor:
1104; GFX8:       ; %bb.0: ; %entry
1105; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GFX8-NEXT:    v_floor_f32_e32 v1, v1
1107; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
1108; GFX8-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1109; GFX8-NEXT:    s_setpc_b64 s[30:31]
1110;
1111; GFX11-LABEL: not_fract_f32_different_floor:
1112; GFX11:       ; %bb.0: ; %entry
1113; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1114; GFX11-NEXT:    v_floor_f32_e32 v1, v1
1115; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1116; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
1117; GFX11-NEXT:    v_min_f32_e32 v0, 0x3f7fffff, v0
1118; GFX11-NEXT:    s_setpc_b64 s[30:31]
1119;
1120; GFX12-LABEL: not_fract_f32_different_floor:
1121; GFX12:       ; %bb.0: ; %entry
1122; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1123; GFX12-NEXT:    s_wait_expcnt 0x0
1124; GFX12-NEXT:    s_wait_samplecnt 0x0
1125; GFX12-NEXT:    s_wait_bvhcnt 0x0
1126; GFX12-NEXT:    s_wait_kmcnt 0x0
1127; GFX12-NEXT:    v_floor_f32_e32 v1, v1
1128; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1129; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
1130; GFX12-NEXT:    v_min_num_f32_e32 v0, 0x3f7fffff, v0
1131; GFX12-NEXT:    s_setpc_b64 s[30:31]
1132entry:
1133  %floor = tail call float @llvm.floor.f32(float %y)
1134  %sub = fsub float %x, %floor
1135  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1136  ret float %min
1137}
1138
1139; Negative test
; The clamp uses llvm.maxnum.f32 instead of llvm.minnum.f32 (the value is
; still named %min), so the sequence must be left alone on every target.
1140define float @not_fract_f32_maxnum(float nofpclass(nan) %x) {
1141; IR-LABEL: define float @not_fract_f32_maxnum
1142; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1143; IR-NEXT:  entry:
1144; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1145; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1146; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.maxnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1147; IR-NEXT:    ret float [[MIN]]
1148;
1149; GFX6-LABEL: not_fract_f32_maxnum:
1150; GFX6:       ; %bb.0: ; %entry
1151; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1153; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
1154; GFX6-NEXT:    v_max_f32_e32 v0, 0x3f7fffff, v0
1155; GFX6-NEXT:    s_setpc_b64 s[30:31]
1156;
1157; GFX7-LABEL: not_fract_f32_maxnum:
1158; GFX7:       ; %bb.0: ; %entry
1159; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1160; GFX7-NEXT:    v_floor_f32_e32 v1, v0
1161; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
1162; GFX7-NEXT:    v_max_f32_e32 v0, 0x3f7fffff, v0
1163; GFX7-NEXT:    s_setpc_b64 s[30:31]
1164;
1165; GFX8-LABEL: not_fract_f32_maxnum:
1166; GFX8:       ; %bb.0: ; %entry
1167; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168; GFX8-NEXT:    v_floor_f32_e32 v1, v0
1169; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v1
1170; GFX8-NEXT:    v_max_f32_e32 v0, 0x3f7fffff, v0
1171; GFX8-NEXT:    s_setpc_b64 s[30:31]
1172;
1173; GFX11-LABEL: not_fract_f32_maxnum:
1174; GFX11:       ; %bb.0: ; %entry
1175; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176; GFX11-NEXT:    v_floor_f32_e32 v1, v0
1177; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1178; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
1179; GFX11-NEXT:    v_max_f32_e32 v0, 0x3f7fffff, v0
1180; GFX11-NEXT:    s_setpc_b64 s[30:31]
1181;
1182; GFX12-LABEL: not_fract_f32_maxnum:
1183; GFX12:       ; %bb.0: ; %entry
1184; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1185; GFX12-NEXT:    s_wait_expcnt 0x0
1186; GFX12-NEXT:    s_wait_samplecnt 0x0
1187; GFX12-NEXT:    s_wait_bvhcnt 0x0
1188; GFX12-NEXT:    s_wait_kmcnt 0x0
1189; GFX12-NEXT:    v_floor_f32_e32 v1, v0
1190; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1191; GFX12-NEXT:    v_sub_f32_e32 v0, v0, v1
1192; GFX12-NEXT:    v_max_num_f32_e32 v0, 0x3f7fffff, v0
1193; GFX12-NEXT:    s_setpc_b64 s[30:31]
1194entry:
1195  %floor = tail call float @llvm.floor.f32(float %x)
1196  %sub = fsub float %x, %floor
1197  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1198  ret float %min
1199}
1200
; The uno compare is against a NaN literal (0x7FF8000000000000) rather than
; an ordinary constant. The shared IR checks expect the opt runs to leave the
; IR untouched; the llc checks contain only the entry waits and the return,
; i.e. no floor/sub/min and no v_fract is emitted for this function.
1201define float @fcmp_uno_check_is_nan_f32(float %x) {
1202; IR-LABEL: define float @fcmp_uno_check_is_nan_f32
1203; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1204; IR-NEXT:  entry:
1205; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1206; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1207; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1208; IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0x7FF8000000000000
1209; IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
1210; IR-NEXT:    ret float [[COND]]
1211;
1212; GCN-LABEL: fcmp_uno_check_is_nan_f32:
1213; GCN:       ; %bb.0: ; %entry
1214; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; GCN-NEXT:    s_setpc_b64 s[30:31]
1216;
1217; GFX12-LABEL: fcmp_uno_check_is_nan_f32:
1218; GFX12:       ; %bb.0: ; %entry
1219; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1220; GFX12-NEXT:    s_wait_expcnt 0x0
1221; GFX12-NEXT:    s_wait_samplecnt 0x0
1222; GFX12-NEXT:    s_wait_bvhcnt 0x0
1223; GFX12-NEXT:    s_wait_kmcnt 0x0
1224; GFX12-NEXT:    s_setpc_b64 s[30:31]
1225entry:
1226  %floor = tail call float @llvm.floor.f32(float %x)
1227  %sub = fsub float %x, %floor
1228  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1229  %uno = fcmp uno float %x, 0x7FF8000000000000
1230  %cond = select i1 %uno, float %x, float %min
1231  ret float %cond
1232}
1233
1234; No inf check
; Only the NaN guard is present: the select returns %x when the uno compare
; is true and the clamped x - floor(x) otherwise. Under the IR-FRACT prefix
; the whole select is expected to fold to a plain llvm.amdgcn.fract.f32 call
; (no nnan flag); under the GFX6-IR prefix the IR is expected to stay as-is.
1235define float @select_nan_fract_f32(float %x) {
1236; GFX6-IR-LABEL: define float @select_nan_fract_f32
1237; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1238; GFX6-IR-NEXT:  entry:
1239; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1240; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1241; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1242; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
1243; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
1244; GFX6-IR-NEXT:    ret float [[COND]]
1245;
1246; IR-FRACT-LABEL: define float @select_nan_fract_f32
1247; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1248; IR-FRACT-NEXT:  entry:
1249; IR-FRACT-NEXT:    [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
1250; IR-FRACT-NEXT:    ret float [[COND]]
1251;
1252; GFX6-LABEL: select_nan_fract_f32:
1253; GFX6:       ; %bb.0: ; %entry
1254; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1256; GFX6-NEXT:    v_sub_f32_e32 v1, v0, v1
1257; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1258; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1259; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1260; GFX6-NEXT:    s_setpc_b64 s[30:31]
1261;
1262; GFX7-LABEL: select_nan_fract_f32:
1263; GFX7:       ; %bb.0: ; %entry
1264; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265; GFX7-NEXT:    v_fract_f32_e32 v0, v0
1266; GFX7-NEXT:    s_setpc_b64 s[30:31]
1267;
1268; GFX8-LABEL: select_nan_fract_f32:
1269; GFX8:       ; %bb.0: ; %entry
1270; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1271; GFX8-NEXT:    v_fract_f32_e32 v0, v0
1272; GFX8-NEXT:    s_setpc_b64 s[30:31]
1273;
1274; GFX11-LABEL: select_nan_fract_f32:
1275; GFX11:       ; %bb.0: ; %entry
1276; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277; GFX11-NEXT:    v_fract_f32_e32 v0, v0
1278; GFX11-NEXT:    s_setpc_b64 s[30:31]
1279;
1280; GFX12-LABEL: select_nan_fract_f32:
1281; GFX12:       ; %bb.0: ; %entry
1282; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1283; GFX12-NEXT:    s_wait_expcnt 0x0
1284; GFX12-NEXT:    s_wait_samplecnt 0x0
1285; GFX12-NEXT:    s_wait_bvhcnt 0x0
1286; GFX12-NEXT:    s_wait_kmcnt 0x0
1287; GFX12-NEXT:    v_fract_f32_e32 v0, v0
1288; GFX12-NEXT:    s_setpc_b64 s[30:31]
1289entry:
1290  %floor = tail call float @llvm.floor.f32(float %x)
1291  %sub = fsub float %x, %floor
1292  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1293  %uno = fcmp uno float %x, 0.000000e+00
1294  %cond = select i1 %uno, float %x, float %min
1295  ret float %cond
1296}
1297
; Commuted form of the NaN-guarded pattern: the compare is ord instead of
; uno and the select arms are swapped to compensate (clamped value when
; ordered, %x otherwise). Under the IR-FRACT prefix it is still expected to
; fold to llvm.amdgcn.fract.f32; under the GFX6-IR prefix it stays as-is.
; Note: the value named %uno holds an ord compare in this function.
1298define float @commuted_select_nan_fract_f32(float %x) {
1299; GFX6-IR-LABEL: define float @commuted_select_nan_fract_f32
1300; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1301; GFX6-IR-NEXT:  entry:
1302; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1303; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1304; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1305; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
1306; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
1307; GFX6-IR-NEXT:    ret float [[COND]]
1308;
1309; IR-FRACT-LABEL: define float @commuted_select_nan_fract_f32
1310; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1311; IR-FRACT-NEXT:  entry:
1312; IR-FRACT-NEXT:    [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
1313; IR-FRACT-NEXT:    ret float [[COND]]
1314;
1315; GFX6-LABEL: commuted_select_nan_fract_f32:
1316; GFX6:       ; %bb.0: ; %entry
1317; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1318; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1319; GFX6-NEXT:    v_sub_f32_e32 v1, v0, v1
1320; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1321; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
1322; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1323; GFX6-NEXT:    s_setpc_b64 s[30:31]
1324;
1325; GFX7-LABEL: commuted_select_nan_fract_f32:
1326; GFX7:       ; %bb.0: ; %entry
1327; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1328; GFX7-NEXT:    v_fract_f32_e32 v0, v0
1329; GFX7-NEXT:    s_setpc_b64 s[30:31]
1330;
1331; GFX8-LABEL: commuted_select_nan_fract_f32:
1332; GFX8:       ; %bb.0: ; %entry
1333; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334; GFX8-NEXT:    v_fract_f32_e32 v0, v0
1335; GFX8-NEXT:    s_setpc_b64 s[30:31]
1336;
1337; GFX11-LABEL: commuted_select_nan_fract_f32:
1338; GFX11:       ; %bb.0: ; %entry
1339; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1340; GFX11-NEXT:    v_fract_f32_e32 v0, v0
1341; GFX11-NEXT:    s_setpc_b64 s[30:31]
1342;
1343; GFX12-LABEL: commuted_select_nan_fract_f32:
1344; GFX12:       ; %bb.0: ; %entry
1345; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1346; GFX12-NEXT:    s_wait_expcnt 0x0
1347; GFX12-NEXT:    s_wait_samplecnt 0x0
1348; GFX12-NEXT:    s_wait_bvhcnt 0x0
1349; GFX12-NEXT:    s_wait_kmcnt 0x0
1350; GFX12-NEXT:    v_fract_f32_e32 v0, v0
1351; GFX12-NEXT:    s_setpc_b64 s[30:31]
1352entry:
1353  %floor = tail call float @llvm.floor.f32(float %x)
1354  %sub = fsub float %x, %floor
1355  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1356  %uno = fcmp ord float %x, 0.000000e+00
1357  %cond = select i1 %uno, float %min, float %x
1358  ret float %cond
1359}
1360
; Negative test: the compare is uno but the select arms are in the order
; that belongs with an ord compare, so the clamped value is picked when the
; compare is true and %x otherwise. The shared IR checks expect no change on
; any opt run, and every llc run keeps the floor/sub/min/cmp/cndmask sequence.
1361define float @wrong_commuted_nan_select_f32(float %x) {
1362; IR-LABEL: define float @wrong_commuted_nan_select_f32
1363; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1364; IR-NEXT:  entry:
1365; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1366; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1367; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1368; IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
1369; IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
1370; IR-NEXT:    ret float [[COND]]
1371;
1372; GFX6-LABEL: wrong_commuted_nan_select_f32:
1373; GFX6:       ; %bb.0: ; %entry
1374; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1376; GFX6-NEXT:    v_sub_f32_e32 v1, v0, v1
1377; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1378; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1379; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1380; GFX6-NEXT:    s_setpc_b64 s[30:31]
1381;
1382; GFX7-LABEL: wrong_commuted_nan_select_f32:
1383; GFX7:       ; %bb.0: ; %entry
1384; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385; GFX7-NEXT:    v_floor_f32_e32 v1, v0
1386; GFX7-NEXT:    v_sub_f32_e32 v1, v0, v1
1387; GFX7-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1388; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1389; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1390; GFX7-NEXT:    s_setpc_b64 s[30:31]
1391;
1392; GFX8-LABEL: wrong_commuted_nan_select_f32:
1393; GFX8:       ; %bb.0: ; %entry
1394; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395; GFX8-NEXT:    v_floor_f32_e32 v1, v0
1396; GFX8-NEXT:    v_sub_f32_e32 v1, v0, v1
1397; GFX8-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1398; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1399; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1400; GFX8-NEXT:    s_setpc_b64 s[30:31]
1401;
1402; GFX11-LABEL: wrong_commuted_nan_select_f32:
1403; GFX11:       ; %bb.0: ; %entry
1404; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405; GFX11-NEXT:    v_floor_f32_e32 v1, v0
1406; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
1407; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1408; GFX11-NEXT:    v_sub_f32_e32 v1, v0, v1
1409; GFX11-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1410; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1411; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1412; GFX11-NEXT:    s_setpc_b64 s[30:31]
1413;
1414; GFX12-LABEL: wrong_commuted_nan_select_f32:
1415; GFX12:       ; %bb.0: ; %entry
1416; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1417; GFX12-NEXT:    s_wait_expcnt 0x0
1418; GFX12-NEXT:    s_wait_samplecnt 0x0
1419; GFX12-NEXT:    s_wait_bvhcnt 0x0
1420; GFX12-NEXT:    s_wait_kmcnt 0x0
1421; GFX12-NEXT:    v_floor_f32_e32 v1, v0
1422; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
1423; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1424; GFX12-NEXT:    v_sub_f32_e32 v1, v0, v1
1425; GFX12-NEXT:    v_min_num_f32_e32 v1, 0x3f7fffff, v1
1426; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1427; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1428; GFX12-NEXT:    s_setpc_b64 s[30:31]
1429entry:
1430  %floor = tail call float @llvm.floor.f32(float %x)
1431  %sub = fsub float %x, %floor
1432  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1433  %uno = fcmp uno float %x, 0.000000e+00
1434  %cond = select i1 %uno, float %min, float %x
1435  ret float %cond
1436}
1437
; Basic f16 fract pattern: x - floor(x), clamped with minnum to 0xH3BFF
; (the largest half value below 1.0). The argument is nofpclass(nan), so
; no NaN-preserving select is present. Targets with a working fract and
; legal f16 fold this into one nnan llvm.amdgcn.fract.f16 call; tahiti and
; kaveri keep the expansion and compute it in f32.
1438define half @basic_fract_f16_nonan(half nofpclass(nan) %x) {
1439; GFX6-IR-LABEL: define half @basic_fract_f16_nonan
1440; GFX6-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1441; GFX6-IR-NEXT:  entry:
1442; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
1443; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
1444; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
1445; GFX6-IR-NEXT:    ret half [[MIN]]
1446;
1447; GFX7-IR-LABEL: define half @basic_fract_f16_nonan
1448; GFX7-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1449; GFX7-IR-NEXT:  entry:
1450; GFX7-IR-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
1451; GFX7-IR-NEXT:    [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
1452; GFX7-IR-NEXT:    [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
1453; GFX7-IR-NEXT:    ret half [[MIN]]
1454;
1455; IR-LEGALF16-LABEL: define half @basic_fract_f16_nonan
1456; IR-LEGALF16-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1457; IR-LEGALF16-NEXT:  entry:
1458; IR-LEGALF16-NEXT:    [[MIN:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[X]])
1459; IR-LEGALF16-NEXT:    ret half [[MIN]]
1460;
1461; GFX6-LABEL: basic_fract_f16_nonan:
1462; GFX6:       ; %bb.0: ; %entry
1463; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
1465; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
1466; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1467; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v1
1468; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fe000, v0
1469; GFX6-NEXT:    s_setpc_b64 s[30:31]
1470;
1471; GFX7-LABEL: basic_fract_f16_nonan:
1472; GFX7:       ; %bb.0: ; %entry
1473; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1475; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1476; GFX7-NEXT:    v_floor_f32_e32 v1, v0
1477; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v1
1478; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fe000, v0
1479; GFX7-NEXT:    s_setpc_b64 s[30:31]
1480;
1481; GFX8-LABEL: basic_fract_f16_nonan:
1482; GFX8:       ; %bb.0: ; %entry
1483; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484; GFX8-NEXT:    v_fract_f16_e32 v0, v0
1485; GFX8-NEXT:    s_setpc_b64 s[30:31]
1486;
1487; GFX11-LABEL: basic_fract_f16_nonan:
1488; GFX11:       ; %bb.0: ; %entry
1489; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490; GFX11-NEXT:    v_fract_f16_e32 v0, v0
1491; GFX11-NEXT:    s_setpc_b64 s[30:31]
1492;
1493; GFX12-LABEL: basic_fract_f16_nonan:
1494; GFX12:       ; %bb.0: ; %entry
1495; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1496; GFX12-NEXT:    s_wait_expcnt 0x0
1497; GFX12-NEXT:    s_wait_samplecnt 0x0
1498; GFX12-NEXT:    s_wait_bvhcnt 0x0
1499; GFX12-NEXT:    s_wait_kmcnt 0x0
1500; GFX12-NEXT:    v_fract_f16_e32 v0, v0
1501; GFX12-NEXT:    s_setpc_b64 s[30:31]
1502entry:
1503  %floor = tail call half @llvm.floor.f16(half %x)
1504  %sub = fsub half %x, %floor
1505  %min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
1506  ret half %min
1507}
1508
; <2 x half> version of the basic fract pattern with a nofpclass(nan)
; argument. On targets with legal f16 the checked IR scalarizes the
; vector: each element is extracted, passed to an nnan
; llvm.amdgcn.fract.f16 call, and reinserted. tahiti and kaveri keep the
; vector floor/fsub/minnum expansion.
1509define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) {
1510; GFX6-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan
1511; GFX6-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1512; GFX6-IR-NEXT:  entry:
1513; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
1514; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
1515; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF))
1516; GFX6-IR-NEXT:    ret <2 x half> [[MIN]]
1517;
1518; GFX7-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan
1519; GFX7-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1520; GFX7-IR-NEXT:  entry:
1521; GFX7-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
1522; GFX7-IR-NEXT:    [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
1523; GFX7-IR-NEXT:    [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF))
1524; GFX7-IR-NEXT:    ret <2 x half> [[MIN]]
1525;
1526; IR-LEGALF16-LABEL: define <2 x half> @basic_fract_v2f16_nonan
1527; IR-LEGALF16-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1528; IR-LEGALF16-NEXT:  entry:
1529; IR-LEGALF16-NEXT:    [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0
1530; IR-LEGALF16-NEXT:    [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1
1531; IR-LEGALF16-NEXT:    [[TMP2:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP0]])
1532; IR-LEGALF16-NEXT:    [[TMP3:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP1]])
1533; IR-LEGALF16-NEXT:    [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0
1534; IR-LEGALF16-NEXT:    [[MIN:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1
1535; IR-LEGALF16-NEXT:    ret <2 x half> [[MIN]]
1536;
1537; GFX6-LABEL: basic_fract_v2f16_nonan:
1538; GFX6:       ; %bb.0: ; %entry
1539; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1540; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
1541; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
1542; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
1543; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
1544; GFX6-NEXT:    v_floor_f32_e32 v2, v0
1545; GFX6-NEXT:    v_floor_f32_e32 v3, v1
1546; GFX6-NEXT:    v_sub_f32_e32 v1, v1, v3
1547; GFX6-NEXT:    v_sub_f32_e32 v0, v0, v2
1548; GFX6-NEXT:    v_min_f32_e32 v0, 0x3f7fe000, v0
1549; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fe000, v1
1550; GFX6-NEXT:    s_setpc_b64 s[30:31]
1551;
1552; GFX7-LABEL: basic_fract_v2f16_nonan:
1553; GFX7:       ; %bb.0: ; %entry
1554; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1556; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1557; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1558; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1559; GFX7-NEXT:    v_floor_f32_e32 v2, v0
1560; GFX7-NEXT:    v_floor_f32_e32 v3, v1
1561; GFX7-NEXT:    v_sub_f32_e32 v1, v1, v3
1562; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v2
1563; GFX7-NEXT:    v_min_f32_e32 v0, 0x3f7fe000, v0
1564; GFX7-NEXT:    v_min_f32_e32 v1, 0x3f7fe000, v1
1565; GFX7-NEXT:    s_setpc_b64 s[30:31]
1566;
1567; GFX8-LABEL: basic_fract_v2f16_nonan:
1568; GFX8:       ; %bb.0: ; %entry
1569; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1570; GFX8-NEXT:    v_fract_f16_e32 v1, v0
1571; GFX8-NEXT:    v_fract_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1572; GFX8-NEXT:    v_pack_b32_f16 v0, v1, v0
1573; GFX8-NEXT:    s_setpc_b64 s[30:31]
1574;
1575; GFX11-LABEL: basic_fract_v2f16_nonan:
1576; GFX11:       ; %bb.0: ; %entry
1577; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1578; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1579; GFX11-NEXT:    v_fract_f16_e32 v0, v0
1580; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1581; GFX11-NEXT:    v_fract_f16_e32 v1, v1
1582; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
1583; GFX11-NEXT:    s_setpc_b64 s[30:31]
1584;
1585; GFX12-LABEL: basic_fract_v2f16_nonan:
1586; GFX12:       ; %bb.0: ; %entry
1587; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1588; GFX12-NEXT:    s_wait_expcnt 0x0
1589; GFX12-NEXT:    s_wait_samplecnt 0x0
1590; GFX12-NEXT:    s_wait_bvhcnt 0x0
1591; GFX12-NEXT:    s_wait_kmcnt 0x0
1592; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1593; GFX12-NEXT:    v_fract_f16_e32 v0, v0
1594; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1595; GFX12-NEXT:    v_fract_f16_e32 v1, v1
1596; GFX12-NEXT:    v_pack_b32_f16 v0, v0, v1
1597; GFX12-NEXT:    s_setpc_b64 s[30:31]
1598entry:
1599  %floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
1600  %sub = fsub <2 x half> %x, %floor
1601  %min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)
1602  ret <2 x half> %min
1603}
1604
; f64 version of the basic fract pattern with a nofpclass(nan) argument.
; Every target with a working fract (kaveri and newer) folds it into one
; nnan llvm.amdgcn.fract.f64 call. tahiti keeps the floor/fsub/minnum
; expansion; its checked output still contains v_fract_f64, presumably as
; part of the f64 floor lowering, followed by the subtract and v_min_f64.
; NOTE(review): "nanans" in the name looks like a typo for "nonan" (compare
; the f16 tests) - confirm before renaming, since the check lines embed it.
1605define double @basic_fract_f64_nanans(double nofpclass(nan) %x) {
1606; GFX6-IR-LABEL: define double @basic_fract_f64_nanans
1607; GFX6-IR-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1608; GFX6-IR-NEXT:  entry:
1609; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
1610; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
1611; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
1612; GFX6-IR-NEXT:    ret double [[MIN]]
1613;
1614; IR-FRACT-LABEL: define double @basic_fract_f64_nanans
1615; IR-FRACT-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
1616; IR-FRACT-NEXT:  entry:
1617; IR-FRACT-NEXT:    [[MIN:%.*]] = call nnan double @llvm.amdgcn.fract.f64(double [[X]])
1618; IR-FRACT-NEXT:    ret double [[MIN]]
1619;
1620; GFX6-LABEL: basic_fract_f64_nanans:
1621; GFX6:       ; %bb.0: ; %entry
1622; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1623; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
1624; GFX6-NEXT:    v_mov_b32_e32 v4, -1
1625; GFX6-NEXT:    v_mov_b32_e32 v5, 0x3fefffff
1626; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
1627; GFX6-NEXT:    v_cmp_class_f64_e64 vcc, v[0:1], 3
1628; GFX6-NEXT:    s_mov_b32 s4, -1
1629; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
1630; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
1631; GFX6-NEXT:    v_add_f64 v[2:3], v[0:1], -v[2:3]
1632; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
1633; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
1634; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
1635; GFX6-NEXT:    s_setpc_b64 s[30:31]
1636;
1637; GFX7-LABEL: basic_fract_f64_nanans:
1638; GFX7:       ; %bb.0: ; %entry
1639; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1640; GFX7-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1641; GFX7-NEXT:    s_setpc_b64 s[30:31]
1642;
1643; GFX8-LABEL: basic_fract_f64_nanans:
1644; GFX8:       ; %bb.0: ; %entry
1645; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646; GFX8-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1647; GFX8-NEXT:    s_setpc_b64 s[30:31]
1648;
1649; GFX11-LABEL: basic_fract_f64_nanans:
1650; GFX11:       ; %bb.0: ; %entry
1651; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1652; GFX11-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1653; GFX11-NEXT:    s_setpc_b64 s[30:31]
1654;
1655; GFX12-LABEL: basic_fract_f64_nanans:
1656; GFX12:       ; %bb.0: ; %entry
1657; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1658; GFX12-NEXT:    s_wait_expcnt 0x0
1659; GFX12-NEXT:    s_wait_samplecnt 0x0
1660; GFX12-NEXT:    s_wait_bvhcnt 0x0
1661; GFX12-NEXT:    s_wait_kmcnt 0x0
1662; GFX12-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1663; GFX12-NEXT:    s_setpc_b64 s[30:31]
1664entry:
1665  %floor = tail call double @llvm.floor.f64(double %x)
1666  %sub = fsub double %x, %floor
1667  %min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
1668  ret double %min
1669}
1670
; f16 fract expansion with the NaN-preserving select (fcmp uno + select
; returning %x) but without an infinity check, plus a store of the floor
; result through %ip. On targets with legal f16 the select/minnum/fsub
; chain folds into llvm.amdgcn.fract.f16 (no nnan flag, since %x may be
; NaN), while the floor call stays live for the store. tahiti and kaveri
; keep the full expansion.
1671define half @safe_math_fract_f16_noinf_check(half %x, ptr addrspace(1) writeonly captures(none) %ip) {
1672; GFX6-IR-LABEL: define half @safe_math_fract_f16_noinf_check
1673; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
1674; GFX6-IR-NEXT:  entry:
1675; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
1676; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
1677; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
1678; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
1679; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
1680; GFX6-IR-NEXT:    store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
1681; GFX6-IR-NEXT:    ret half [[COND]]
1682;
1683; GFX7-IR-LABEL: define half @safe_math_fract_f16_noinf_check
1684; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
1685; GFX7-IR-NEXT:  entry:
1686; GFX7-IR-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
1687; GFX7-IR-NEXT:    [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
1688; GFX7-IR-NEXT:    [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
1689; GFX7-IR-NEXT:    [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
1690; GFX7-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
1691; GFX7-IR-NEXT:    store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
1692; GFX7-IR-NEXT:    ret half [[COND]]
1693;
1694; IR-LEGALF16-LABEL: define half @safe_math_fract_f16_noinf_check
1695; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
1696; IR-LEGALF16-NEXT:  entry:
1697; IR-LEGALF16-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
1698; IR-LEGALF16-NEXT:    [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]])
1699; IR-LEGALF16-NEXT:    store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
1700; IR-LEGALF16-NEXT:    ret half [[COND]]
1701;
1702; GFX6-LABEL: safe_math_fract_f16_noinf_check:
1703; GFX6:       ; %bb.0: ; %entry
1704; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1705; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
1706; GFX6-NEXT:    s_mov_b32 s6, 0
1707; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1708; GFX6-NEXT:    s_mov_b32 s4, s6
1709; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
1710; GFX6-NEXT:    s_mov_b32 s5, s6
1711; GFX6-NEXT:    v_floor_f32_e32 v3, v0
1712; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
1713; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
1714; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fe000, v4
1715; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1716; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1717; GFX6-NEXT:    buffer_store_short v3, v[1:2], s[4:7], 0 addr64
1718; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1719; GFX6-NEXT:    s_setpc_b64 s[30:31]
1720;
1721; GFX7-LABEL: safe_math_fract_f16_noinf_check:
1722; GFX7:       ; %bb.0: ; %entry
1723; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1724; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1725; GFX7-NEXT:    s_mov_b32 s6, 0
1726; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1727; GFX7-NEXT:    s_mov_b32 s4, s6
1728; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1729; GFX7-NEXT:    s_mov_b32 s5, s6
1730; GFX7-NEXT:    v_floor_f32_e32 v3, v0
1731; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
1732; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1733; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fe000, v4
1734; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1735; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1736; GFX7-NEXT:    buffer_store_short v3, v[1:2], s[4:7], 0 addr64
1737; GFX7-NEXT:    s_waitcnt vmcnt(0)
1738; GFX7-NEXT:    s_setpc_b64 s[30:31]
1739;
1740; GFX8-LABEL: safe_math_fract_f16_noinf_check:
1741; GFX8:       ; %bb.0: ; %entry
1742; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1743; GFX8-NEXT:    v_floor_f16_e32 v3, v0
1744; GFX8-NEXT:    v_fract_f16_e32 v0, v0
1745; GFX8-NEXT:    global_store_short v[1:2], v3, off
1746; GFX8-NEXT:    s_waitcnt vmcnt(0)
1747; GFX8-NEXT:    s_setpc_b64 s[30:31]
1748;
1749; GFX11-LABEL: safe_math_fract_f16_noinf_check:
1750; GFX11:       ; %bb.0: ; %entry
1751; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1752; GFX11-NEXT:    v_floor_f16_e32 v3, v0
1753; GFX11-NEXT:    v_fract_f16_e32 v0, v0
1754; GFX11-NEXT:    global_store_b16 v[1:2], v3, off
1755; GFX11-NEXT:    s_setpc_b64 s[30:31]
1756;
1757; GFX12-LABEL: safe_math_fract_f16_noinf_check:
1758; GFX12:       ; %bb.0: ; %entry
1759; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1760; GFX12-NEXT:    s_wait_expcnt 0x0
1761; GFX12-NEXT:    s_wait_samplecnt 0x0
1762; GFX12-NEXT:    s_wait_bvhcnt 0x0
1763; GFX12-NEXT:    s_wait_kmcnt 0x0
1764; GFX12-NEXT:    v_floor_f16_e32 v3, v0
1765; GFX12-NEXT:    v_fract_f16_e32 v0, v0
1766; GFX12-NEXT:    global_store_b16 v[1:2], v3, off
1767; GFX12-NEXT:    s_setpc_b64 s[30:31]
1768entry:
1769  %floor = tail call half @llvm.floor.f16(half %x)
1770  %sub = fsub half %x, %floor
1771  %min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
1772  %uno = fcmp uno half %x, 0.000000e+00
1773  %cond = select i1 %uno, half %x, half %min
1774  store half %floor, ptr addrspace(1) %ip, align 4
1775  ret half %cond
1776}
1777
; f64 fract expansion with the NaN-preserving select but no infinity
; check, plus a store of the floor result through %ip. Targets with a
; working fract (kaveri and newer) fold the select/minnum/fsub chain into
; llvm.amdgcn.fract.f64 and keep the floor call for the store. tahiti
; keeps the full expansion, including the compare and select on %x.
1778define double @safe_math_fract_f64_noinf_check(double %x, ptr addrspace(1) writeonly captures(none) %ip) {
1779; GFX6-IR-LABEL: define double @safe_math_fract_f64_noinf_check
1780; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
1781; GFX6-IR-NEXT:  entry:
1782; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
1783; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
1784; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
1785; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00
1786; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]
1787; GFX6-IR-NEXT:    store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
1788; GFX6-IR-NEXT:    ret double [[COND]]
1789;
1790; IR-FRACT-LABEL: define double @safe_math_fract_f64_noinf_check
1791; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
1792; IR-FRACT-NEXT:  entry:
1793; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
1794; IR-FRACT-NEXT:    [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]])
1795; IR-FRACT-NEXT:    store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
1796; IR-FRACT-NEXT:    ret double [[COND]]
1797;
1798; GFX6-LABEL: safe_math_fract_f64_noinf_check:
1799; GFX6:       ; %bb.0: ; %entry
1800; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1801; GFX6-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
1802; GFX6-NEXT:    v_mov_b32_e32 v6, -1
1803; GFX6-NEXT:    v_mov_b32_e32 v7, 0x3fefffff
1804; GFX6-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
1805; GFX6-NEXT:    v_cmp_class_f64_e64 vcc, v[0:1], 3
1806; GFX6-NEXT:    s_mov_b32 s8, -1
1807; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
1808; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
1809; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], -v[4:5]
1810; GFX6-NEXT:    s_mov_b32 s9, 0x3fefffff
1811; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], -v[4:5]
1812; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
1813; GFX6-NEXT:    v_min_f64 v[6:7], v[6:7], s[8:9]
1814; GFX6-NEXT:    s_mov_b32 s6, 0
1815; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1816; GFX6-NEXT:    s_mov_b32 s4, s6
1817; GFX6-NEXT:    s_mov_b32 s5, s6
1818; GFX6-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
1819; GFX6-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
1820; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
1821; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1822; GFX6-NEXT:    s_setpc_b64 s[30:31]
1823;
1824; GFX7-LABEL: safe_math_fract_f64_noinf_check:
1825; GFX7:       ; %bb.0: ; %entry
1826; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1827; GFX7-NEXT:    v_floor_f64_e32 v[4:5], v[0:1]
1828; GFX7-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1829; GFX7-NEXT:    s_mov_b32 s6, 0
1830; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1831; GFX7-NEXT:    s_mov_b32 s4, s6
1832; GFX7-NEXT:    s_mov_b32 s5, s6
1833; GFX7-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
1834; GFX7-NEXT:    s_waitcnt vmcnt(0)
1835; GFX7-NEXT:    s_setpc_b64 s[30:31]
1836;
1837; GFX8-LABEL: safe_math_fract_f64_noinf_check:
1838; GFX8:       ; %bb.0: ; %entry
1839; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840; GFX8-NEXT:    v_floor_f64_e32 v[4:5], v[0:1]
1841; GFX8-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1842; GFX8-NEXT:    global_store_dwordx2 v[2:3], v[4:5], off
1843; GFX8-NEXT:    s_waitcnt vmcnt(0)
1844; GFX8-NEXT:    s_setpc_b64 s[30:31]
1845;
1846; GFX11-LABEL: safe_math_fract_f64_noinf_check:
1847; GFX11:       ; %bb.0: ; %entry
1848; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1849; GFX11-NEXT:    v_floor_f64_e32 v[4:5], v[0:1]
1850; GFX11-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1851; GFX11-NEXT:    global_store_b64 v[2:3], v[4:5], off
1852; GFX11-NEXT:    s_setpc_b64 s[30:31]
1853;
1854; GFX12-LABEL: safe_math_fract_f64_noinf_check:
1855; GFX12:       ; %bb.0: ; %entry
1856; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1857; GFX12-NEXT:    s_wait_expcnt 0x0
1858; GFX12-NEXT:    s_wait_samplecnt 0x0
1859; GFX12-NEXT:    s_wait_bvhcnt 0x0
1860; GFX12-NEXT:    s_wait_kmcnt 0x0
1861; GFX12-NEXT:    v_floor_f64_e32 v[4:5], v[0:1]
1862; GFX12-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
1863; GFX12-NEXT:    global_store_b64 v[2:3], v[4:5], off
1864; GFX12-NEXT:    s_setpc_b64 s[30:31]
1865entry:
1866  %floor = tail call double @llvm.floor.f64(double %x)
1867  %sub = fsub double %x, %floor
1868  %min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
1869  %uno = fcmp uno double %x, 0.000000e+00
1870  %cond = select i1 %uno, double %x, double %min
1871  store double %floor, ptr addrspace(1) %ip, align 4
1872  ret double %cond
1873}
1874
; f32 fract pattern with the NaN select, where the fast-math flag (nsz)
; sits on the select, the outermost instruction of the pattern. On targets
; with a working fract the checked llvm.amdgcn.fract.f32 call carries
; that nsz flag. tahiti keeps the expansion, with nsz still on the select.
1875define float @select_nan_fract_f32_flags_select(float %x) {
1876; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_select
1877; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1878; GFX6-IR-NEXT:  entry:
1879; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1880; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1881; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1882; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
1883; GFX6-IR-NEXT:    [[COND:%.*]] = select nsz i1 [[UNO]], float [[X]], float [[MIN]]
1884; GFX6-IR-NEXT:    ret float [[COND]]
1885;
1886; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_select
1887; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1888; IR-FRACT-NEXT:  entry:
1889; IR-FRACT-NEXT:    [[COND:%.*]] = call nsz float @llvm.amdgcn.fract.f32(float [[X]])
1890; IR-FRACT-NEXT:    ret float [[COND]]
1891;
1892; GFX6-LABEL: select_nan_fract_f32_flags_select:
1893; GFX6:       ; %bb.0: ; %entry
1894; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1895; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1896; GFX6-NEXT:    v_sub_f32_e32 v1, v0, v1
1897; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1898; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1899; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1900; GFX6-NEXT:    s_setpc_b64 s[30:31]
1901;
1902; GFX7-LABEL: select_nan_fract_f32_flags_select:
1903; GFX7:       ; %bb.0: ; %entry
1904; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905; GFX7-NEXT:    v_fract_f32_e32 v0, v0
1906; GFX7-NEXT:    s_setpc_b64 s[30:31]
1907;
1908; GFX8-LABEL: select_nan_fract_f32_flags_select:
1909; GFX8:       ; %bb.0: ; %entry
1910; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1911; GFX8-NEXT:    v_fract_f32_e32 v0, v0
1912; GFX8-NEXT:    s_setpc_b64 s[30:31]
1913;
1914; GFX11-LABEL: select_nan_fract_f32_flags_select:
1915; GFX11:       ; %bb.0: ; %entry
1916; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1917; GFX11-NEXT:    v_fract_f32_e32 v0, v0
1918; GFX11-NEXT:    s_setpc_b64 s[30:31]
1919;
1920; GFX12-LABEL: select_nan_fract_f32_flags_select:
1921; GFX12:       ; %bb.0: ; %entry
1922; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1923; GFX12-NEXT:    s_wait_expcnt 0x0
1924; GFX12-NEXT:    s_wait_samplecnt 0x0
1925; GFX12-NEXT:    s_wait_bvhcnt 0x0
1926; GFX12-NEXT:    s_wait_kmcnt 0x0
1927; GFX12-NEXT:    v_fract_f32_e32 v0, v0
1928; GFX12-NEXT:    s_setpc_b64 s[30:31]
1929entry:
1930  %floor = tail call float @llvm.floor.f32(float %x)
1931  %sub = fsub float %x, %floor
1932  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1933  %uno = fcmp uno float %x, 0.000000e+00
1934  %cond = select nsz i1 %uno, float %x, float %min
1935  ret float %cond
1936}
1937
; Same f32 fract-with-NaN-select pattern, but here the nsz flag is on the
; inner minnum call rather than on the select. On targets with a working
; fract the checked llvm.amdgcn.fract.f32 call has no fast-math flags, so
; a flag that appears only on the minnum is not carried over to the
; result. tahiti keeps the expansion, with nsz still on the minnum.
1938define float @select_nan_fract_f32_flags_minnum(float %x) {
1939; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_minnum
1940; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1941; GFX6-IR-NEXT:  entry:
1942; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
1943; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
1944; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
1945; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
1946; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
1947; GFX6-IR-NEXT:    ret float [[COND]]
1948;
1949; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_minnum
1950; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
1951; IR-FRACT-NEXT:  entry:
1952; IR-FRACT-NEXT:    [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
1953; IR-FRACT-NEXT:    ret float [[COND]]
1954;
1955; GFX6-LABEL: select_nan_fract_f32_flags_minnum:
1956; GFX6:       ; %bb.0: ; %entry
1957; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958; GFX6-NEXT:    v_floor_f32_e32 v1, v0
1959; GFX6-NEXT:    v_sub_f32_e32 v1, v0, v1
1960; GFX6-NEXT:    v_min_f32_e32 v1, 0x3f7fffff, v1
1961; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
1962; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1963; GFX6-NEXT:    s_setpc_b64 s[30:31]
1964;
1965; GFX7-LABEL: select_nan_fract_f32_flags_minnum:
1966; GFX7:       ; %bb.0: ; %entry
1967; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1968; GFX7-NEXT:    v_fract_f32_e32 v0, v0
1969; GFX7-NEXT:    s_setpc_b64 s[30:31]
1970;
1971; GFX8-LABEL: select_nan_fract_f32_flags_minnum:
1972; GFX8:       ; %bb.0: ; %entry
1973; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974; GFX8-NEXT:    v_fract_f32_e32 v0, v0
1975; GFX8-NEXT:    s_setpc_b64 s[30:31]
1976;
1977; GFX11-LABEL: select_nan_fract_f32_flags_minnum:
1978; GFX11:       ; %bb.0: ; %entry
1979; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1980; GFX11-NEXT:    v_fract_f32_e32 v0, v0
1981; GFX11-NEXT:    s_setpc_b64 s[30:31]
1982;
1983; GFX12-LABEL: select_nan_fract_f32_flags_minnum:
1984; GFX12:       ; %bb.0: ; %entry
1985; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1986; GFX12-NEXT:    s_wait_expcnt 0x0
1987; GFX12-NEXT:    s_wait_samplecnt 0x0
1988; GFX12-NEXT:    s_wait_bvhcnt 0x0
1989; GFX12-NEXT:    s_wait_kmcnt 0x0
1990; GFX12-NEXT:    v_fract_f32_e32 v0, v0
1991; GFX12-NEXT:    s_setpc_b64 s[30:31]
1992entry:
1993  %floor = tail call float @llvm.floor.f32(float %x)
1994  %sub = fsub float %x, %floor
1995  %min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
1996  %uno = fcmp uno float %x, 0.000000e+00
1997  %cond = select i1 %uno, float %x, float %min
1998  ret float %cond
1999}
2000
2001define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) writeonly captures(none) %ip) {
2002; GFX6-IR-LABEL: define <2 x float> @safe_math_fract_v2f32
2003; GFX6-IR-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2004; GFX6-IR-NEXT:  entry:
2005; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
2006; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
2007; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000))
2008; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer
2009; GFX6-IR-NEXT:    [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]]
2010; GFX6-IR-NEXT:    [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
2011; GFX6-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000)
2012; GFX6-IR-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]
2013; GFX6-IR-NEXT:    store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2014; GFX6-IR-NEXT:    ret <2 x float> [[COND6]]
2015;
2016; IR-FRACT-LABEL: define <2 x float> @safe_math_fract_v2f32
2017; IR-FRACT-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2018; IR-FRACT-NEXT:  entry:
2019; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
2020; IR-FRACT-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
2021; IR-FRACT-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
2022; IR-FRACT-NEXT:    [[TMP2:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP0]])
2023; IR-FRACT-NEXT:    [[TMP3:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP1]])
2024; IR-FRACT-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
2025; IR-FRACT-NEXT:    [[COND:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
2026; IR-FRACT-NEXT:    [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
2027; IR-FRACT-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000)
2028; IR-FRACT-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]
2029; IR-FRACT-NEXT:    store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2030; IR-FRACT-NEXT:    ret <2 x float> [[COND6]]
2031;
2032; GFX6-LABEL: safe_math_fract_v2f32:
2033; GFX6:       ; %bb.0: ; %entry
2034; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2035; GFX6-NEXT:    v_floor_f32_e32 v5, v1
2036; GFX6-NEXT:    v_floor_f32_e32 v4, v0
2037; GFX6-NEXT:    v_sub_f32_e32 v6, v1, v5
2038; GFX6-NEXT:    v_sub_f32_e32 v7, v0, v4
2039; GFX6-NEXT:    v_min_f32_e32 v6, 0x3f7fffff, v6
2040; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
2041; GFX6-NEXT:    v_min_f32_e32 v7, 0x3f7fffff, v7
2042; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
2043; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
2044; GFX6-NEXT:    v_mov_b32_e32 v8, 0x204
2045; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
2046; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v0, v8
2047; GFX6-NEXT:    s_mov_b32 s6, 0
2048; GFX6-NEXT:    v_cndmask_b32_e64 v0, v7, 0, vcc
2049; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v1, v8
2050; GFX6-NEXT:    s_mov_b32 s7, 0xf000
2051; GFX6-NEXT:    s_mov_b32 s4, s6
2052; GFX6-NEXT:    s_mov_b32 s5, s6
2053; GFX6-NEXT:    v_cndmask_b32_e64 v1, v6, 0, vcc
2054; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
2055; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2056; GFX6-NEXT:    s_setpc_b64 s[30:31]
2057;
2058; GFX7-LABEL: safe_math_fract_v2f32:
2059; GFX7:       ; %bb.0: ; %entry
2060; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2061; GFX7-NEXT:    v_mov_b32_e32 v8, 0x204
2062; GFX7-NEXT:    v_fract_f32_e32 v6, v0
2063; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v0, v8
2064; GFX7-NEXT:    s_mov_b32 s6, 0
2065; GFX7-NEXT:    v_floor_f32_e32 v4, v0
2066; GFX7-NEXT:    v_fract_f32_e32 v7, v1
2067; GFX7-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
2068; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v1, v8
2069; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2070; GFX7-NEXT:    s_mov_b32 s4, s6
2071; GFX7-NEXT:    s_mov_b32 s5, s6
2072; GFX7-NEXT:    v_floor_f32_e32 v5, v1
2073; GFX7-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
2074; GFX7-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
2075; GFX7-NEXT:    s_waitcnt vmcnt(0)
2076; GFX7-NEXT:    s_setpc_b64 s[30:31]
2077;
2078; GFX8-LABEL: safe_math_fract_v2f32:
2079; GFX8:       ; %bb.0: ; %entry
2080; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2081; GFX8-NEXT:    v_mov_b32_e32 v8, 0x204
2082; GFX8-NEXT:    v_fract_f32_e32 v6, v0
2083; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v0, v8
2084; GFX8-NEXT:    v_floor_f32_e32 v4, v0
2085; GFX8-NEXT:    v_fract_f32_e32 v7, v1
2086; GFX8-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
2087; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v1, v8
2088; GFX8-NEXT:    v_floor_f32_e32 v5, v1
2089; GFX8-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
2090; GFX8-NEXT:    global_store_dwordx2 v[2:3], v[4:5], off
2091; GFX8-NEXT:    s_waitcnt vmcnt(0)
2092; GFX8-NEXT:    s_setpc_b64 s[30:31]
2093;
2094; GFX11-LABEL: safe_math_fract_v2f32:
2095; GFX11:       ; %bb.0: ; %entry
2096; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097; GFX11-NEXT:    v_fract_f32_e32 v6, v0
2098; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
2099; GFX11-NEXT:    v_fract_f32_e32 v7, v1
2100; GFX11-NEXT:    v_floor_f32_e32 v4, v0
2101; GFX11-NEXT:    v_floor_f32_e32 v5, v1
2102; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2103; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, s0
2104; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v1, 0x204
2105; GFX11-NEXT:    global_store_b64 v[2:3], v[4:5], off
2106; GFX11-NEXT:    v_cndmask_b32_e64 v1, v7, 0, s0
2107; GFX11-NEXT:    s_setpc_b64 s[30:31]
2108;
2109; GFX12-LABEL: safe_math_fract_v2f32:
2110; GFX12:       ; %bb.0: ; %entry
2111; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2112; GFX12-NEXT:    s_wait_expcnt 0x0
2113; GFX12-NEXT:    s_wait_samplecnt 0x0
2114; GFX12-NEXT:    s_wait_bvhcnt 0x0
2115; GFX12-NEXT:    s_wait_kmcnt 0x0
2116; GFX12-NEXT:    v_fract_f32_e32 v6, v0
2117; GFX12-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
2118; GFX12-NEXT:    v_fract_f32_e32 v7, v1
2119; GFX12-NEXT:    v_floor_f32_e32 v4, v0
2120; GFX12-NEXT:    v_floor_f32_e32 v5, v1
2121; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2122; GFX12-NEXT:    v_cndmask_b32_e64 v0, v6, 0, s0
2123; GFX12-NEXT:    v_cmp_class_f32_e64 s0, v1, 0x204
2124; GFX12-NEXT:    global_store_b64 v[2:3], v[4:5], off
2125; GFX12-NEXT:    v_cndmask_b32_e64 v1, v7, 0, s0
2126; GFX12-NEXT:    s_setpc_b64 s[30:31]
2127entry:
2128  %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
2129  %sub = fsub <2 x float> %x, %floor
2130  %min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
2131  %uno = fcmp uno <2 x float> %x, zeroinitializer
2132  %cond = select <2 x i1> %uno, <2 x float> %x, <2 x float> %min
2133  %fabs = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
2134  %cmpinf = fcmp oeq <2 x float> %fabs, <float 0x7FF0000000000000, float 0x7FF0000000000000>
2135  %cond6 = select <2 x i1> %cmpinf, <2 x float> zeroinitializer, <2 x float> %cond
2136  store <2 x float> %floor, ptr addrspace(1) %ip, align 4
2137  ret <2 x float> %cond6
2138}
2139
; f64 variant of the library-style safe fract expansion with the extra floor
; output. The body computes x - floor(x), clamps it with minnum against
; 0x3FEFFFFFFFFFFFFF (the largest double below 1.0), returns x unchanged when
; x is unordered (NaN), returns 0.0 when |x| is infinity, and stores floor(x)
; through %ip.
; The tahiti opt run keeps the expansion untouched, while the runs sharing the
; fract prefix expect the sub/minnum/NaN-select chain to become a single
; llvm.amdgcn.fract.f64 call with the infinity select left in place.
2140define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly captures(none) %ip) {
2141; GFX6-IR-LABEL: define double @safe_math_fract_f64
2142; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2143; GFX6-IR-NEXT:  entry:
2144; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
2145; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
2146; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
2147; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00
2148; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]
2149; GFX6-IR-NEXT:    [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
2150; GFX6-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000
2151; GFX6-IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]
2152; GFX6-IR-NEXT:    store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2153; GFX6-IR-NEXT:    ret double [[COND6]]
2154;
2155; IR-FRACT-LABEL: define double @safe_math_fract_f64
2156; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2157; IR-FRACT-NEXT:  entry:
2158; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
2159; IR-FRACT-NEXT:    [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]])
2160; IR-FRACT-NEXT:    [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
2161; IR-FRACT-NEXT:    [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000
2162; IR-FRACT-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]
2163; IR-FRACT-NEXT:    store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2164; IR-FRACT-NEXT:    ret double [[COND6]]
2165;
2166; GFX6-LABEL: safe_math_fract_f64:
2167; GFX6:       ; %bb.0: ; %entry
2168; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169; GFX6-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
2170; GFX6-NEXT:    v_mov_b32_e32 v6, -1
2171; GFX6-NEXT:    v_mov_b32_e32 v7, 0x3fefffff
2172; GFX6-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
2173; GFX6-NEXT:    v_cmp_class_f64_e64 vcc, v[0:1], 3
2174; GFX6-NEXT:    s_mov_b32 s8, -1
2175; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
2176; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
2177; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], -v[4:5]
2178; GFX6-NEXT:    s_mov_b32 s9, 0x3fefffff
2179; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], -v[4:5]
2180; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
2181; GFX6-NEXT:    v_min_f64 v[6:7], v[6:7], s[8:9]
2182; GFX6-NEXT:    s_mov_b32 s8, 0
2183; GFX6-NEXT:    s_mov_b32 s9, 0x7ff00000
2184; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
2185; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
2186; GFX6-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
2187; GFX6-NEXT:    s_mov_b32 s6, 0
2188; GFX6-NEXT:    s_mov_b32 s7, 0xf000
2189; GFX6-NEXT:    s_mov_b32 s4, s6
2190; GFX6-NEXT:    s_mov_b32 s5, s6
2191; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
2192; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
2193; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
2194; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2195; GFX6-NEXT:    s_setpc_b64 s[30:31]
2196;
2197; GFX7-LABEL: safe_math_fract_f64:
2198; GFX7:       ; %bb.0: ; %entry
2199; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2200; GFX7-NEXT:    s_mov_b32 s4, 0
2201; GFX7-NEXT:    s_mov_b32 s5, 0x7ff00000
2202; GFX7-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
2203; GFX7-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2204; GFX7-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2205; GFX7-NEXT:    s_mov_b32 s6, 0
2206; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2207; GFX7-NEXT:    s_mov_b32 s4, s6
2208; GFX7-NEXT:    s_mov_b32 s5, s6
2209; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
2210; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
2211; GFX7-NEXT:    buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
2212; GFX7-NEXT:    s_waitcnt vmcnt(0)
2213; GFX7-NEXT:    s_setpc_b64 s[30:31]
2214;
2215; GFX8-LABEL: safe_math_fract_f64:
2216; GFX8:       ; %bb.0: ; %entry
2217; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2218; GFX8-NEXT:    s_mov_b32 s4, 0
2219; GFX8-NEXT:    s_mov_b32 s5, 0x7ff00000
2220; GFX8-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
2221; GFX8-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
2222; GFX8-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2223; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
2224; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
2225; GFX8-NEXT:    global_store_dwordx2 v[2:3], v[6:7], off
2226; GFX8-NEXT:    s_waitcnt vmcnt(0)
2227; GFX8-NEXT:    s_setpc_b64 s[30:31]
2228;
2229; GFX11-LABEL: safe_math_fract_f64:
2230; GFX11:       ; %bb.0: ; %entry
2231; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2232; GFX11-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
2233; GFX11-NEXT:    v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2234; GFX11-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2235; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
2236; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2237; GFX11-NEXT:    global_store_b64 v[2:3], v[6:7], off
2238; GFX11-NEXT:    s_setpc_b64 s[30:31]
2239;
2240; GFX12-LABEL: safe_math_fract_f64:
2241; GFX12:       ; %bb.0: ; %entry
2242; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2243; GFX12-NEXT:    s_wait_expcnt 0x0
2244; GFX12-NEXT:    s_wait_samplecnt 0x0
2245; GFX12-NEXT:    s_wait_bvhcnt 0x0
2246; GFX12-NEXT:    s_wait_kmcnt 0x0
2247; GFX12-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
2248; GFX12-NEXT:    v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
2249; GFX12-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2250; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
2251; GFX12-NEXT:    v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
2252; GFX12-NEXT:    global_store_b64 v[2:3], v[6:7], off
2253; GFX12-NEXT:    s_setpc_b64 s[30:31]
2254entry:
2255  %floor = tail call double @llvm.floor.f64(double %x)
2256  %sub = fsub double %x, %floor
  ; Clamp so the fraction stays strictly below 1.0.
2257  %min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
  ; A NaN input is passed through unchanged.
2258  %uno = fcmp uno double %x, 0.000000e+00
2259  %cond = select i1 %uno, double %x, double %min
  ; An input of +/-infinity produces 0.0.
2260  %fabs = tail call double @llvm.fabs.f64(double %x)
2261  %cmpinf = fcmp oeq double %fabs, 0x7FF0000000000000
2262  %cond6 = select i1 %cmpinf, double 0.000000e+00, double %cond
  ; The floor value is the second output, written through %ip.
2263  store double %floor, ptr addrspace(1) %ip, align 4
2264  ret double %cond6
2265}
2266
; f16 variant of the safe fract expansion with the extra floor output. The
; body computes x - floor(x), clamps it with minnum against 0xH3BFF (the
; largest half below 1.0), returns x unchanged when x is NaN, returns 0.0 when
; |x| is infinity, and stores floor(x) through %ip.
; Both the tahiti and kaveri opt runs keep the expansion untouched; only the
; legal-f16 runs expect llvm.amdgcn.fract.f16 in place of the
; sub/minnum/NaN-select chain, with the infinity select left in place.
2267define half @safe_math_fract_f16(half %x, ptr addrspace(1) writeonly captures(none) %ip) {
2268; GFX6-IR-LABEL: define half @safe_math_fract_f16
2269; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2270; GFX6-IR-NEXT:  entry:
2271; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
2272; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
2273; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
2274; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
2275; GFX6-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
2276; GFX6-IR-NEXT:    [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
2277; GFX6-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
2278; GFX6-IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
2279; GFX6-IR-NEXT:    store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2280; GFX6-IR-NEXT:    ret half [[COND6]]
2281;
2282; GFX7-IR-LABEL: define half @safe_math_fract_f16
2283; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2284; GFX7-IR-NEXT:  entry:
2285; GFX7-IR-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
2286; GFX7-IR-NEXT:    [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
2287; GFX7-IR-NEXT:    [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
2288; GFX7-IR-NEXT:    [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
2289; GFX7-IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
2290; GFX7-IR-NEXT:    [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
2291; GFX7-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
2292; GFX7-IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
2293; GFX7-IR-NEXT:    store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2294; GFX7-IR-NEXT:    ret half [[COND6]]
2295;
2296; IR-LEGALF16-LABEL: define half @safe_math_fract_f16
2297; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2298; IR-LEGALF16-NEXT:  entry:
2299; IR-LEGALF16-NEXT:    [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
2300; IR-LEGALF16-NEXT:    [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]])
2301; IR-LEGALF16-NEXT:    [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
2302; IR-LEGALF16-NEXT:    [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
2303; IR-LEGALF16-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
2304; IR-LEGALF16-NEXT:    store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2305; IR-LEGALF16-NEXT:    ret half [[COND6]]
2306;
2307; GFX6-LABEL: safe_math_fract_f16:
2308; GFX6:       ; %bb.0: ; %entry
2309; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
2311; GFX6-NEXT:    s_movk_i32 s8, 0x7c00
2312; GFX6-NEXT:    s_mov_b32 s6, 0
2313; GFX6-NEXT:    s_mov_b32 s7, 0xf000
2314; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
2315; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
2316; GFX6-NEXT:    s_mov_b32 s4, s6
2317; GFX6-NEXT:    s_mov_b32 s5, s6
2318; GFX6-NEXT:    v_floor_f32_e32 v4, v3
2319; GFX6-NEXT:    v_sub_f32_e32 v5, v3, v4
2320; GFX6-NEXT:    v_cvt_f16_f32_e32 v4, v4
2321; GFX6-NEXT:    v_min_f32_e32 v5, 0x3f7fe000, v5
2322; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
2323; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
2324; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
2325; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
2326; GFX6-NEXT:    buffer_store_short v4, v[1:2], s[4:7], 0 addr64
2327; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2328; GFX6-NEXT:    s_setpc_b64 s[30:31]
2329;
2330; GFX7-LABEL: safe_math_fract_f16:
2331; GFX7:       ; %bb.0: ; %entry
2332; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2333; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
2334; GFX7-NEXT:    s_movk_i32 s8, 0x7c00
2335; GFX7-NEXT:    s_mov_b32 s6, 0
2336; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2337; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v0
2338; GFX7-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
2339; GFX7-NEXT:    s_mov_b32 s4, s6
2340; GFX7-NEXT:    s_mov_b32 s5, s6
2341; GFX7-NEXT:    v_floor_f32_e32 v4, v3
2342; GFX7-NEXT:    v_sub_f32_e32 v5, v3, v4
2343; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
2344; GFX7-NEXT:    v_min_f32_e32 v5, 0x3f7fe000, v5
2345; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
2346; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
2347; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
2348; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
2349; GFX7-NEXT:    buffer_store_short v4, v[1:2], s[4:7], 0 addr64
2350; GFX7-NEXT:    s_waitcnt vmcnt(0)
2351; GFX7-NEXT:    s_setpc_b64 s[30:31]
2352;
2353; GFX8-LABEL: safe_math_fract_f16:
2354; GFX8:       ; %bb.0: ; %entry
2355; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356; GFX8-NEXT:    s_movk_i32 s4, 0x7c00
2357; GFX8-NEXT:    v_fract_f16_e32 v4, v0
2358; GFX8-NEXT:    v_cmp_neq_f16_e64 vcc, |v0|, s4
2359; GFX8-NEXT:    v_floor_f16_e32 v3, v0
2360; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
2361; GFX8-NEXT:    global_store_short v[1:2], v3, off
2362; GFX8-NEXT:    s_waitcnt vmcnt(0)
2363; GFX8-NEXT:    s_setpc_b64 s[30:31]
2364;
2365; GFX11-LABEL: safe_math_fract_f16:
2366; GFX11:       ; %bb.0: ; %entry
2367; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2368; GFX11-NEXT:    v_fract_f16_e32 v3, v0
2369; GFX11-NEXT:    v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
2370; GFX11-NEXT:    v_floor_f16_e32 v4, v0
2371; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
2372; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
2373; GFX11-NEXT:    global_store_b16 v[1:2], v4, off
2374; GFX11-NEXT:    s_setpc_b64 s[30:31]
2375;
2376; GFX12-LABEL: safe_math_fract_f16:
2377; GFX12:       ; %bb.0: ; %entry
2378; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2379; GFX12-NEXT:    s_wait_expcnt 0x0
2380; GFX12-NEXT:    s_wait_samplecnt 0x0
2381; GFX12-NEXT:    s_wait_bvhcnt 0x0
2382; GFX12-NEXT:    s_wait_kmcnt 0x0
2383; GFX12-NEXT:    v_fract_f16_e32 v3, v0
2384; GFX12-NEXT:    v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
2385; GFX12-NEXT:    v_floor_f16_e32 v4, v0
2386; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
2387; GFX12-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
2388; GFX12-NEXT:    global_store_b16 v[1:2], v4, off
2389; GFX12-NEXT:    s_setpc_b64 s[30:31]
2390entry:
2391  %floor = tail call half @llvm.floor.f16(half %x)
2392  %sub = fsub half %x, %floor
  ; Clamp so the fraction stays strictly below 1.0.
2393  %min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
  ; A NaN input is passed through unchanged.
2394  %uno = fcmp uno half %x, 0.000000e+00
2395  %cond = select i1 %uno, half %x, half %min
  ; An input of +/-infinity (0xH7C00 after fabs) produces 0.0.
2396  %fabs = tail call half @llvm.fabs.f16(half %x)
2397  %cmpinf = fcmp oeq half %fabs, 0xH7C00
2398  %cond6 = select i1 %cmpinf, half 0.000000e+00, half %cond
  ; The floor value is the second output, written through %ip.
2399  store half %floor, ptr addrspace(1) %ip, align 4
2400  ret half %cond6
2401}
2402
; <2 x half> variant of the safe fract expansion with the extra floor output.
; Per lane, the body computes x - floor(x), clamps it with minnum against
; 0xH3BFF (the largest half below 1.0), keeps x when the lane is NaN, yields
; zero when |x| is infinity, and stores the floor vector through %ip.
; Both the tahiti and kaveri opt runs keep the expansion untouched. The
; legal-f16 runs expect the vector to be split with extractelement, one
; llvm.amdgcn.fract.f16 call per lane, and the result rebuilt with
; insertelement; the vector infinity select is left in place.
; The input spells its splat constants element by element, whereas the opt
; checks expect the printed splat (...) form.
2403define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) writeonly captures(none) %ip) {
2404; GFX6-IR-LABEL: define <2 x half> @safe_math_fract_v2f16
2405; GFX6-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2406; GFX6-IR-NEXT:  entry:
2407; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
2408; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
2409; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF))
2410; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer
2411; GFX6-IR-NEXT:    [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]
2412; GFX6-IR-NEXT:    [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
2413; GFX6-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00)
2414; GFX6-IR-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
2415; GFX6-IR-NEXT:    store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2416; GFX6-IR-NEXT:    ret <2 x half> [[COND6]]
2417;
2418; GFX7-IR-LABEL: define <2 x half> @safe_math_fract_v2f16
2419; GFX7-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2420; GFX7-IR-NEXT:  entry:
2421; GFX7-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
2422; GFX7-IR-NEXT:    [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
2423; GFX7-IR-NEXT:    [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF))
2424; GFX7-IR-NEXT:    [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer
2425; GFX7-IR-NEXT:    [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]
2426; GFX7-IR-NEXT:    [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
2427; GFX7-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00)
2428; GFX7-IR-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
2429; GFX7-IR-NEXT:    store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2430; GFX7-IR-NEXT:    ret <2 x half> [[COND6]]
2431;
2432; IR-LEGALF16-LABEL: define <2 x half> @safe_math_fract_v2f16
2433; IR-LEGALF16-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2434; IR-LEGALF16-NEXT:  entry:
2435; IR-LEGALF16-NEXT:    [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
2436; IR-LEGALF16-NEXT:    [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0
2437; IR-LEGALF16-NEXT:    [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1
2438; IR-LEGALF16-NEXT:    [[TMP2:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP0]])
2439; IR-LEGALF16-NEXT:    [[TMP3:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP1]])
2440; IR-LEGALF16-NEXT:    [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0
2441; IR-LEGALF16-NEXT:    [[COND:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1
2442; IR-LEGALF16-NEXT:    [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
2443; IR-LEGALF16-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00)
2444; IR-LEGALF16-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
2445; IR-LEGALF16-NEXT:    store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2446; IR-LEGALF16-NEXT:    ret <2 x half> [[COND6]]
2447;
2448; GFX6-LABEL: safe_math_fract_v2f16:
2449; GFX6:       ; %bb.0: ; %entry
2450; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2451; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
2452; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
2453; GFX6-NEXT:    s_movk_i32 s8, 0x7c00
2454; GFX6-NEXT:    s_mov_b32 s6, 0
2455; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v1
2456; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v0
2457; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
2458; GFX6-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
2459; GFX6-NEXT:    v_floor_f32_e32 v6, v4
2460; GFX6-NEXT:    v_cvt_f16_f32_e32 v7, v6
2461; GFX6-NEXT:    v_floor_f32_e32 v8, v5
2462; GFX6-NEXT:    v_sub_f32_e32 v6, v4, v6
2463; GFX6-NEXT:    v_cvt_f16_f32_e32 v9, v8
2464; GFX6-NEXT:    v_sub_f32_e32 v8, v5, v8
2465; GFX6-NEXT:    v_min_f32_e32 v6, 0x3f7fe000, v6
2466; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
2467; GFX6-NEXT:    v_min_f32_e32 v8, 0x3f7fe000, v8
2468; GFX6-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
2469; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
2470; GFX6-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
2471; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
2472; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
2473; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
2474; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v1
2475; GFX6-NEXT:    s_mov_b32 s7, 0xf000
2476; GFX6-NEXT:    s_mov_b32 s4, s6
2477; GFX6-NEXT:    s_mov_b32 s5, s6
2478; GFX6-NEXT:    v_or_b32_e32 v7, v9, v7
2479; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
2480; GFX6-NEXT:    buffer_store_dword v7, v[2:3], s[4:7], 0 addr64
2481; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2482; GFX6-NEXT:    s_setpc_b64 s[30:31]
2483;
2484; GFX7-LABEL: safe_math_fract_v2f16:
2485; GFX7:       ; %bb.0: ; %entry
2486; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2487; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
2488; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
2489; GFX7-NEXT:    s_movk_i32 s8, 0x7c00
2490; GFX7-NEXT:    s_mov_b32 s6, 0
2491; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v1
2492; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v0
2493; GFX7-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
2494; GFX7-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
2495; GFX7-NEXT:    v_floor_f32_e32 v6, v4
2496; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v6
2497; GFX7-NEXT:    v_floor_f32_e32 v8, v5
2498; GFX7-NEXT:    v_sub_f32_e32 v6, v4, v6
2499; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v8
2500; GFX7-NEXT:    v_sub_f32_e32 v8, v5, v8
2501; GFX7-NEXT:    v_min_f32_e32 v6, 0x3f7fe000, v6
2502; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
2503; GFX7-NEXT:    v_min_f32_e32 v8, 0x3f7fe000, v8
2504; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
2505; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
2506; GFX7-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
2507; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
2508; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
2509; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
2510; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v1
2511; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2512; GFX7-NEXT:    s_mov_b32 s4, s6
2513; GFX7-NEXT:    s_mov_b32 s5, s6
2514; GFX7-NEXT:    v_or_b32_e32 v7, v9, v7
2515; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
2516; GFX7-NEXT:    buffer_store_dword v7, v[2:3], s[4:7], 0 addr64
2517; GFX7-NEXT:    s_waitcnt vmcnt(0)
2518; GFX7-NEXT:    s_setpc_b64 s[30:31]
2519;
2520; GFX8-LABEL: safe_math_fract_v2f16:
2521; GFX8:       ; %bb.0: ; %entry
2522; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523; GFX8-NEXT:    s_movk_i32 s6, 0x204
2524; GFX8-NEXT:    v_floor_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2525; GFX8-NEXT:    v_floor_f16_e32 v4, v0
2526; GFX8-NEXT:    v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2527; GFX8-NEXT:    v_cmp_class_f16_sdwa s[4:5], v0, s6 src0_sel:WORD_1 src1_sel:DWORD
2528; GFX8-NEXT:    v_pack_b32_f16 v3, v4, v3
2529; GFX8-NEXT:    v_fract_f16_e32 v4, v0
2530; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, 0, s[4:5]
2531; GFX8-NEXT:    v_cmp_class_f16_e64 s[4:5], v0, s6
2532; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s[4:5]
2533; GFX8-NEXT:    v_pack_b32_f16 v0, v0, v5
2534; GFX8-NEXT:    global_store_dword v[1:2], v3, off
2535; GFX8-NEXT:    s_waitcnt vmcnt(0)
2536; GFX8-NEXT:    s_setpc_b64 s[30:31]
2537;
2538; GFX11-LABEL: safe_math_fract_v2f16:
2539; GFX11:       ; %bb.0: ; %entry
2540; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2541; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
2542; GFX11-NEXT:    v_fract_f16_e32 v6, v0
2543; GFX11-NEXT:    v_floor_f16_e32 v5, v0
2544; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2545; GFX11-NEXT:    v_fract_f16_e32 v4, v3
2546; GFX11-NEXT:    v_cmp_class_f16_e64 s0, v3, 0x204
2547; GFX11-NEXT:    v_floor_f16_e32 v7, v3
2548; GFX11-NEXT:    v_cndmask_b32_e64 v3, v4, 0, s0
2549; GFX11-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
2550; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2551; GFX11-NEXT:    v_pack_b32_f16 v4, v5, v7
2552; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, s0
2553; GFX11-NEXT:    global_store_b32 v[1:2], v4, off
2554; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v3
2555; GFX11-NEXT:    s_setpc_b64 s[30:31]
2556;
2557; GFX12-LABEL: safe_math_fract_v2f16:
2558; GFX12:       ; %bb.0: ; %entry
2559; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2560; GFX12-NEXT:    s_wait_expcnt 0x0
2561; GFX12-NEXT:    s_wait_samplecnt 0x0
2562; GFX12-NEXT:    s_wait_bvhcnt 0x0
2563; GFX12-NEXT:    s_wait_kmcnt 0x0
2564; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
2565; GFX12-NEXT:    v_fract_f16_e32 v6, v0
2566; GFX12-NEXT:    v_floor_f16_e32 v5, v0
2567; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2568; GFX12-NEXT:    v_fract_f16_e32 v4, v3
2569; GFX12-NEXT:    v_cmp_class_f16_e64 s0, v3, 0x204
2570; GFX12-NEXT:    v_floor_f16_e32 v7, v3
2571; GFX12-NEXT:    v_cndmask_b32_e64 v3, v4, 0, s0
2572; GFX12-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
2573; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2574; GFX12-NEXT:    v_pack_b32_f16 v4, v5, v7
2575; GFX12-NEXT:    v_cndmask_b32_e64 v0, v6, 0, s0
2576; GFX12-NEXT:    global_store_b32 v[1:2], v4, off
2577; GFX12-NEXT:    v_pack_b32_f16 v0, v0, v3
2578; GFX12-NEXT:    s_setpc_b64 s[30:31]
2579entry:
2580  %floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
2581  %sub = fsub <2 x half> %x, %floor
  ; Clamp each lane so the fraction stays strictly below 1.0.
2582  %min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)
  ; NaN lanes are passed through unchanged.
2583  %uno = fcmp uno <2 x half> %x, zeroinitializer
2584  %cond = select <2 x i1> %uno, <2 x half> %x, <2 x half> %min
  ; Lanes holding +/-infinity (0xH7C00 after fabs) produce zero.
2585  %fabs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
2586  %cmpinf = fcmp oeq <2 x half> %fabs, <half 0xH7C00, half 0xH7C00>
2587  %cond6 = select <2 x i1> %cmpinf, <2 x half> zeroinitializer, <2 x half> %cond
  ; The floor vector is the second output, written through %ip.
2588  store <2 x half> %floor, ptr addrspace(1) %ip, align 4
2589  ret <2 x half> %cond6
2590}
2591
2592define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) writeonly captures(none) %ip) {
2593; GFX6-IR-LABEL: define <2 x double> @safe_math_fract_v2f64
2594; GFX6-IR-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2595; GFX6-IR-NEXT:  entry:
2596; GFX6-IR-NEXT:    [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])
2597; GFX6-IR-NEXT:    [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]]
2598; GFX6-IR-NEXT:    [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> splat (double 0x3FEFFFFFFFFFFFFF))
2599; GFX6-IR-NEXT:    [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer
2600; GFX6-IR-NEXT:    [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]]
2601; GFX6-IR-NEXT:    [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
2602; GFX6-IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000)
2603; GFX6-IR-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]
2604; GFX6-IR-NEXT:    store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2605; GFX6-IR-NEXT:    ret <2 x double> [[COND6]]
2606;
2607; IR-FRACT-LABEL: define <2 x double> @safe_math_fract_v2f64
2608; IR-FRACT-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
2609; IR-FRACT-NEXT:  entry:
2610; IR-FRACT-NEXT:    [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])
2611; IR-FRACT-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[X]], i64 0
2612; IR-FRACT-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X]], i64 1
2613; IR-FRACT-NEXT:    [[TMP2:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP0]])
2614; IR-FRACT-NEXT:    [[TMP3:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP1]])
2615; IR-FRACT-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0
2616; IR-FRACT-NEXT:    [[COND:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP3]], i64 1
2617; IR-FRACT-NEXT:    [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
2618; IR-FRACT-NEXT:    [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000)
2619; IR-FRACT-NEXT:    [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]
2620; IR-FRACT-NEXT:    store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
2621; IR-FRACT-NEXT:    ret <2 x double> [[COND6]]
2622;
2623; GFX6-LABEL: safe_math_fract_v2f64:
2624; GFX6:       ; %bb.0: ; %entry
2625; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2626; GFX6-NEXT:    v_fract_f64_e32 v[6:7], v[2:3]
2627; GFX6-NEXT:    v_mov_b32_e32 v10, -1
2628; GFX6-NEXT:    v_mov_b32_e32 v11, 0x3fefffff
2629; GFX6-NEXT:    v_min_f64 v[6:7], v[6:7], v[10:11]
2630; GFX6-NEXT:    v_cmp_class_f64_e64 vcc, v[2:3], 3
2631; GFX6-NEXT:    v_fract_f64_e32 v[12:13], v[0:1]
2632; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
2633; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v3, vcc
2634; GFX6-NEXT:    v_add_f64 v[8:9], v[2:3], -v[6:7]
2635; GFX6-NEXT:    v_min_f64 v[6:7], v[12:13], v[10:11]
2636; GFX6-NEXT:    v_cmp_class_f64_e64 vcc, v[0:1], 3
2637; GFX6-NEXT:    v_add_f64 v[10:11], v[2:3], -v[8:9]
2638; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
2639; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
2640; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], -v[6:7]
2641; GFX6-NEXT:    s_mov_b32 s8, -1
2642; GFX6-NEXT:    s_mov_b32 s9, 0x3fefffff
2643; GFX6-NEXT:    v_add_f64 v[12:13], v[0:1], -v[6:7]
2644; GFX6-NEXT:    v_min_f64 v[10:11], v[10:11], s[8:9]
2645; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
2646; GFX6-NEXT:    v_min_f64 v[12:13], v[12:13], s[8:9]
2647; GFX6-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc
2648; GFX6-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc
2649; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
2650; GFX6-NEXT:    v_mov_b32_e32 v14, 0x204
2651; GFX6-NEXT:    v_cndmask_b32_e32 v13, v13, v1, vcc
2652; GFX6-NEXT:    v_cndmask_b32_e32 v12, v12, v0, vcc
2653; GFX6-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v14
2654; GFX6-NEXT:    s_mov_b32 s6, 0
2655; GFX6-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
2656; GFX6-NEXT:    v_cndmask_b32_e64 v1, v13, 0, vcc
2657; GFX6-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v14
2658; GFX6-NEXT:    s_mov_b32 s7, 0xf000
2659; GFX6-NEXT:    s_mov_b32 s4, s6
2660; GFX6-NEXT:    s_mov_b32 s5, s6
2661; GFX6-NEXT:    v_cndmask_b32_e64 v2, v10, 0, vcc
2662; GFX6-NEXT:    v_cndmask_b32_e64 v3, v11, 0, vcc
2663; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
2664; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2665; GFX6-NEXT:    s_setpc_b64 s[30:31]
2666;
2667; GFX7-LABEL: safe_math_fract_v2f64:
2668; GFX7:       ; %bb.0: ; %entry
2669; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2670; GFX7-NEXT:    v_mov_b32_e32 v6, 0x204
2671; GFX7-NEXT:    v_fract_f64_e32 v[10:11], v[0:1]
2672; GFX7-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
2673; GFX7-NEXT:    v_fract_f64_e32 v[12:13], v[2:3]
2674; GFX7-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v6
2675; GFX7-NEXT:    v_floor_f64_e32 v[8:9], v[2:3]
2676; GFX7-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2677; GFX7-NEXT:    s_mov_b32 s10, 0
2678; GFX7-NEXT:    s_mov_b32 s11, 0xf000
2679; GFX7-NEXT:    s_mov_b32 s8, s10
2680; GFX7-NEXT:    s_mov_b32 s9, s10
2681; GFX7-NEXT:    v_cndmask_b32_e64 v0, v10, 0, vcc
2682; GFX7-NEXT:    v_cndmask_b32_e64 v1, v11, 0, vcc
2683; GFX7-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s[4:5]
2684; GFX7-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s[4:5]
2685; GFX7-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[8:11], 0 addr64
2686; GFX7-NEXT:    s_waitcnt vmcnt(0)
2687; GFX7-NEXT:    s_setpc_b64 s[30:31]
2688;
2689; GFX8-LABEL: safe_math_fract_v2f64:
2690; GFX8:       ; %bb.0: ; %entry
2691; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2692; GFX8-NEXT:    v_mov_b32_e32 v6, 0x204
2693; GFX8-NEXT:    v_fract_f64_e32 v[10:11], v[0:1]
2694; GFX8-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
2695; GFX8-NEXT:    v_fract_f64_e32 v[12:13], v[2:3]
2696; GFX8-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v6
2697; GFX8-NEXT:    v_floor_f64_e32 v[8:9], v[2:3]
2698; GFX8-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2699; GFX8-NEXT:    v_cndmask_b32_e64 v0, v10, 0, vcc
2700; GFX8-NEXT:    v_cndmask_b32_e64 v1, v11, 0, vcc
2701; GFX8-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s[4:5]
2702; GFX8-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s[4:5]
2703; GFX8-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off
2704; GFX8-NEXT:    s_waitcnt vmcnt(0)
2705; GFX8-NEXT:    s_setpc_b64 s[30:31]
2706;
2707; GFX11-LABEL: safe_math_fract_v2f64:
2708; GFX11:       ; %bb.0: ; %entry
2709; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2710; GFX11-NEXT:    v_fract_f64_e32 v[10:11], v[0:1]
2711; GFX11-NEXT:    v_cmp_class_f64_e64 s0, v[0:1], 0x204
2712; GFX11-NEXT:    v_fract_f64_e32 v[12:13], v[2:3]
2713; GFX11-NEXT:    v_cmp_class_f64_e64 s1, v[2:3], 0x204
2714; GFX11-NEXT:    v_floor_f64_e32 v[8:9], v[2:3]
2715; GFX11-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2716; GFX11-NEXT:    v_cndmask_b32_e64 v0, v10, 0, s0
2717; GFX11-NEXT:    v_cndmask_b32_e64 v1, v11, 0, s0
2718; GFX11-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s1
2719; GFX11-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s1
2720; GFX11-NEXT:    global_store_b128 v[4:5], v[6:9], off
2721; GFX11-NEXT:    s_setpc_b64 s[30:31]
2722;
2723; GFX12-LABEL: safe_math_fract_v2f64:
2724; GFX12:       ; %bb.0: ; %entry
2725; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2726; GFX12-NEXT:    s_wait_expcnt 0x0
2727; GFX12-NEXT:    s_wait_samplecnt 0x0
2728; GFX12-NEXT:    s_wait_bvhcnt 0x0
2729; GFX12-NEXT:    s_wait_kmcnt 0x0
2730; GFX12-NEXT:    v_fract_f64_e32 v[10:11], v[0:1]
2731; GFX12-NEXT:    v_cmp_class_f64_e64 s0, v[0:1], 0x204
2732; GFX12-NEXT:    v_fract_f64_e32 v[12:13], v[2:3]
2733; GFX12-NEXT:    v_cmp_class_f64_e64 s1, v[2:3], 0x204
2734; GFX12-NEXT:    v_floor_f64_e32 v[8:9], v[2:3]
2735; GFX12-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
2736; GFX12-NEXT:    v_cndmask_b32_e64 v0, v10, 0, s0
2737; GFX12-NEXT:    v_cndmask_b32_e64 v1, v11, 0, s0
2738; GFX12-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s1
2739; GFX12-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s1
2740; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
2741; GFX12-NEXT:    s_setpc_b64 s[30:31]
2742entry:
2743  %floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
2744  %sub = fsub <2 x double> %x, %floor
2745  %min = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %sub, <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)
2746  %uno = fcmp uno <2 x double> %x, zeroinitializer
2747  %cond = select <2 x i1> %uno, <2 x double> %x, <2 x double> %min
2748  %fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
2749  %cmpinf = fcmp oeq <2 x double> %fabs, <double 0x7FF0000000000000, double 0x7FF0000000000000>
2750  %cond6 = select <2 x i1> %cmpinf, <2 x double> zeroinitializer, <2 x double> %cond
2751  store <2 x double> %floor, ptr addrspace(1) %ip, align 4
2752  ret <2 x double> %cond6
2753}
2754
; Declarations of the LLVM intrinsics called from this test file.
; Every declaration below carries attribute group #0.
;
; llvm.floor: half, float, double and the 2-element vector of each.
2755declare half @llvm.floor.f16(half) #0
2756declare float @llvm.floor.f32(float) #0
2757declare double @llvm.floor.f64(double) #0
2758declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
2759declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
2760declare <2 x half> @llvm.floor.v2f16(<2 x half>) #0
; llvm.trunc: declared for float only.
; NOTE(review): its users are not visible in this part of the file -- confirm it is still referenced.
2761declare float @llvm.trunc.f32(float) #0
; llvm.minnum: half, float, double and the 2-element vector of each.
2762declare float @llvm.minnum.f32(float, float) #0
2763declare half @llvm.minnum.f16(half, half) #0
2764declare double @llvm.minnum.f64(double, double) #0
2765declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
2766declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
2767declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
; llvm.maxnum: declared for float only.
; NOTE(review): its users are not visible in this part of the file -- confirm it is still referenced.
2768declare float @llvm.maxnum.f32(float, float) #0
; llvm.fabs: half, float, double and the 2-element vector of each.
2769declare float @llvm.fabs.f32(float) #0
2770declare double @llvm.fabs.f64(double) #0
2771declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
2772declare half @llvm.fabs.f16(half) #0
2773declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
2774declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
2775
; Attribute group #0: referenced by each of the intrinsic `declare` lines in this file.
2776attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
2777