; Source: llvm/test/CodeGen/AMDGPU/fp-classify.ll (revision 42ed7757835122a63477b5783215e1100cd0b709)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

declare float @llvm.fabs.f32(float) #1
declare double @llvm.fabs.f64(double) #1

; fcmp oeq(|x|, +inf) should select to a single v_cmp_class with mask 0x204 (+/-inf).
define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isinf_pattern:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x204
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isinf_pattern:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x204
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isinf_pattern:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x204
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call float @llvm.fabs.f32(float %x) #1
  %cmp = fcmp oeq float %fabs, 0x7FF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; ueq (unordered-or-equal) with +inf is not the isinf pattern; stays a v_cmp_nlg compare.
define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_not_isinf_pattern_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_nlg_f32_e64 s[4:5], |s4|, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_not_isinf_pattern_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_not_isinf_pattern_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call float @llvm.fabs.f32(float %x) #1
  %cmp = fcmp ueq float %fabs, 0x7FF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; oeq(|x|, -inf) can never be true since |x| is non-negative, so the store folds to 0.
define amdgpu_kernel void @test_not_isinf_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_not_isinf_pattern_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_not_isinf_pattern_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v2, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_not_isinf_pattern_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v0, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call float @llvm.fabs.f32(float %x) #1
  %cmp = fcmp oeq float %fabs, 0xFFF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; ord(x) && une(|x|, +inf) folds to a single v_cmp_class with mask 0x1f8 (finite classes).
define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; one(|x|, +inf) alone also matches the finite-class mask 0x1f8.
; NOTE(review): the call below references attribute group #3, which is not declared in
; this file (only #0/#1 are) — the parser treats it as empty; confirm this is intended.
define amdgpu_kernel void @test_isfinite_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #3
  %cmpinf = fcmp one float %x.fabs, 0x7FF0000000000000
  %ext = zext i1 %cmpinf to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Use negative infinity — une(|x|, -inf) is always true, so only the ord() compare remains.
define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_not_pattern_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_o_f32_e64 s[4:5], s6, s6
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s2, s2
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s2, s2, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; No fabs — without |x| the class fold does not apply; ord and neq compares stay separate.
define amdgpu_kernel void @test_isfinite_not_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_not_pattern_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_o_f32_e64 s[4:5], s6, s6
; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s6, v0
; SI-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s6, s6
; VI-NEXT:    v_cmp_neq_f32_e32 vcc, s6, v0
; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
; GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x7f800000, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s3, s2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %ninf = fcmp une float %x, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; fabs of different value — ord tests %x but the inf compare uses |%y|, so no class fold.
define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocapture %out, float %x, float %y) #0 {
; SI-LABEL: test_isfinite_not_pattern_2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
; SI-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_o_f32_e64 s[4:5], s2, s2
; VI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s2, s2, s2
; GFX11-NEXT:    v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s2, s3
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Wrong ordered compare type — uno instead of ord, so the isfinite fold must not trigger.
define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_not_pattern_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_u_f32_e64 s[4:5], s6, s6
; SI-NEXT:    v_cmp_neq_f32_e64 s[6:7], |s6|, v0
; SI-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_u_f32_e64 s[2:3], s6, s6
; VI-NEXT:    v_cmp_neq_f32_e64 s[4:5], |s6|, v0
; VI-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_u_f32_e64 s3, s2, s2
; GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s3, s2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp uno float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; ord(x) && one(|x|, +inf) also folds to v_cmp_class with the finite mask 0x1f8.
define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_4:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Same as test_isfinite_pattern_4 but with the 'and' operands commuted; must fold identically.
define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_4_commute_and:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4_commute_and:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_4_commute_and:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ninf, %ord
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; The ord() compares x with y rather than 0.0, so only the one(|x|, inf) half may fold to
; v_cmp_class; the ordered compare must remain as a separate v_cmp_o.
define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrspace(1) nocapture %out, float %x, [8 x i32], float %y) #0 {
; SI-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s0, s[4:5], 0x14
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x9
; SI-NEXT:    s_load_dword s1, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s11, 0xf000
; SI-NEXT:    s_mov_b32 s10, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    v_cmp_o_f32_e32 vcc, s1, v1
; SI-NEXT:    v_cmp_class_f32_e64 s[0:1], s1, v0
; SI-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s0, s[4:5], 0x50
; VI-NEXT:    s_load_dword s1, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s0
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s1, v0
; VI-NEXT:    v_cmp_o_f32_e64 s[0:1], s1, v1
; VI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b32 s3, s[4:5], 0x50
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s3
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s3, s2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, %y
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; f16 isinf: VI/GFX11 use v_cmp_class_f16 with mask 0x204; SI (no f16 support) compares
; the masked bit pattern against 0x7c00 instead.
define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isinf_pattern_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s4, s6, 0x7fff
; SI-NEXT:    s_cmpk_eq_i32 s4, 0x7c00
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isinf_pattern_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x204
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isinf_pattern_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x204
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call half @llvm.fabs.f16(half %x) #1
  %cmp = fcmp oeq half %fabs, 0xH7C00
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; f16 isfinite pattern folds to v_cmp_class_f16 with mask 0x1f8 on VI/GFX11; SI expands
; via f32 conversion plus a bit-pattern compare.
define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isfinite_pattern_0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s6
; SI-NEXT:    s_and_b32 s4, s6, 0x7fff
; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
; SI-NEXT:    s_cmpk_lg_i32 s4, 0x7c00
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_0_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord half %x, 0.0
  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
  %ninf = fcmp une half %x.fabs, 0xH7C00
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; f16 variant of pattern_4; SI compares |x|'s bit pattern against 0x7c00 with s_cmpk_lt.
define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isfinite_pattern_4_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s4, s6, 0x7fff
; SI-NEXT:    s_cmpk_lt_i32 s4, 0x7c00
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_4_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord half %x, 0.0
  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
  %ninf = fcmp one half %x.fabs, 0xH7C00
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
