xref: /llvm-project/llvm/test/CodeGen/AMDGPU/minmax.ll (revision d0812dbbff2ac839694ee1988e6af962fcd0bb1e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11 %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11 %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12 %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12 %s
6
; Computes smin(smax(%a, %b), %c). The checks expect both GFX11 and GFX12 to
; select a single v_maxmin_i32 (the mnemonic names the inner operation first).
7define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
8; GFX11-LABEL: test_minmax_i32:
9; GFX11:       ; %bb.0:
10; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
12; GFX11-NEXT:    s_setpc_b64 s[30:31]
13;
14; GFX12-LABEL: test_minmax_i32:
15; GFX12:       ; %bb.0:
16; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
17; GFX12-NEXT:    s_wait_expcnt 0x0
18; GFX12-NEXT:    s_wait_samplecnt 0x0
19; GFX12-NEXT:    s_wait_bvhcnt 0x0
20; GFX12-NEXT:    s_wait_kmcnt 0x0
21; GFX12-NEXT:    v_maxmin_i32 v0, v0, v1, v2
22; GFX12-NEXT:    s_setpc_b64 s[30:31]
23  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
24  %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
25  ret i32 %sminmax
26}
27
; Same smin(smax(%a, %b), %c) pattern, but in an amdgpu_ps function whose
; operands are all `inreg`. The checks expect separate s_max_i32 and s_min_i32
; instructions from both selectors (no fused instruction appears), followed by
; a global_store_b32 of the result; SDAG and GISEL differ only in register
; choice and move ordering.
28define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
29; SDAG-LABEL: s_test_minmax_i32:
30; SDAG:       ; %bb.0:
31; SDAG-NEXT:    s_max_i32 s0, s0, s1
32; SDAG-NEXT:    s_mov_b32 s5, s4
33; SDAG-NEXT:    s_min_i32 s0, s0, s2
34; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
35; SDAG-NEXT:    s_mov_b32 s4, s3
36; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
37; SDAG-NEXT:    s_endpgm
38;
39; GISEL-LABEL: s_test_minmax_i32:
40; GISEL:       ; %bb.0:
41; GISEL-NEXT:    s_max_i32 s0, s0, s1
42; GISEL-NEXT:    s_mov_b32 s6, s3
43; GISEL-NEXT:    s_min_i32 s0, s0, s2
44; GISEL-NEXT:    s_mov_b32 s7, s4
45; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
46; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
47; GISEL-NEXT:    s_endpgm
48  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
49  %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
50  store i32 %sminmax, ptr addrspace(1) %out
51  ret void
52}
53
; Commuted form: smin(%c, smax(%a, %b)), i.e. the smax result is the second
; operand of the outer smin. The checks expect the same single v_maxmin_i32 as
; the non-commuted test on both GFX11 and GFX12.
54define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
55; GFX11-LABEL: test_minmax_commuted_i32:
56; GFX11:       ; %bb.0:
57; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
59; GFX11-NEXT:    s_setpc_b64 s[30:31]
60;
61; GFX12-LABEL: test_minmax_commuted_i32:
62; GFX12:       ; %bb.0:
63; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
64; GFX12-NEXT:    s_wait_expcnt 0x0
65; GFX12-NEXT:    s_wait_samplecnt 0x0
66; GFX12-NEXT:    s_wait_bvhcnt 0x0
67; GFX12-NEXT:    s_wait_kmcnt 0x0
68; GFX12-NEXT:    v_maxmin_i32 v0, v0, v1, v2
69; GFX12-NEXT:    s_setpc_b64 s[30:31]
70  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
71  %sminmax = call i32 @llvm.smin.i32(i32 %c, i32 %smax)
72  ret i32 %sminmax
73}
74
; Computes smax(smin(%a, %b), %c). The checks expect both GFX11 and GFX12 to
; select a single v_minmax_i32.
75define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
76; GFX11-LABEL: test_maxmin_i32:
77; GFX11:       ; %bb.0:
78; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
80; GFX11-NEXT:    s_setpc_b64 s[30:31]
81;
82; GFX12-LABEL: test_maxmin_i32:
83; GFX12:       ; %bb.0:
84; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
85; GFX12-NEXT:    s_wait_expcnt 0x0
86; GFX12-NEXT:    s_wait_samplecnt 0x0
87; GFX12-NEXT:    s_wait_bvhcnt 0x0
88; GFX12-NEXT:    s_wait_kmcnt 0x0
89; GFX12-NEXT:    v_minmax_i32 v0, v0, v1, v2
90; GFX12-NEXT:    s_setpc_b64 s[30:31]
91  %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
92  %smaxmin = call i32 @llvm.smax.i32(i32 %smin, i32 %c)
93  ret i32 %smaxmin
94}
95
; Commuted form: smax(%c, smin(%a, %b)). The checks expect the same single
; v_minmax_i32 as the non-commuted test on both GFX11 and GFX12.
96define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
97; GFX11-LABEL: test_maxmin_commuted_i32:
98; GFX11:       ; %bb.0:
99; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
101; GFX11-NEXT:    s_setpc_b64 s[30:31]
102;
103; GFX12-LABEL: test_maxmin_commuted_i32:
104; GFX12:       ; %bb.0:
105; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
106; GFX12-NEXT:    s_wait_expcnt 0x0
107; GFX12-NEXT:    s_wait_samplecnt 0x0
108; GFX12-NEXT:    s_wait_bvhcnt 0x0
109; GFX12-NEXT:    s_wait_kmcnt 0x0
110; GFX12-NEXT:    v_minmax_i32 v0, v0, v1, v2
111; GFX12-NEXT:    s_setpc_b64 s[30:31]
112  %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
113  %smaxmin = call i32 @llvm.smax.i32(i32 %c, i32 %smin)
114  ret i32 %smaxmin
115}
116
; Signed median-of-three written as smax(smin(x, y), smin(smax(x, y), z)) and
; stored to %arg. The checks expect the four intrinsic calls to collapse into
; one v_med3_i32 on both GFX11 and GFX12 (no min/max/minmax instructions).
117define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
118; GFX11-LABEL: test_smed3_i32:
119; GFX11:       ; %bb.0:
120; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX11-NEXT:    v_med3_i32 v2, v2, v3, v4
122; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
123; GFX11-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX12-LABEL: test_smed3_i32:
126; GFX12:       ; %bb.0:
127; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
128; GFX12-NEXT:    s_wait_expcnt 0x0
129; GFX12-NEXT:    s_wait_samplecnt 0x0
130; GFX12-NEXT:    s_wait_bvhcnt 0x0
131; GFX12-NEXT:    s_wait_kmcnt 0x0
132; GFX12-NEXT:    v_med3_i32 v2, v2, v3, v4
133; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
134; GFX12-NEXT:    s_setpc_b64 s[30:31]
135  %tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
136  %tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y)
137  %tmp2 = call i32 @llvm.smin.i32(i32 %tmp1, i32 %z)
138  %tmp3 = call i32 @llvm.smax.i32(i32 %tmp0, i32 %tmp2)
139  store i32 %tmp3, ptr addrspace(1) %arg
140  ret void
141}
142
; Unsigned counterpart of test_minmax_i32: umin(umax(%a, %b), %c). The checks
; expect a single v_maxmin_u32 on both GFX11 and GFX12.
143define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
144; GFX11-LABEL: test_minmax_u32:
145; GFX11:       ; %bb.0:
146; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
148; GFX11-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX12-LABEL: test_minmax_u32:
151; GFX12:       ; %bb.0:
152; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
153; GFX12-NEXT:    s_wait_expcnt 0x0
154; GFX12-NEXT:    s_wait_samplecnt 0x0
155; GFX12-NEXT:    s_wait_bvhcnt 0x0
156; GFX12-NEXT:    s_wait_kmcnt 0x0
157; GFX12-NEXT:    v_maxmin_u32 v0, v0, v1, v2
158; GFX12-NEXT:    s_setpc_b64 s[30:31]
159  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
160  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
161  ret i32 %uminmax
162}
163
; Unsigned umin(umax(%a, %b), %c) in an amdgpu_ps function whose operands are
; all `inreg`. The checks expect separate s_max_u32 and s_min_u32 instructions
; from both selectors, followed by a global_store_b32 of the result; SDAG and
; GISEL differ only in register choice and move ordering.
; The IR values are named %umax/%uminmax to match the unsigned intrinsics they
; hold; value names do not appear in the generated assembly, so the checks are
; unaffected.
164define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
165; SDAG-LABEL: s_test_minmax_u32:
166; SDAG:       ; %bb.0:
167; SDAG-NEXT:    s_max_u32 s0, s0, s1
168; SDAG-NEXT:    s_mov_b32 s5, s4
169; SDAG-NEXT:    s_min_u32 s0, s0, s2
170; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
171; SDAG-NEXT:    s_mov_b32 s4, s3
172; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
173; SDAG-NEXT:    s_endpgm
174;
175; GISEL-LABEL: s_test_minmax_u32:
176; GISEL:       ; %bb.0:
177; GISEL-NEXT:    s_max_u32 s0, s0, s1
178; GISEL-NEXT:    s_mov_b32 s6, s3
179; GISEL-NEXT:    s_min_u32 s0, s0, s2
180; GISEL-NEXT:    s_mov_b32 s7, s4
181; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
182; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
183; GISEL-NEXT:    s_endpgm
184  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
185  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
186  store i32 %uminmax, ptr addrspace(1) %out
187  ret void
188}
189
; Commuted form: umin(%c, umax(%a, %b)). The checks expect the same single
; v_maxmin_u32 as the non-commuted test on both GFX11 and GFX12.
190define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
191; GFX11-LABEL: test_minmax_commuted_u32:
192; GFX11:       ; %bb.0:
193; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
195; GFX11-NEXT:    s_setpc_b64 s[30:31]
196;
197; GFX12-LABEL: test_minmax_commuted_u32:
198; GFX12:       ; %bb.0:
199; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
200; GFX12-NEXT:    s_wait_expcnt 0x0
201; GFX12-NEXT:    s_wait_samplecnt 0x0
202; GFX12-NEXT:    s_wait_bvhcnt 0x0
203; GFX12-NEXT:    s_wait_kmcnt 0x0
204; GFX12-NEXT:    v_maxmin_u32 v0, v0, v1, v2
205; GFX12-NEXT:    s_setpc_b64 s[30:31]
206  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
207  %uminmax = call i32 @llvm.umin.i32(i32 %c, i32 %umax)
208  ret i32 %uminmax
209}
210
; Computes umax(umin(%a, %b), %c). The checks expect a single v_minmax_u32 on
; both GFX11 and GFX12.
211define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
212; GFX11-LABEL: test_maxmin_u32:
213; GFX11:       ; %bb.0:
214; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
216; GFX11-NEXT:    s_setpc_b64 s[30:31]
217;
218; GFX12-LABEL: test_maxmin_u32:
219; GFX12:       ; %bb.0:
220; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
221; GFX12-NEXT:    s_wait_expcnt 0x0
222; GFX12-NEXT:    s_wait_samplecnt 0x0
223; GFX12-NEXT:    s_wait_bvhcnt 0x0
224; GFX12-NEXT:    s_wait_kmcnt 0x0
225; GFX12-NEXT:    v_minmax_u32 v0, v0, v1, v2
226; GFX12-NEXT:    s_setpc_b64 s[30:31]
227  %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
228  %umaxmin = call i32 @llvm.umax.i32(i32 %umin, i32 %c)
229  ret i32 %umaxmin
230}
231
; Commuted form: umax(%c, umin(%a, %b)). The checks expect the same single
; v_minmax_u32 as the non-commuted test on both GFX11 and GFX12.
232define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
233; GFX11-LABEL: test_maxmin_commuted_u32:
234; GFX11:       ; %bb.0:
235; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
237; GFX11-NEXT:    s_setpc_b64 s[30:31]
238;
239; GFX12-LABEL: test_maxmin_commuted_u32:
240; GFX12:       ; %bb.0:
241; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
242; GFX12-NEXT:    s_wait_expcnt 0x0
243; GFX12-NEXT:    s_wait_samplecnt 0x0
244; GFX12-NEXT:    s_wait_bvhcnt 0x0
245; GFX12-NEXT:    s_wait_kmcnt 0x0
246; GFX12-NEXT:    v_minmax_u32 v0, v0, v1, v2
247; GFX12-NEXT:    s_setpc_b64 s[30:31]
248  %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
249  %umaxmin = call i32 @llvm.umax.i32(i32 %c, i32 %umin)
250  ret i32 %umaxmin
251}
252
; Unsigned median-of-three written as umax(umin(x, y), umin(umax(x, y), z))
; and stored to %arg. The checks expect one v_med3_u32 on both GFX11 and GFX12.
253define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
254; GFX11-LABEL: test_umed3_i32:
255; GFX11:       ; %bb.0:
256; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; GFX11-NEXT:    v_med3_u32 v2, v2, v3, v4
258; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
259; GFX11-NEXT:    s_setpc_b64 s[30:31]
260;
261; GFX12-LABEL: test_umed3_i32:
262; GFX12:       ; %bb.0:
263; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
264; GFX12-NEXT:    s_wait_expcnt 0x0
265; GFX12-NEXT:    s_wait_samplecnt 0x0
266; GFX12-NEXT:    s_wait_bvhcnt 0x0
267; GFX12-NEXT:    s_wait_kmcnt 0x0
268; GFX12-NEXT:    v_med3_u32 v2, v2, v3, v4
269; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
270; GFX12-NEXT:    s_setpc_b64 s[30:31]
271  %tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y)
272  %tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y)
273  %tmp2 = call i32 @llvm.umin.i32(i32 %tmp1, i32 %z)
274  %tmp3 = call i32 @llvm.umax.i32(i32 %tmp0, i32 %tmp2)
275  store i32 %tmp3, ptr addrspace(1) %arg
276  ret void
277}
278
; Computes minnum(maxnum(%a, %b), %c) on float in a default-calling-convention
; function. The checks expect each of the three inputs to first go through a
; max of the value with itself (v_max_f32 on GFX11, v_max_num_f32 on GFX12),
; and then a single v_maxmin_f32 / v_maxmin_num_f32.
; NOTE(review): the self-max is presumably input canonicalization required
; because IEEE mode is on for this calling convention (per the test name) —
; confirm against the backend.
; SDAG and GISEL are checked separately because the order of the self-max
; instructions differs.
279define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
280; SDAG-GFX11-LABEL: test_minmax_f32_ieee_true:
281; SDAG-GFX11:       ; %bb.0:
282; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; SDAG-GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
284; SDAG-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
285; SDAG-GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
286; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
287;
288; GISEL-GFX11-LABEL: test_minmax_f32_ieee_true:
289; GISEL-GFX11:       ; %bb.0:
290; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291; GISEL-GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
292; GISEL-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
293; GISEL-GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
294; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
295;
296; SDAG-GFX12-LABEL: test_minmax_f32_ieee_true:
297; SDAG-GFX12:       ; %bb.0:
298; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
299; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
300; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
301; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
302; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
303; SDAG-GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
304; SDAG-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
305; SDAG-GFX12-NEXT:    v_maxmin_num_f32 v0, v0, v1, v2
306; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
307;
308; GISEL-GFX12-LABEL: test_minmax_f32_ieee_true:
309; GISEL-GFX12:       ; %bb.0:
310; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
311; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
312; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
313; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
314; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
315; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
316; GISEL-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
317; GISEL-GFX12-NEXT:    v_maxmin_num_f32 v0, v0, v1, v2
318; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
319  %max = call float @llvm.maxnum.f32(float %a, float %b)
320  %minmax = call float @llvm.minnum.f32(float %max, float %c)
321  ret float %minmax
322}
323
; Computes minnum(maxnum(%a, %b), %c) on float in an amdgpu_ps function whose
; operands are all `inreg`, then stores the result to %out.
; Expected code per the checks:
;   - GFX11 (both selectors) and SDAG on GFX12: %c is copied into a VGPR and a
;     single v_maxmin_f32 / v_maxmin_num_f32 is used; no self-max of the inputs
;     appears.
;   - GISEL on GFX12: separate s_max_num_f32 and s_min_num_f32 instructions.
; The IR values are named %max/%minmax like the other f32 tests in this file;
; value names do not appear in the generated assembly, so the checks are
; unaffected.
324define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
325; SDAG-GFX11-LABEL: s_test_minmax_f32_ieee_false:
326; SDAG-GFX11:       ; %bb.0:
327; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
328; SDAG-GFX11-NEXT:    s_mov_b32 s5, s4
329; SDAG-GFX11-NEXT:    s_mov_b32 s4, s3
330; SDAG-GFX11-NEXT:    v_maxmin_f32 v0, s0, s1, v0
331; SDAG-GFX11-NEXT:    global_store_b32 v1, v0, s[4:5]
332; SDAG-GFX11-NEXT:    s_endpgm
333;
334; GISEL-GFX11-LABEL: s_test_minmax_f32_ieee_false:
335; GISEL-GFX11:       ; %bb.0:
336; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
337; GISEL-GFX11-NEXT:    s_mov_b32 s6, s3
338; GISEL-GFX11-NEXT:    s_mov_b32 s7, s4
339; GISEL-GFX11-NEXT:    v_maxmin_f32 v0, s0, s1, v0
340; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[6:7]
341; GISEL-GFX11-NEXT:    s_endpgm
342;
343; SDAG-GFX12-LABEL: s_test_minmax_f32_ieee_false:
344; SDAG-GFX12:       ; %bb.0:
345; SDAG-GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
346; SDAG-GFX12-NEXT:    s_mov_b32 s5, s4
347; SDAG-GFX12-NEXT:    s_mov_b32 s4, s3
348; SDAG-GFX12-NEXT:    v_maxmin_num_f32 v0, s0, s1, v0
349; SDAG-GFX12-NEXT:    global_store_b32 v1, v0, s[4:5]
350; SDAG-GFX12-NEXT:    s_endpgm
351;
352; GISEL-GFX12-LABEL: s_test_minmax_f32_ieee_false:
353; GISEL-GFX12:       ; %bb.0:
354; GISEL-GFX12-NEXT:    s_max_num_f32 s0, s0, s1
355; GISEL-GFX12-NEXT:    s_mov_b32 s6, s3
356; GISEL-GFX12-NEXT:    s_mov_b32 s7, s4
357; GISEL-GFX12-NEXT:    v_mov_b32_e32 v1, 0
358; GISEL-GFX12-NEXT:    s_min_num_f32 s0, s0, s2
359; GISEL-GFX12-NEXT:    v_mov_b32_e32 v0, s0
360; GISEL-GFX12-NEXT:    global_store_b32 v1, v0, s[6:7]
361; GISEL-GFX12-NEXT:    s_endpgm
362  %max = call float @llvm.maxnum.f32(float %a, float %b)
363  %minmax = call float @llvm.minnum.f32(float %max, float %c)
364  store float %minmax, ptr addrspace(1) %out
365  ret void
366}
367
; Commuted form minnum(%c, maxnum(%a, %b)) on float in an amdgpu_ps function.
; The checks expect just a single v_maxmin_f32 (GFX11) / v_maxmin_num_f32
; (GFX12), with no self-max of the inputs beforehand.
368define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, float %c) {
369; GFX11-LABEL: test_minmax_commuted_f32_ieee_false:
370; GFX11:       ; %bb.0:
371; GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
372; GFX11-NEXT:    ; return to shader part epilog
373;
374; GFX12-LABEL: test_minmax_commuted_f32_ieee_false:
375; GFX12:       ; %bb.0:
376; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, v1, v2
377; GFX12-NEXT:    ; return to shader part epilog
378  %max = call float @llvm.maxnum.f32(float %a, float %b)
379  %minmax = call float @llvm.minnum.f32(float %c, float %max)
380  ret float %minmax
381}
382
; Computes maxnum(minnum(%a, %b), %c) on float in a default-calling-convention
; function. The checks expect a self-max of each input (v_max_f32 on GFX11,
; v_max_num_f32 on GFX12) followed by a single v_minmax_f32 / v_minmax_num_f32.
; NOTE(review): the self-max is presumably input canonicalization tied to IEEE
; mode (per the test name) — confirm against the backend.
383define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
384; SDAG-GFX11-LABEL: test_maxmin_f32_ieee_true:
385; SDAG-GFX11:       ; %bb.0:
386; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387; SDAG-GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
388; SDAG-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
389; SDAG-GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
390; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
391;
392; GISEL-GFX11-LABEL: test_maxmin_f32_ieee_true:
393; GISEL-GFX11:       ; %bb.0:
394; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GISEL-GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
396; GISEL-GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
397; GISEL-GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
398; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
399;
400; SDAG-GFX12-LABEL: test_maxmin_f32_ieee_true:
401; SDAG-GFX12:       ; %bb.0:
402; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
403; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
404; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
405; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
406; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
407; SDAG-GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
408; SDAG-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
409; SDAG-GFX12-NEXT:    v_minmax_num_f32 v0, v0, v1, v2
410; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
411;
412; GISEL-GFX12-LABEL: test_maxmin_f32_ieee_true:
413; GISEL-GFX12:       ; %bb.0:
414; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
415; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
416; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
417; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
418; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
419; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
420; GISEL-GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
421; GISEL-GFX12-NEXT:    v_minmax_num_f32 v0, v0, v1, v2
422; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
423  %min = call float @llvm.minnum.f32(float %a, float %b)
424  %maxmin = call float @llvm.maxnum.f32(float %min, float %c)
425  ret float %maxmin
426}
427
; Commuted form maxnum(%c, minnum(%a, %b)) on float in an amdgpu_ps function.
; The checks expect just a single v_minmax_f32 (GFX11) / v_minmax_num_f32
; (GFX12), with no self-max of the inputs beforehand.
428define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, float %c) {
429; GFX11-LABEL: test_maxmin_commuted_f32_ieee_false:
430; GFX11:       ; %bb.0:
431; GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
432; GFX11-NEXT:    ; return to shader part epilog
433;
434; GFX12-LABEL: test_maxmin_commuted_f32_ieee_false:
435; GFX12:       ; %bb.0:
436; GFX12-NEXT:    v_minmax_num_f32 v0, v0, v1, v2
437; GFX12-NEXT:    ; return to shader part epilog
438  %min = call float @llvm.minnum.f32(float %a, float %b)
439  %maxmin = call float @llvm.maxnum.f32(float %c, float %min)
440  ret float %maxmin
441}
442
; Float median-of-three written as
; maxnum(minnum(x, y), minnum(maxnum(x, y), z)) and stored to %arg. The
; function carries attribute group #0, which sets "no-nans-fp-math"="true".
; The checks expect one v_med3_f32 (GFX11) / v_med3_num_f32 (GFX12) and no
; self-max of the inputs.
443define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
444; GFX11-LABEL: test_med3_f32:
445; GFX11:       ; %bb.0:
446; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447; GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
448; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
449; GFX11-NEXT:    s_setpc_b64 s[30:31]
450;
451; GFX12-LABEL: test_med3_f32:
452; GFX12:       ; %bb.0:
453; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
454; GFX12-NEXT:    s_wait_expcnt 0x0
455; GFX12-NEXT:    s_wait_samplecnt 0x0
456; GFX12-NEXT:    s_wait_bvhcnt 0x0
457; GFX12-NEXT:    s_wait_kmcnt 0x0
458; GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
459; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
460; GFX12-NEXT:    s_setpc_b64 s[30:31]
461  %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
462  %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
463  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
464  %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
465  store float %tmp3, ptr addrspace(1) %arg
466  ret void
467}
468
; Half-precision minnum(maxnum(%a, %b), %c) in an amdgpu_ps function. The
; checks expect a single v_maxmin_f16 (GFX11) / v_maxmin_num_f16 (GFX12).
469define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
470; GFX11-LABEL: test_minmax_f16_ieee_false:
471; GFX11:       ; %bb.0:
472; GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
473; GFX11-NEXT:    ; return to shader part epilog
474;
475; GFX12-LABEL: test_minmax_f16_ieee_false:
476; GFX12:       ; %bb.0:
477; GFX12-NEXT:    v_maxmin_num_f16 v0, v0, v1, v2
478; GFX12-NEXT:    ; return to shader part epilog
479  %max = call half @llvm.maxnum.f16(half %a, half %b)
480  %minmax = call half @llvm.minnum.f16(half %max, half %c)
481  ret half %minmax
482}
483
; Half-precision minnum(maxnum(%a, %b), %c) in an amdgpu_ps function whose
; operands are all `inreg`; the result is stored to %out with global_store_b16.
; Expected code per the checks:
;   - GFX11 (both selectors) and SDAG on GFX12: %c is copied into a VGPR and a
;     single v_maxmin_f16 / v_maxmin_num_f16 is used.
;   - GISEL on GFX12: separate s_max_num_f16 and s_min_num_f16 instructions.
; The IR values are named %max/%minmax like the other f16 tests in this file;
; value names do not appear in the generated assembly, so the checks are
; unaffected.
484define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
485; SDAG-GFX11-LABEL: s_test_minmax_f16_ieee_false:
486; SDAG-GFX11:       ; %bb.0:
487; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
488; SDAG-GFX11-NEXT:    s_mov_b32 s5, s4
489; SDAG-GFX11-NEXT:    s_mov_b32 s4, s3
490; SDAG-GFX11-NEXT:    v_maxmin_f16 v0, s0, s1, v0
491; SDAG-GFX11-NEXT:    global_store_b16 v1, v0, s[4:5]
492; SDAG-GFX11-NEXT:    s_endpgm
493;
494; GISEL-GFX11-LABEL: s_test_minmax_f16_ieee_false:
495; GISEL-GFX11:       ; %bb.0:
496; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
497; GISEL-GFX11-NEXT:    s_mov_b32 s6, s3
498; GISEL-GFX11-NEXT:    s_mov_b32 s7, s4
499; GISEL-GFX11-NEXT:    v_maxmin_f16 v0, s0, s1, v0
500; GISEL-GFX11-NEXT:    global_store_b16 v1, v0, s[6:7]
501; GISEL-GFX11-NEXT:    s_endpgm
502;
503; SDAG-GFX12-LABEL: s_test_minmax_f16_ieee_false:
504; SDAG-GFX12:       ; %bb.0:
505; SDAG-GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
506; SDAG-GFX12-NEXT:    s_mov_b32 s5, s4
507; SDAG-GFX12-NEXT:    s_mov_b32 s4, s3
508; SDAG-GFX12-NEXT:    v_maxmin_num_f16 v0, s0, s1, v0
509; SDAG-GFX12-NEXT:    global_store_b16 v1, v0, s[4:5]
510; SDAG-GFX12-NEXT:    s_endpgm
511;
512; GISEL-GFX12-LABEL: s_test_minmax_f16_ieee_false:
513; GISEL-GFX12:       ; %bb.0:
514; GISEL-GFX12-NEXT:    s_max_num_f16 s0, s0, s1
515; GISEL-GFX12-NEXT:    s_mov_b32 s6, s3
516; GISEL-GFX12-NEXT:    s_mov_b32 s7, s4
517; GISEL-GFX12-NEXT:    v_mov_b32_e32 v1, 0
518; GISEL-GFX12-NEXT:    s_min_num_f16 s0, s0, s2
519; GISEL-GFX12-NEXT:    v_mov_b32_e32 v0, s0
520; GISEL-GFX12-NEXT:    global_store_b16 v1, v0, s[6:7]
521; GISEL-GFX12-NEXT:    s_endpgm
522  %max = call half @llvm.maxnum.f16(half %a, half %b)
523  %minmax = call half @llvm.minnum.f16(half %max, half %c)
524  store half %minmax, ptr addrspace(1) %out
525  ret void
526}
527
; Commuted half-precision form minnum(%c, maxnum(%a, %b)) in a
; default-calling-convention function. The checks expect a self-max of each
; input (v_max_f16 on GFX11, v_max_num_f16 on GFX12) followed by a single
; v_maxmin_f16 / v_maxmin_num_f16.
; NOTE(review): the self-max is presumably input canonicalization tied to IEEE
; mode (per the test name) — confirm against the backend.
528define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
529; SDAG-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
530; SDAG-GFX11:       ; %bb.0:
531; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532; SDAG-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
533; SDAG-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
534; SDAG-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
535; SDAG-GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
536; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
537;
538; GISEL-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
539; GISEL-GFX11:       ; %bb.0:
540; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; GISEL-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
542; GISEL-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
543; GISEL-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
544; GISEL-GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
545; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
546;
547; SDAG-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
548; SDAG-GFX12:       ; %bb.0:
549; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
550; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
551; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
552; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
553; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
554; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
555; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
556; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
557; SDAG-GFX12-NEXT:    v_maxmin_num_f16 v0, v0, v1, v2
558; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
559;
560; GISEL-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
561; GISEL-GFX12:       ; %bb.0:
562; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
563; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
564; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
565; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
566; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
567; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
568; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
569; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
570; GISEL-GFX12-NEXT:    v_maxmin_num_f16 v0, v0, v1, v2
571; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
572  %max = call half @llvm.maxnum.f16(half %a, half %b)
573  %minmax = call half @llvm.minnum.f16(half %c, half %max)
574  ret half %minmax
575}
576
; Half-precision maxnum(minnum(%a, %b), %c) in an amdgpu_ps function. The
; checks expect a single v_minmax_f16 (GFX11) / v_minmax_num_f16 (GFX12).
577define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
578; GFX11-LABEL: test_maxmin_f16_ieee_false:
579; GFX11:       ; %bb.0:
580; GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
581; GFX11-NEXT:    ; return to shader part epilog
582;
583; GFX12-LABEL: test_maxmin_f16_ieee_false:
584; GFX12:       ; %bb.0:
585; GFX12-NEXT:    v_minmax_num_f16 v0, v0, v1, v2
586; GFX12-NEXT:    ; return to shader part epilog
587  %min = call half @llvm.minnum.f16(half %a, half %b)
588  %maxmin = call half @llvm.maxnum.f16(half %min, half %c)
589  ret half %maxmin
590}
591
; Commuted half-precision form maxnum(%c, minnum(%a, %b)) in a
; default-calling-convention function. The checks expect a self-max of each
; input (v_max_f16 on GFX11, v_max_num_f16 on GFX12) followed by a single
; v_minmax_f16 / v_minmax_num_f16.
; NOTE(review): the self-max is presumably input canonicalization tied to IEEE
; mode (per the test name) — confirm against the backend.
592define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
593; SDAG-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
594; SDAG-GFX11:       ; %bb.0:
595; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596; SDAG-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
597; SDAG-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
598; SDAG-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
599; SDAG-GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
600; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
601;
602; GISEL-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
603; GISEL-GFX11:       ; %bb.0:
604; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605; GISEL-GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
606; GISEL-GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
607; GISEL-GFX11-NEXT:    v_max_f16_e32 v2, v2, v2
608; GISEL-GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
609; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
610;
611; SDAG-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
612; SDAG-GFX12:       ; %bb.0:
613; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
614; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
615; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
616; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
617; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
618; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
619; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
620; SDAG-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
621; SDAG-GFX12-NEXT:    v_minmax_num_f16 v0, v0, v1, v2
622; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
623;
624; GISEL-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
625; GISEL-GFX12:       ; %bb.0:
626; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
627; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
628; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
629; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
630; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
631; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
632; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
633; GISEL-GFX12-NEXT:    v_max_num_f16_e32 v2, v2, v2
634; GISEL-GFX12-NEXT:    v_minmax_num_f16 v0, v0, v1, v2
635; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
636  %min = call half @llvm.minnum.f16(half %a, half %b)
637  %maxmin = call half @llvm.maxnum.f16(half %c, half %min)
638  ret half %maxmin
639}
640
; Half-precision median-of-three written as
; maxnum(minnum(x, y), minnum(maxnum(x, y), z)) and stored to %arg. The
; function carries attribute group #0, which sets "no-nans-fp-math"="true".
; The checks expect one v_med3_f16 (GFX11) / v_med3_num_f16 (GFX12) followed by
; a global_store_b16, with no self-max of the inputs.
641define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
642; GFX11-LABEL: test_med3_f16:
643; GFX11:       ; %bb.0:
644; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645; GFX11-NEXT:    v_med3_f16 v2, v2, v3, v4
646; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
647; GFX11-NEXT:    s_setpc_b64 s[30:31]
648;
649; GFX12-LABEL: test_med3_f16:
650; GFX12:       ; %bb.0:
651; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
652; GFX12-NEXT:    s_wait_expcnt 0x0
653; GFX12-NEXT:    s_wait_samplecnt 0x0
654; GFX12-NEXT:    s_wait_bvhcnt 0x0
655; GFX12-NEXT:    s_wait_kmcnt 0x0
656; GFX12-NEXT:    v_med3_num_f16 v2, v2, v3, v4
657; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
658; GFX12-NEXT:    s_setpc_b64 s[30:31]
659  %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
660  %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
661  %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
662  %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
663  store half %tmp3, ptr addrspace(1) %arg
664  ret void
665}
666
; Declarations of the min/max intrinsics called by the tests above:
; signed and unsigned i32 min/max, then minnum/maxnum for half and float.
667declare i32 @llvm.smin.i32(i32, i32)
668declare i32 @llvm.smax.i32(i32, i32)
669declare i32 @llvm.umin.i32(i32, i32)
670declare i32 @llvm.umax.i32(i32, i32)
671declare half @llvm.minnum.f16(half, half)
672declare half @llvm.maxnum.f16(half, half)
673declare float @llvm.minnum.f32(float, float)
674declare float @llvm.maxnum.f32(float, float)
; Attribute group #0 is referenced by test_med3_f32 and test_med3_f16; it marks
; them nounwind, with unsafe-fp-math disabled and no-nans-fp-math enabled.
675attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
676
677