xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
5
; Positive case under attribute group #0: minnum(maxnum(%a, 2.0), 4.0) with
; nnan on both calls. The gfx1010, gfx803 and gfx1200 checks each expect a
; single med3 instruction with operands (v0, 2.0, 4.0).
6define float @test_min_max_ValK0_K1_f32(float %a) #0 {
7; GFX10-LABEL: test_min_max_ValK0_K1_f32:
8; GFX10:       ; %bb.0:
9; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
11; GFX10-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX8-LABEL: test_min_max_ValK0_K1_f32:
14; GFX8:       ; %bb.0:
15; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
17; GFX8-NEXT:    s_setpc_b64 s[30:31]
18;
19; GFX12-LABEL: test_min_max_ValK0_K1_f32:
20; GFX12:       ; %bb.0:
21; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
22; GFX12-NEXT:    s_wait_expcnt 0x0
23; GFX12-NEXT:    s_wait_samplecnt 0x0
24; GFX12-NEXT:    s_wait_bvhcnt 0x0
25; GFX12-NEXT:    s_wait_kmcnt 0x0
26; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
27; GFX12-NEXT:    s_setpc_b64 s[30:31]
28  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
29  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0)
30  ret float %fmed
31}
32
; Same clamp as minnum(maxnum(%a, 2.0), 4.0), but the constant 2.0 is the
; first operand of maxnum and the function uses attribute group #1. Both calls
; carry nnan; all three targets still expect a single med3 instruction.
33define float @test_min_max_K0Val_K1_f32(float %a) #1 {
34; GFX10-LABEL: test_min_max_K0Val_K1_f32:
35; GFX10:       ; %bb.0:
36; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
38; GFX10-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX8-LABEL: test_min_max_K0Val_K1_f32:
41; GFX8:       ; %bb.0:
42; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
44; GFX8-NEXT:    s_setpc_b64 s[30:31]
45;
46; GFX12-LABEL: test_min_max_K0Val_K1_f32:
47; GFX12:       ; %bb.0:
48; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
49; GFX12-NEXT:    s_wait_expcnt 0x0
50; GFX12-NEXT:    s_wait_samplecnt 0x0
51; GFX12-NEXT:    s_wait_bvhcnt 0x0
52; GFX12-NEXT:    s_wait_kmcnt 0x0
53; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
54; GFX12-NEXT:    s_setpc_b64 s[30:31]
55  %maxnum = call nnan float @llvm.maxnum.f32(float 2.0, float %a)
56  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0)
57  ret float %fmed
58}
59
60; min-max patterns for ieee=true do not have to check for NaNs
61; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
; Half-precision minnum(4.0, maxnum(%a, 2.0)) under attribute group #0, with no
; nnan flag on either call. The gfx1010 and gfx1200 checks expect a max of v0
; with itself followed by a 16-bit med3; the gfx803 checks expect that same
; self-max followed by separate max and min instructions (no 16-bit med3).
62define half @test_min_K1max_ValK0_f16(half %a) #0 {
63; GFX10-LABEL: test_min_K1max_ValK0_f16:
64; GFX10:       ; %bb.0:
65; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
67; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
68; GFX10-NEXT:    s_setpc_b64 s[30:31]
69;
70; GFX8-LABEL: test_min_K1max_ValK0_f16:
71; GFX8:       ; %bb.0:
72; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
74; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
75; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
76; GFX8-NEXT:    s_setpc_b64 s[30:31]
77;
78; GFX12-LABEL: test_min_K1max_ValK0_f16:
79; GFX12:       ; %bb.0:
80; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
81; GFX12-NEXT:    s_wait_expcnt 0x0
82; GFX12-NEXT:    s_wait_samplecnt 0x0
83; GFX12-NEXT:    s_wait_bvhcnt 0x0
84; GFX12-NEXT:    s_wait_kmcnt 0x0
85; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
86; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
87; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
88; GFX12-NEXT:    s_setpc_b64 s[30:31]
89  %maxnum = call half @llvm.maxnum.f16(half %a, half 2.0)
90  %fmed = call half @llvm.minnum.f16(half 4.0, half %maxnum)
91  ret half %fmed
92}
93
; Half-precision minnum(4.0, maxnum(2.0, %a)) under attribute group #1, with
; nnan on both calls and the constant first in each call. The gfx1010 and
; gfx1200 checks expect a single 16-bit med3; the gfx803 checks expect a max
; followed by a min.
94define half @test_min_K1max_K0Val_f16(half %a) #1 {
95; GFX10-LABEL: test_min_K1max_K0Val_f16:
96; GFX10:       ; %bb.0:
97; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
99; GFX10-NEXT:    s_setpc_b64 s[30:31]
100;
101; GFX8-LABEL: test_min_K1max_K0Val_f16:
102; GFX8:       ; %bb.0:
103; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
105; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
106; GFX8-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX12-LABEL: test_min_K1max_K0Val_f16:
109; GFX12:       ; %bb.0:
110; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
111; GFX12-NEXT:    s_wait_expcnt 0x0
112; GFX12-NEXT:    s_wait_samplecnt 0x0
113; GFX12-NEXT:    s_wait_bvhcnt 0x0
114; GFX12-NEXT:    s_wait_kmcnt 0x0
115; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
116; GFX12-NEXT:    s_setpc_b64 s[30:31]
117  %maxnum = call nnan half @llvm.maxnum.f16(half 2.0, half %a)
118  %fmed = call nnan half @llvm.minnum.f16(half 4.0, half %maxnum)
119  ret half %fmed
120}
121
122; max-min patterns work only for non-NaN inputs
; Positive case under attribute group #0: maxnum(minnum(%a, 4.0), 2.0) with
; nnan on both calls. All three targets expect a single med3 instruction with
; operands (v0, 2.0, 4.0).
123define float @test_max_min_ValK1_K0_f32(float %a) #0 {
124; GFX10-LABEL: test_max_min_ValK1_K0_f32:
125; GFX10:       ; %bb.0:
126; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
128; GFX10-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX8-LABEL: test_max_min_ValK1_K0_f32:
131; GFX8:       ; %bb.0:
132; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
134; GFX8-NEXT:    s_setpc_b64 s[30:31]
135;
136; GFX12-LABEL: test_max_min_ValK1_K0_f32:
137; GFX12:       ; %bb.0:
138; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
139; GFX12-NEXT:    s_wait_expcnt 0x0
140; GFX12-NEXT:    s_wait_samplecnt 0x0
141; GFX12-NEXT:    s_wait_bvhcnt 0x0
142; GFX12-NEXT:    s_wait_kmcnt 0x0
143; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
144; GFX12-NEXT:    s_setpc_b64 s[30:31]
145  %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
146  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
147  ret float %fmed
148}
149
; maxnum(minnum(4.0, %a), 2.0) under attribute group #1: the constant 4.0 is
; the first operand of minnum and both calls carry nnan. All three targets
; expect a single med3 instruction with operands (v0, 2.0, 4.0).
150define float @test_max_min_K1Val_K0_f32(float %a) #1 {
151; GFX10-LABEL: test_max_min_K1Val_K0_f32:
152; GFX10:       ; %bb.0:
153; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
155; GFX10-NEXT:    s_setpc_b64 s[30:31]
156;
157; GFX8-LABEL: test_max_min_K1Val_K0_f32:
158; GFX8:       ; %bb.0:
159; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
161; GFX8-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX12-LABEL: test_max_min_K1Val_K0_f32:
164; GFX12:       ; %bb.0:
165; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
166; GFX12-NEXT:    s_wait_expcnt 0x0
167; GFX12-NEXT:    s_wait_samplecnt 0x0
168; GFX12-NEXT:    s_wait_bvhcnt 0x0
169; GFX12-NEXT:    s_wait_kmcnt 0x0
170; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
171; GFX12-NEXT:    s_setpc_b64 s[30:31]
172  %minnum = call nnan float @llvm.minnum.f32(float 4.0, float %a)
173  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
174  ret float %fmed
175}
176
; Half-precision maxnum(2.0, minnum(%a, 4.0)) under attribute group #0, with
; nnan on both calls. The gfx1010 and gfx1200 checks expect a single 16-bit
; med3; the gfx803 checks expect a min followed by a max.
177define half @test_max_K0min_ValK1_f16(half %a) #0 {
178; GFX10-LABEL: test_max_K0min_ValK1_f16:
179; GFX10:       ; %bb.0:
180; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
182; GFX10-NEXT:    s_setpc_b64 s[30:31]
183;
184; GFX8-LABEL: test_max_K0min_ValK1_f16:
185; GFX8:       ; %bb.0:
186; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
188; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
189; GFX8-NEXT:    s_setpc_b64 s[30:31]
190;
191; GFX12-LABEL: test_max_K0min_ValK1_f16:
192; GFX12:       ; %bb.0:
193; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
194; GFX12-NEXT:    s_wait_expcnt 0x0
195; GFX12-NEXT:    s_wait_samplecnt 0x0
196; GFX12-NEXT:    s_wait_bvhcnt 0x0
197; GFX12-NEXT:    s_wait_kmcnt 0x0
198; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
199; GFX12-NEXT:    s_setpc_b64 s[30:31]
200  %minnum = call nnan half @llvm.minnum.f16(half %a, half 4.0)
201  %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum)
202  ret half %fmed
203}
204
; Half-precision maxnum(2.0, minnum(4.0, %a)) under attribute group #1, with
; nnan on both calls and the constant first in each call. The gfx1010 and
; gfx1200 checks expect a single 16-bit med3; the gfx803 checks expect a min
; followed by a max.
205define half @test_max_K0min_K1Val_f16(half %a) #1 {
206; GFX10-LABEL: test_max_K0min_K1Val_f16:
207; GFX10:       ; %bb.0:
208; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
210; GFX10-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX8-LABEL: test_max_K0min_K1Val_f16:
213; GFX8:       ; %bb.0:
214; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
216; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
217; GFX8-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX12-LABEL: test_max_K0min_K1Val_f16:
220; GFX12:       ; %bb.0:
221; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
222; GFX12-NEXT:    s_wait_expcnt 0x0
223; GFX12-NEXT:    s_wait_samplecnt 0x0
224; GFX12-NEXT:    s_wait_bvhcnt 0x0
225; GFX12-NEXT:    s_wait_kmcnt 0x0
226; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
227; GFX12-NEXT:    s_setpc_b64 s[30:31]
228  %minnum = call nnan half @llvm.minnum.f16(half 4.0, half %a)
229  %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum)
230  ret half %fmed
231}
232
233; global nnan function attribute always forces fmed3 combine
234
; minnum(maxnum(%a, 2.0), 4.0) with no nnan flag on either call; the function
; instead uses attribute group #2 ("no-nans-fp-math"="true"). All three targets
; expect a single med3 instruction with operands (v0, 2.0, 4.0).
235define float @test_min_max_global_nnan(float %a) #2 {
236; GFX10-LABEL: test_min_max_global_nnan:
237; GFX10:       ; %bb.0:
238; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
240; GFX10-NEXT:    s_setpc_b64 s[30:31]
241;
242; GFX8-LABEL: test_min_max_global_nnan:
243; GFX8:       ; %bb.0:
244; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
246; GFX8-NEXT:    s_setpc_b64 s[30:31]
247;
248; GFX12-LABEL: test_min_max_global_nnan:
249; GFX12:       ; %bb.0:
250; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
251; GFX12-NEXT:    s_wait_expcnt 0x0
252; GFX12-NEXT:    s_wait_samplecnt 0x0
253; GFX12-NEXT:    s_wait_bvhcnt 0x0
254; GFX12-NEXT:    s_wait_kmcnt 0x0
255; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
256; GFX12-NEXT:    s_setpc_b64 s[30:31]
257  %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
258  %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
259  ret float %fmed
260}
261
; maxnum(minnum(%a, 4.0), 2.0) with no nnan flag on either call; the function
; instead uses attribute group #2 ("no-nans-fp-math"="true"). All three targets
; expect a single med3 instruction with operands (v0, 2.0, 4.0).
262define float @test_max_min_global_nnan(float %a) #2 {
263; GFX10-LABEL: test_max_min_global_nnan:
264; GFX10:       ; %bb.0:
265; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
267; GFX10-NEXT:    s_setpc_b64 s[30:31]
268;
269; GFX8-LABEL: test_max_min_global_nnan:
270; GFX8:       ; %bb.0:
271; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
273; GFX8-NEXT:    s_setpc_b64 s[30:31]
274;
275; GFX12-LABEL: test_max_min_global_nnan:
276; GFX12:       ; %bb.0:
277; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
278; GFX12-NEXT:    s_wait_expcnt 0x0
279; GFX12-NEXT:    s_wait_samplecnt 0x0
280; GFX12-NEXT:    s_wait_bvhcnt 0x0
281; GFX12-NEXT:    s_wait_kmcnt 0x0
282; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
283; GFX12-NEXT:    s_setpc_b64 s[30:31]
284  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
285  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
286  ret float %fmed
287}
288
289; ------------------------------------------------------------------------------
290; Negative patterns
291; ------------------------------------------------------------------------------
292
293; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
; Negative case: minnum(maxnum(%a, 4.0), 2.0), so the maxnum constant (4.0) is
; greater than the minnum constant (2.0). No med3 is expected: the gfx1010 and
; gfx803 checks keep a separate max and min, and the gfx1200 checks expect one
; v_maxmin_num_f32.
294define float @test_min_max_K0_gt_K1(float %a) #0 {
295; GFX10-LABEL: test_min_max_K0_gt_K1:
296; GFX10:       ; %bb.0:
297; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
299; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
300; GFX10-NEXT:    s_setpc_b64 s[30:31]
301;
302; GFX8-LABEL: test_min_max_K0_gt_K1:
303; GFX8:       ; %bb.0:
304; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
306; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
307; GFX8-NEXT:    s_setpc_b64 s[30:31]
308;
309; GFX12-LABEL: test_min_max_K0_gt_K1:
310; GFX12:       ; %bb.0:
311; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
312; GFX12-NEXT:    s_wait_expcnt 0x0
313; GFX12-NEXT:    s_wait_samplecnt 0x0
314; GFX12-NEXT:    s_wait_bvhcnt 0x0
315; GFX12-NEXT:    s_wait_kmcnt 0x0
316; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 4.0, 2.0
317; GFX12-NEXT:    s_setpc_b64 s[30:31]
318  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 4.0)
319  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 2.0)
320  ret float %fmed
321}
322
323; max(min(Val, K1), K0) K0 > K1, should be K0<=K1
; Negative case: maxnum(minnum(%a, 2.0), 4.0), so the maxnum constant (4.0) is
; greater than the minnum constant (2.0). No med3 is expected: the gfx1010 and
; gfx803 checks keep a separate min and max, and the gfx1200 checks expect one
; v_minmax_num_f32.
324define float @test_max_min_K0_gt_K1(float %a) #0 {
325; GFX10-LABEL: test_max_min_K0_gt_K1:
326; GFX10:       ; %bb.0:
327; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
329; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
330; GFX10-NEXT:    s_setpc_b64 s[30:31]
331;
332; GFX8-LABEL: test_max_min_K0_gt_K1:
333; GFX8:       ; %bb.0:
334; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
336; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
337; GFX8-NEXT:    s_setpc_b64 s[30:31]
338;
339; GFX12-LABEL: test_max_min_K0_gt_K1:
340; GFX12:       ; %bb.0:
341; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
342; GFX12-NEXT:    s_wait_expcnt 0x0
343; GFX12-NEXT:    s_wait_samplecnt 0x0
344; GFX12-NEXT:    s_wait_bvhcnt 0x0
345; GFX12-NEXT:    s_wait_kmcnt 0x0
346; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 2.0, 4.0
347; GFX12-NEXT:    s_setpc_b64 s[30:31]
348  %minnum = call nnan float @llvm.minnum.f32(float %a, float 2.0)
349  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 4.0)
350  ret float %fmed
351}
352
353; non-inline constant
; Negative case: minnum(maxnum(%a, 2.0), 8.0). The constant 8.0 shows up in the
; expected assembly as the 32-bit literal 0x41000000 rather than as an inline
; operand. No med3 is expected: the gfx1010 and gfx803 checks keep a separate
; max and min, and the gfx1200 checks expect one v_maxmin_num_f32.
354define float @test_min_max_non_inline_const(float %a) #0 {
355; GFX10-LABEL: test_min_max_non_inline_const:
356; GFX10:       ; %bb.0:
357; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
359; GFX10-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
360; GFX10-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX8-LABEL: test_min_max_non_inline_const:
363; GFX8:       ; %bb.0:
364; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
366; GFX8-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
367; GFX8-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX12-LABEL: test_min_max_non_inline_const:
370; GFX12:       ; %bb.0:
371; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
372; GFX12-NEXT:    s_wait_expcnt 0x0
373; GFX12-NEXT:    s_wait_samplecnt 0x0
374; GFX12-NEXT:    s_wait_bvhcnt 0x0
375; GFX12-NEXT:    s_wait_kmcnt 0x0
376; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 2.0, 0x41000000
377; GFX12-NEXT:    s_setpc_b64 s[30:31]
378  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
379  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 8.0)
380  ret float %fmed
381}
382
383; there is no fmed3 for f64 or v2f16 types
384
; Negative case on double: minnum(maxnum(%a, 2.0), 4.0) with nnan on both
; calls. Every target is expected to keep a separate 64-bit max and min on the
; v[0:1] register pair; no med3 appears in the checks.
385define double @test_min_max_f64(double %a) #0 {
386; GFX10-LABEL: test_min_max_f64:
387; GFX10:       ; %bb.0:
388; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
390; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
391; GFX10-NEXT:    s_setpc_b64 s[30:31]
392;
393; GFX8-LABEL: test_min_max_f64:
394; GFX8:       ; %bb.0:
395; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
397; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
398; GFX8-NEXT:    s_setpc_b64 s[30:31]
399;
400; GFX12-LABEL: test_min_max_f64:
401; GFX12:       ; %bb.0:
402; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
403; GFX12-NEXT:    s_wait_expcnt 0x0
404; GFX12-NEXT:    s_wait_samplecnt 0x0
405; GFX12-NEXT:    s_wait_bvhcnt 0x0
406; GFX12-NEXT:    s_wait_kmcnt 0x0
407; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], 2.0, v[0:1]
408; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
409; GFX12-NEXT:    v_min_num_f64_e32 v[0:1], 4.0, v[0:1]
410; GFX12-NEXT:    s_setpc_b64 s[30:31]
411  %maxnum = call nnan double @llvm.maxnum.f64(double %a, double 2.0)
412  %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 4.0)
413  ret double %fmed
414}
415
; Negative case on <2 x half>: minnum(maxnum(%a, <2.0, 2.0>), <4.0, 4.0>) with
; nnan on both calls. The gfx1010 and gfx1200 checks expect a packed max
; followed by a packed min. The gfx803 checks expect the two halves to be
; processed separately (plain and SDWA max/min) and recombined with v_or_b32.
; No med3 appears in any of the checks.
416define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
417; GFX10-LABEL: test_min_max_v2f16:
418; GFX10:       ; %bb.0:
419; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420; GFX10-NEXT:    v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
421; GFX10-NEXT:    v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
422; GFX10-NEXT:    s_setpc_b64 s[30:31]
423;
424; GFX8-LABEL: test_min_max_v2f16:
425; GFX8:       ; %bb.0:
426; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4000
428; GFX8-NEXT:    v_max_f16_e32 v1, 2.0, v0
429; GFX8-NEXT:    v_max_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
430; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4400
431; GFX8-NEXT:    v_min_f16_e32 v1, 4.0, v1
432; GFX8-NEXT:    v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
433; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
434; GFX8-NEXT:    s_setpc_b64 s[30:31]
435;
436; GFX12-LABEL: test_min_max_v2f16:
437; GFX12:       ; %bb.0:
438; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
439; GFX12-NEXT:    s_wait_expcnt 0x0
440; GFX12-NEXT:    s_wait_samplecnt 0x0
441; GFX12-NEXT:    s_wait_bvhcnt 0x0
442; GFX12-NEXT:    s_wait_kmcnt 0x0
443; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, 2.0 op_sel_hi:[1,0]
444; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
445; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, 4.0 op_sel_hi:[1,0]
446; GFX12-NEXT:    s_setpc_b64 s[30:31]
447  %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
448  %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> <half 4.0, half 4.0>)
449  ret <2 x half> %fmed
450}
451
452; input that can be NaN
453
454; min-max patterns for ieee=false require known non-NaN input
; minnum(maxnum(%a, 2.0), 4.0) under attribute group #1 with no nnan flag on
; either call, so nothing in the IR rules out a NaN input. The gfx1010 and
; gfx803 checks keep a separate max and min. The gfx1200 checks differ: they
; expect a max of v0 with itself followed by v_med3_num_f32.
455define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
456; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
457; GFX10:       ; %bb.0:
458; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
460; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
461; GFX10-NEXT:    s_setpc_b64 s[30:31]
462;
463; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
464; GFX8:       ; %bb.0:
465; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
467; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
468; GFX8-NEXT:    s_setpc_b64 s[30:31]
469;
470; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
471; GFX12:       ; %bb.0:
472; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
473; GFX12-NEXT:    s_wait_expcnt 0x0
474; GFX12-NEXT:    s_wait_samplecnt 0x0
475; GFX12-NEXT:    s_wait_bvhcnt 0x0
476; GFX12-NEXT:    s_wait_kmcnt 0x0
477; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
478; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
479; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
480; GFX12-NEXT:    s_setpc_b64 s[30:31]
481  %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
482  %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
483  ret float %fmed
484}
485
486; max-min patterns always require known non-NaN input
487
; maxnum(minnum(%a, 4.0), 2.0) under attribute group #1 with no nnan flag on
; either call. No med3 is expected on any target: the gfx1010 and gfx803 checks
; keep a separate min and max, and the gfx1200 checks expect a max of v0 with
; itself followed by v_minmax_num_f32.
488define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
489; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
490; GFX10:       ; %bb.0:
491; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
493; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
494; GFX10-NEXT:    s_setpc_b64 s[30:31]
495;
496; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
497; GFX8:       ; %bb.0:
498; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
500; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
501; GFX8-NEXT:    s_setpc_b64 s[30:31]
502;
503; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
504; GFX12:       ; %bb.0:
505; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
506; GFX12-NEXT:    s_wait_expcnt 0x0
507; GFX12-NEXT:    s_wait_samplecnt 0x0
508; GFX12-NEXT:    s_wait_bvhcnt 0x0
509; GFX12-NEXT:    s_wait_kmcnt 0x0
510; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
511; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
512; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
513; GFX12-NEXT:    s_setpc_b64 s[30:31]
514  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
515  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
516  ret float %fmed
517}
518
519; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
; maxnum(minnum(%a, 4.0), 2.0) under attribute group #0 with no nnan flag on
; either call. No med3 is expected on any target. Each target's checks start
; with an extra instruction on v0 (a self-max on gfx1010 and gfx1200, a
; multiply by 1.0 on gfx803), followed by min then max on gfx1010 and gfx803,
; or by v_minmax_num_f32 on gfx1200.
520define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
521; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
522; GFX10:       ; %bb.0:
523; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
525; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
526; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
527; GFX10-NEXT:    s_setpc_b64 s[30:31]
528;
529; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
530; GFX8:       ; %bb.0:
531; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
533; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
534; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
535; GFX8-NEXT:    s_setpc_b64 s[30:31]
536;
537; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
538; GFX12:       ; %bb.0:
539; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
540; GFX12-NEXT:    s_wait_expcnt 0x0
541; GFX12-NEXT:    s_wait_samplecnt 0x0
542; GFX12-NEXT:    s_wait_bvhcnt 0x0
543; GFX12-NEXT:    s_wait_kmcnt 0x0
544; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
545; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
546; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
547; GFX12-NEXT:    s_setpc_b64 s[30:31]
548  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
549  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
550  ret float %fmed
551}
552
553declare half @llvm.minnum.f16(half, half)
554declare half @llvm.maxnum.f16(half, half)
555declare float @llvm.minnum.f32(float, float)
556declare float @llvm.maxnum.f32(float, float)
557declare double @llvm.minnum.f64(double, double)
558declare double @llvm.maxnum.f64(double, double)
559declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
560declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
; Attribute groups used by the test functions above.
; #0: sets the "amdgpu-ieee" function attribute to "true".
561attributes #0 = {"amdgpu-ieee"="true"}
; #1: sets the "amdgpu-ieee" function attribute to "false".
562attributes #1 = {"amdgpu-ieee"="false"}
; #2: sets "no-nans-fp-math" to "true" and does not set "amdgpu-ieee".
563attributes #2 = {"no-nans-fp-math"="true"}
564