xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fmaximum3.ll (revision 7221bc74bc6b038b40c00d5111555ea87b326bf3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
5
; Chained scalar maximum, llvm.maximum(llvm.maximum(%a, %b), %c).
; The gfx1200 and gfx950 checks expect a single v_maximum3_f32. The gfx940
; checks expect each llvm.maximum expanded to v_max_f32 + v_cmp_o_f32 +
; v_cndmask_b32, which substitutes 0x7fc00000 (an f32 quiet NaN bit pattern)
; when the compare is unordered.
6define float @v_fmaximum3_f32(float %a, float %b, float %c) {
7; GFX12-LABEL: v_fmaximum3_f32:
8; GFX12:       ; %bb.0:
9; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
10; GFX12-NEXT:    s_wait_expcnt 0x0
11; GFX12-NEXT:    s_wait_samplecnt 0x0
12; GFX12-NEXT:    s_wait_bvhcnt 0x0
13; GFX12-NEXT:    s_wait_kmcnt 0x0
14; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, v2
15; GFX12-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX940-LABEL: v_fmaximum3_f32:
18; GFX940:       ; %bb.0:
19; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
21; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
22; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
23; GFX940-NEXT:    s_nop 1
24; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
25; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
26; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
27; GFX940-NEXT:    s_nop 1
28; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
29; GFX940-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX950-LABEL: v_fmaximum3_f32:
32; GFX950:       ; %bb.0:
33; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v2
35; GFX950-NEXT:    s_setpc_b64 s[30:31]
36  %max0 = call float @llvm.maximum.f32(float %a, float %b)
37  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
38  ret float %max1
39}
40
; Commuted outer call, llvm.maximum(%c, llvm.maximum(%a, %b)).
; The gfx1200 and gfx950 checks still expect one v_maximum3_f32, with the
; register holding %c (v2) as the first source operand. The gfx940 checks
; expect the two-step max/compare/select expansion with v2 first in the
; second step.
41define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
42; GFX12-LABEL: v_fmaximum3_f32_commute:
43; GFX12:       ; %bb.0:
44; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
45; GFX12-NEXT:    s_wait_expcnt 0x0
46; GFX12-NEXT:    s_wait_samplecnt 0x0
47; GFX12-NEXT:    s_wait_bvhcnt 0x0
48; GFX12-NEXT:    s_wait_kmcnt 0x0
49; GFX12-NEXT:    v_maximum3_f32 v0, v2, v0, v1
50; GFX12-NEXT:    s_setpc_b64 s[30:31]
51;
52; GFX940-LABEL: v_fmaximum3_f32_commute:
53; GFX940:       ; %bb.0:
54; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
56; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
57; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
58; GFX940-NEXT:    s_nop 1
59; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
60; GFX940-NEXT:    v_max_f32_e32 v1, v2, v0
61; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v0
62; GFX940-NEXT:    s_nop 1
63; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
64; GFX940-NEXT:    s_setpc_b64 s[30:31]
65;
66; GFX950-LABEL: v_fmaximum3_f32_commute:
67; GFX950:       ; %bb.0:
68; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX950-NEXT:    v_maximum3_f32 v0, v2, v0, v1
70; GFX950-NEXT:    s_setpc_b64 s[30:31]
71  %max0 = call float @llvm.maximum.f32(float %a, float %b)
72  %max1 = call float @llvm.maximum.f32(float %c, float %max0)
73  ret float %max1
74}
75
; amdgpu_ps variant with all three operands passed inreg (s0, s1, s2 in the
; checks). The result is bitcast to i32 and returned through
; llvm.amdgcn.readfirstlane. The gfx1200 checks copy only s2 to a VGPR and
; feed s0/s1 directly to v_maximum3_f32; the gfx950 checks copy s1 and s2 and
; use only s0 directly. The gfx940 checks expect the two-step
; max/compare/select expansion.
76define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inreg %c) {
77; GFX12-LABEL: s_fmaximum3_f32:
78; GFX12:       ; %bb.0:
79; GFX12-NEXT:    v_mov_b32_e32 v0, s2
80; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
81; GFX12-NEXT:    v_maximum3_f32 v0, s0, s1, v0
82; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
83; GFX12-NEXT:    ; return to shader part epilog
84;
85; GFX940-LABEL: s_fmaximum3_f32:
86; GFX940:       ; %bb.0:
87; GFX940-NEXT:    v_mov_b32_e32 v0, s1
88; GFX940-NEXT:    v_max_f32_e32 v1, s0, v0
89; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
90; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
91; GFX940-NEXT:    s_nop 1
92; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
93; GFX940-NEXT:    v_max_f32_e32 v1, s2, v0
94; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
95; GFX940-NEXT:    s_nop 1
96; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
97; GFX940-NEXT:    s_nop 0
98; GFX940-NEXT:    v_readfirstlane_b32 s0, v0
99; GFX940-NEXT:    ; return to shader part epilog
100;
101; GFX950-LABEL: s_fmaximum3_f32:
102; GFX950:       ; %bb.0:
103; GFX950-NEXT:    v_mov_b32_e32 v0, s1
104; GFX950-NEXT:    v_mov_b32_e32 v1, s2
105; GFX950-NEXT:    v_maximum3_f32 v0, s0, v0, v1
106; GFX950-NEXT:    s_nop 0
107; GFX950-NEXT:    v_readfirstlane_b32 s0, v0
108; GFX950-NEXT:    ; return to shader part epilog
109  %max0 = call float @llvm.maximum.f32(float %a, float %b)
110  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
111  %cast = bitcast float %max1 to i32
112  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
113  ret i32 %readfirstlane
114}
115
; llvm.fabs applied to %a only (first operand of the inner maximum).
; The checks expect the fabs folded as a |v0| source modifier, both on
; v_maximum3_f32 (gfx1200, gfx950) and on the _e64 max/compare of the gfx940
; expansion.
116define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
117; GFX12-LABEL: v_fmaximum3_f32_fabs0:
118; GFX12:       ; %bb.0:
119; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
120; GFX12-NEXT:    s_wait_expcnt 0x0
121; GFX12-NEXT:    s_wait_samplecnt 0x0
122; GFX12-NEXT:    s_wait_bvhcnt 0x0
123; GFX12-NEXT:    s_wait_kmcnt 0x0
124; GFX12-NEXT:    v_maximum3_f32 v0, |v0|, v1, v2
125; GFX12-NEXT:    s_setpc_b64 s[30:31]
126;
127; GFX940-LABEL: v_fmaximum3_f32_fabs0:
128; GFX940:       ; %bb.0:
129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GFX940-NEXT:    v_max_f32_e64 v3, |v0|, v1
131; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
132; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, v1
133; GFX940-NEXT:    s_nop 1
134; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
135; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
136; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
137; GFX940-NEXT:    s_nop 1
138; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
139; GFX940-NEXT:    s_setpc_b64 s[30:31]
140;
141; GFX950-LABEL: v_fmaximum3_f32_fabs0:
142; GFX950:       ; %bb.0:
143; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX950-NEXT:    v_maximum3_f32 v0, |v0|, v1, v2
145; GFX950-NEXT:    s_setpc_b64 s[30:31]
146  %a.fabs = call float @llvm.fabs.f32(float %a)
147  %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b)
148  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
149  ret float %max1
150}
151
; llvm.fabs applied to %b only (second operand of the inner maximum).
; The checks expect the fabs folded as a |v1| source modifier, both on
; v_maximum3_f32 (gfx1200, gfx950) and on the _e64 max/compare of the gfx940
; expansion.
152define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
153; GFX12-LABEL: v_fmaximum3_f32_fabs1:
154; GFX12:       ; %bb.0:
155; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
156; GFX12-NEXT:    s_wait_expcnt 0x0
157; GFX12-NEXT:    s_wait_samplecnt 0x0
158; GFX12-NEXT:    s_wait_bvhcnt 0x0
159; GFX12-NEXT:    s_wait_kmcnt 0x0
160; GFX12-NEXT:    v_maximum3_f32 v0, v0, |v1|, v2
161; GFX12-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX940-LABEL: v_fmaximum3_f32_fabs1:
164; GFX940:       ; %bb.0:
165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX940-NEXT:    v_max_f32_e64 v3, v0, |v1|
167; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
168; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v1|
169; GFX940-NEXT:    s_nop 1
170; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
171; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
172; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
173; GFX940-NEXT:    s_nop 1
174; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
175; GFX940-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX950-LABEL: v_fmaximum3_f32_fabs1:
178; GFX950:       ; %bb.0:
179; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX950-NEXT:    v_maximum3_f32 v0, v0, |v1|, v2
181; GFX950-NEXT:    s_setpc_b64 s[30:31]
182  %b.fabs = call float @llvm.fabs.f32(float %b)
183  %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs)
184  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
185  ret float %max1
186}
187
; llvm.fabs applied to %c only (second operand of the outer maximum).
; The checks expect the fabs folded as a |v2| source modifier, on
; v_maximum3_f32 (gfx1200, gfx950) and on the second, _e64 max/compare pair
; of the gfx940 expansion.
188define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
189; GFX12-LABEL: v_fmaximum3_f32_fabs2:
190; GFX12:       ; %bb.0:
191; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
192; GFX12-NEXT:    s_wait_expcnt 0x0
193; GFX12-NEXT:    s_wait_samplecnt 0x0
194; GFX12-NEXT:    s_wait_bvhcnt 0x0
195; GFX12-NEXT:    s_wait_kmcnt 0x0
196; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, |v2|
197; GFX12-NEXT:    s_setpc_b64 s[30:31]
198;
199; GFX940-LABEL: v_fmaximum3_f32_fabs2:
200; GFX940:       ; %bb.0:
201; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
203; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
204; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
205; GFX940-NEXT:    s_nop 1
206; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
207; GFX940-NEXT:    v_max_f32_e64 v1, v0, |v2|
208; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
209; GFX940-NEXT:    s_nop 1
210; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
211; GFX940-NEXT:    s_setpc_b64 s[30:31]
212;
213; GFX950-LABEL: v_fmaximum3_f32_fabs2:
214; GFX950:       ; %bb.0:
215; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, |v2|
217; GFX950-NEXT:    s_setpc_b64 s[30:31]
218  %c.fabs = call float @llvm.fabs.f32(float %c)
219  %max0 = call float @llvm.maximum.f32(float %a, float %b)
220  %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs)
221  ret float %max1
222}
223
; llvm.fabs applied to all three inputs before the chained maximum.
; The checks expect every fabs folded as a |vN| source modifier, on
; v_maximum3_f32 (gfx1200, gfx950) and on the _e64 max/compare instructions
; of the gfx940 expansion.
224define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
225; GFX12-LABEL: v_fmaximum3_f32_fabs_all:
226; GFX12:       ; %bb.0:
227; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
228; GFX12-NEXT:    s_wait_expcnt 0x0
229; GFX12-NEXT:    s_wait_samplecnt 0x0
230; GFX12-NEXT:    s_wait_bvhcnt 0x0
231; GFX12-NEXT:    s_wait_kmcnt 0x0
232; GFX12-NEXT:    v_maximum3_f32 v0, |v0|, |v1|, |v2|
233; GFX12-NEXT:    s_setpc_b64 s[30:31]
234;
235; GFX940-LABEL: v_fmaximum3_f32_fabs_all:
236; GFX940:       ; %bb.0:
237; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX940-NEXT:    v_max_f32_e64 v3, |v0|, |v1|
239; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
240; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v1|
241; GFX940-NEXT:    s_nop 1
242; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
243; GFX940-NEXT:    v_max_f32_e64 v1, v0, |v2|
244; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v2|
245; GFX940-NEXT:    s_nop 1
246; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
247; GFX940-NEXT:    s_setpc_b64 s[30:31]
248;
249; GFX950-LABEL: v_fmaximum3_f32_fabs_all:
250; GFX950:       ; %bb.0:
251; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252; GFX950-NEXT:    v_maximum3_f32 v0, |v0|, |v1|, |v2|
253; GFX950-NEXT:    s_setpc_b64 s[30:31]
254  %a.fabs = call float @llvm.fabs.f32(float %a)
255  %b.fabs = call float @llvm.fabs.f32(float %b)
256  %c.fabs = call float @llvm.fabs.f32(float %c)
257  %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b.fabs)
258  %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs)
259  ret float %max1
260}
261
; fneg applied to all three inputs before the chained maximum.
; The checks expect every fneg folded as a -vN source modifier, on
; v_maximum3_f32 (gfx1200, gfx950) and on the _e64 max/compare instructions
; of the gfx940 expansion.
262define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
263; GFX12-LABEL: v_fmaximum3_f32_fneg_all:
264; GFX12:       ; %bb.0:
265; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
266; GFX12-NEXT:    s_wait_expcnt 0x0
267; GFX12-NEXT:    s_wait_samplecnt 0x0
268; GFX12-NEXT:    s_wait_bvhcnt 0x0
269; GFX12-NEXT:    s_wait_kmcnt 0x0
270; GFX12-NEXT:    v_maximum3_f32 v0, -v0, -v1, -v2
271; GFX12-NEXT:    s_setpc_b64 s[30:31]
272;
273; GFX940-LABEL: v_fmaximum3_f32_fneg_all:
274; GFX940:       ; %bb.0:
275; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276; GFX940-NEXT:    v_max_f32_e64 v3, -v0, -v1
277; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
278; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
279; GFX940-NEXT:    s_nop 1
280; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
281; GFX940-NEXT:    v_max_f32_e64 v1, v0, -v2
282; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
283; GFX940-NEXT:    s_nop 1
284; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
285; GFX940-NEXT:    s_setpc_b64 s[30:31]
286;
287; GFX950-LABEL: v_fmaximum3_f32_fneg_all:
288; GFX950:       ; %bb.0:
289; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290; GFX950-NEXT:    v_maximum3_f32 v0, -v0, -v1, -v2
291; GFX950-NEXT:    s_setpc_b64 s[30:31]
292  %a.fneg = fneg float %a
293  %b.fneg = fneg float %b
294  %c.fneg = fneg float %c
295  %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b.fneg)
296  %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg)
297  ret float %max1
298}
299
; fneg(llvm.fabs(x)) applied to all three inputs before the chained maximum.
; The checks expect both operations folded together as -|vN| source
; modifiers, on v_maximum3_f32 (gfx1200, gfx950) and on the _e64
; max/compare instructions of the gfx940 expansion.
300define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
301; GFX12-LABEL: v_fmaximum3_f32_fneg_fabs_all:
302; GFX12:       ; %bb.0:
303; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
304; GFX12-NEXT:    s_wait_expcnt 0x0
305; GFX12-NEXT:    s_wait_samplecnt 0x0
306; GFX12-NEXT:    s_wait_bvhcnt 0x0
307; GFX12-NEXT:    s_wait_kmcnt 0x0
308; GFX12-NEXT:    v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
309; GFX12-NEXT:    s_setpc_b64 s[30:31]
310;
311; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all:
312; GFX940:       ; %bb.0:
313; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; GFX940-NEXT:    v_max_f32_e64 v3, -|v0|, -|v1|
315; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
316; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -|v0|, -|v1|
317; GFX940-NEXT:    s_nop 1
318; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
319; GFX940-NEXT:    v_max_f32_e64 v1, v0, -|v2|
320; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -|v2|
321; GFX940-NEXT:    s_nop 1
322; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
323; GFX940-NEXT:    s_setpc_b64 s[30:31]
324;
325; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all:
326; GFX950:       ; %bb.0:
327; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX950-NEXT:    v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
329; GFX950-NEXT:    s_setpc_b64 s[30:31]
330  %a.fabs = call float @llvm.fabs.f32(float %a)
331  %b.fabs = call float @llvm.fabs.f32(float %b)
332  %c.fabs = call float @llvm.fabs.f32(float %c)
333  %a.fneg.fabs = fneg float %a.fabs
334  %b.fneg.fabs = fneg float %b.fabs
335  %c.fneg.fabs = fneg float %c.fabs
336  %max0 = call float @llvm.maximum.f32(float %a.fneg.fabs, float %b.fneg.fabs)
337  %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg.fabs)
338  ret float %max1
339}
340
; fneg applied to %a only (first operand of the inner maximum).
; The checks expect the fneg folded as a -v0 source modifier, on
; v_maximum3_f32 (gfx1200, gfx950) and on the first, _e64 max/compare pair
; of the gfx940 expansion.
341define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
342; GFX12-LABEL: v_fmaximum3_f32_fneg0:
343; GFX12:       ; %bb.0:
344; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
345; GFX12-NEXT:    s_wait_expcnt 0x0
346; GFX12-NEXT:    s_wait_samplecnt 0x0
347; GFX12-NEXT:    s_wait_bvhcnt 0x0
348; GFX12-NEXT:    s_wait_kmcnt 0x0
349; GFX12-NEXT:    v_maximum3_f32 v0, -v0, v1, v2
350; GFX12-NEXT:    s_setpc_b64 s[30:31]
351;
352; GFX940-LABEL: v_fmaximum3_f32_fneg0:
353; GFX940:       ; %bb.0:
354; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355; GFX940-NEXT:    v_max_f32_e64 v3, -v0, v1
356; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
357; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, v1
358; GFX940-NEXT:    s_nop 1
359; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
360; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
361; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
362; GFX940-NEXT:    s_nop 1
363; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
364; GFX940-NEXT:    s_setpc_b64 s[30:31]
365;
366; GFX950-LABEL: v_fmaximum3_f32_fneg0:
367; GFX950:       ; %bb.0:
368; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369; GFX950-NEXT:    v_maximum3_f32 v0, -v0, v1, v2
370; GFX950-NEXT:    s_setpc_b64 s[30:31]
371  %a.fneg = fneg float %a
372  %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b)
373  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
374  ret float %max1
375}
376
; fneg applied to %b only (second operand of the inner maximum).
; The checks expect the fneg folded as a -v1 source modifier, on
; v_maximum3_f32 (gfx1200, gfx950) and on the first, _e64 max/compare pair
; of the gfx940 expansion.
377define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
378; GFX12-LABEL: v_fmaximum3_f32_fneg1:
379; GFX12:       ; %bb.0:
380; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
381; GFX12-NEXT:    s_wait_expcnt 0x0
382; GFX12-NEXT:    s_wait_samplecnt 0x0
383; GFX12-NEXT:    s_wait_bvhcnt 0x0
384; GFX12-NEXT:    s_wait_kmcnt 0x0
385; GFX12-NEXT:    v_maximum3_f32 v0, v0, -v1, v2
386; GFX12-NEXT:    s_setpc_b64 s[30:31]
387;
388; GFX940-LABEL: v_fmaximum3_f32_fneg1:
389; GFX940:       ; %bb.0:
390; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391; GFX940-NEXT:    v_max_f32_e64 v3, v0, -v1
392; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
393; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v1
394; GFX940-NEXT:    s_nop 1
395; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
396; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
397; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
398; GFX940-NEXT:    s_nop 1
399; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
400; GFX940-NEXT:    s_setpc_b64 s[30:31]
401;
402; GFX950-LABEL: v_fmaximum3_f32_fneg1:
403; GFX950:       ; %bb.0:
404; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405; GFX950-NEXT:    v_maximum3_f32 v0, v0, -v1, v2
406; GFX950-NEXT:    s_setpc_b64 s[30:31]
407  %b.fneg = fneg float %b
408  %max0 = call float @llvm.maximum.f32(float %a, float %b.fneg)
409  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
410  ret float %max1
411}
412
; fneg applied to %c only (second operand of the outer maximum).
; The checks expect the fneg folded as a -v2 source modifier, on
; v_maximum3_f32 (gfx1200, gfx950) and on the second, _e64 max/compare pair
; of the gfx940 expansion.
413define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
414; GFX12-LABEL: v_fmaximum3_f32_fneg2:
415; GFX12:       ; %bb.0:
416; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
417; GFX12-NEXT:    s_wait_expcnt 0x0
418; GFX12-NEXT:    s_wait_samplecnt 0x0
419; GFX12-NEXT:    s_wait_bvhcnt 0x0
420; GFX12-NEXT:    s_wait_kmcnt 0x0
421; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, -v2
422; GFX12-NEXT:    s_setpc_b64 s[30:31]
423;
424; GFX940-LABEL: v_fmaximum3_f32_fneg2:
425; GFX940:       ; %bb.0:
426; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
428; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
429; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
430; GFX940-NEXT:    s_nop 1
431; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
432; GFX940-NEXT:    v_max_f32_e64 v1, v0, -v2
433; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v2
434; GFX940-NEXT:    s_nop 1
435; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
436; GFX940-NEXT:    s_setpc_b64 s[30:31]
437;
438; GFX950-LABEL: v_fmaximum3_f32_fneg2:
439; GFX950:       ; %bb.0:
440; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, -v2
442; GFX950-NEXT:    s_setpc_b64 s[30:31]
443  %c.fneg = fneg float %c
444  %max0 = call float @llvm.maximum.f32(float %a, float %b)
445  %max1 = call float @llvm.maximum.f32(float %max0, float %c.fneg)
446  ret float %max1
447}
448
; Constant 8.0 (0x41000000) as the first operand of the inner maximum.
; The gfx1200 checks expect the constant encoded directly as a literal
; operand of v_maximum3_f32; the gfx950 checks expect it materialized into
; s0 with s_mov_b32 first. In the gfx940 expansion the first ordered compare
; is v0 against itself and the literal appears only on v_max_f32.
449define float @v_fmaximum3_f32_const0(float %b, float %c) {
450; GFX12-LABEL: v_fmaximum3_f32_const0:
451; GFX12:       ; %bb.0:
452; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
453; GFX12-NEXT:    s_wait_expcnt 0x0
454; GFX12-NEXT:    s_wait_samplecnt 0x0
455; GFX12-NEXT:    s_wait_bvhcnt 0x0
456; GFX12-NEXT:    s_wait_kmcnt 0x0
457; GFX12-NEXT:    v_maximum3_f32 v0, v0, 0x41000000, v1
458; GFX12-NEXT:    s_setpc_b64 s[30:31]
459;
460; GFX940-LABEL: v_fmaximum3_f32_const0:
461; GFX940:       ; %bb.0:
462; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463; GFX940-NEXT:    v_max_f32_e32 v2, 0x41000000, v0
464; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
465; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
466; GFX940-NEXT:    s_nop 1
467; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
468; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
469; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
470; GFX940-NEXT:    s_nop 1
471; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
472; GFX940-NEXT:    s_setpc_b64 s[30:31]
473;
474; GFX950-LABEL: v_fmaximum3_f32_const0:
475; GFX950:       ; %bb.0:
476; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477; GFX950-NEXT:    s_mov_b32 s0, 0x41000000
478; GFX950-NEXT:    v_maximum3_f32 v0, v0, s0, v1
479; GFX950-NEXT:    s_setpc_b64 s[30:31]
480  %max0 = call float @llvm.maximum.f32(float 8.0, float %b)
481  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
482  ret float %max1
483}
484
; Constant 8.0 (0x41000000) as the second operand of the outer maximum.
; The gfx1200 checks expect the constant encoded directly as the last
; operand of v_maximum3_f32; the gfx950 checks expect it materialized into
; s0 with s_mov_b32 first. In the gfx940 expansion the second ordered
; compare is v0 against itself and the literal appears only on v_max_f32.
485define float @v_fmaximum3_f32__const2(float %a, float %b) {
486; GFX12-LABEL: v_fmaximum3_f32__const2:
487; GFX12:       ; %bb.0:
488; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
489; GFX12-NEXT:    s_wait_expcnt 0x0
490; GFX12-NEXT:    s_wait_samplecnt 0x0
491; GFX12-NEXT:    s_wait_bvhcnt 0x0
492; GFX12-NEXT:    s_wait_kmcnt 0x0
493; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, 0x41000000
494; GFX12-NEXT:    s_setpc_b64 s[30:31]
495;
496; GFX940-LABEL: v_fmaximum3_f32__const2:
497; GFX940:       ; %bb.0:
498; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
500; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
501; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
502; GFX940-NEXT:    s_nop 1
503; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
504; GFX940-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
505; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
506; GFX940-NEXT:    s_nop 1
507; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
508; GFX940-NEXT:    s_setpc_b64 s[30:31]
509;
510; GFX950-LABEL: v_fmaximum3_f32__const2:
511; GFX950:       ; %bb.0:
512; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513; GFX950-NEXT:    s_mov_b32 s0, 0x41000000
514; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, s0
515; GFX950-NEXT:    s_setpc_b64 s[30:31]
516  %max0 = call float @llvm.maximum.f32(float %a, float %b)
517  %max1 = call float @llvm.maximum.f32(float %max0, float 8.0)
518  ret float %max1
519}
520
; Constant 4.0 as the first operand of the inner maximum.
; Unlike the 8.0 tests, the checks for every target print the constant as
; the operand 4.0 directly on the instruction, with no separate move to
; materialize it (gfx1200 and gfx950 on v_maximum3_f32, gfx940 on
; v_max_f32).
521define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
522; GFX12-LABEL: v_fmaximum3_f32_inlineimm0:
523; GFX12:       ; %bb.0:
524; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
525; GFX12-NEXT:    s_wait_expcnt 0x0
526; GFX12-NEXT:    s_wait_samplecnt 0x0
527; GFX12-NEXT:    s_wait_bvhcnt 0x0
528; GFX12-NEXT:    s_wait_kmcnt 0x0
529; GFX12-NEXT:    v_maximum3_f32 v0, v0, 4.0, v1
530; GFX12-NEXT:    s_setpc_b64 s[30:31]
531;
532; GFX940-LABEL: v_fmaximum3_f32_inlineimm0:
533; GFX940:       ; %bb.0:
534; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; GFX940-NEXT:    v_max_f32_e32 v2, 4.0, v0
536; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
537; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
538; GFX940-NEXT:    s_nop 1
539; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
540; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
541; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
542; GFX940-NEXT:    s_nop 1
543; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
544; GFX940-NEXT:    s_setpc_b64 s[30:31]
545;
546; GFX950-LABEL: v_fmaximum3_f32_inlineimm0:
547; GFX950:       ; %bb.0:
548; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
549; GFX950-NEXT:    v_maximum3_f32 v0, v0, 4.0, v1
550; GFX950-NEXT:    s_setpc_b64 s[30:31]
551  %max0 = call float @llvm.maximum.f32(float 4.0, float %b)
552  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
553  ret float %max1
554}
555
; Constant 4.0 as the second operand of the outer maximum.
; The checks for every target print the constant as the operand 4.0
; directly on the instruction, with no separate move to materialize it
; (last operand of v_maximum3_f32 on gfx1200 and gfx950, second v_max_f32
; on gfx940).
556define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
557; GFX12-LABEL: v_fmaximum3_f32__inlineimm:
558; GFX12:       ; %bb.0:
559; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
560; GFX12-NEXT:    s_wait_expcnt 0x0
561; GFX12-NEXT:    s_wait_samplecnt 0x0
562; GFX12-NEXT:    s_wait_bvhcnt 0x0
563; GFX12-NEXT:    s_wait_kmcnt 0x0
564; GFX12-NEXT:    v_maximum3_f32 v0, v0, v1, 4.0
565; GFX12-NEXT:    s_setpc_b64 s[30:31]
566;
567; GFX940-LABEL: v_fmaximum3_f32__inlineimm:
568; GFX940:       ; %bb.0:
569; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570; GFX940-NEXT:    v_max_f32_e32 v2, v0, v1
571; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
572; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
573; GFX940-NEXT:    s_nop 1
574; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
575; GFX940-NEXT:    v_max_f32_e32 v1, 4.0, v0
576; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
577; GFX940-NEXT:    s_nop 1
578; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
579; GFX940-NEXT:    s_setpc_b64 s[30:31]
580;
581; GFX950-LABEL: v_fmaximum3_f32__inlineimm:
582; GFX950:       ; %bb.0:
583; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, 4.0
585; GFX950-NEXT:    s_setpc_b64 s[30:31]
586  %max0 = call float @llvm.maximum.f32(float %a, float %b)
587  %max1 = call float @llvm.maximum.f32(float %max0, float 4.0)
588  ret float %max1
589}
590
; Two distinct constants, llvm.maximum(llvm.maximum(%a, 8.0), 16.0).
; The gfx1200 checks expect 0x41000000 (8.0) moved into s0 and 0x41800000
; (16.0) encoded as a literal operand of v_maximum3_f32. The gfx950 checks
; expect 8.0 in s0 and 16.0 in v1, both materialized by separate moves.
; The gfx940 checks expect the two-step expansion with each constant as a
; literal on its v_max_f32 and both ordered compares of v0 against itself.
591define float @v_fmaximum3_f32_const1_const2(float %a) {
592; GFX12-LABEL: v_fmaximum3_f32_const1_const2:
593; GFX12:       ; %bb.0:
594; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
595; GFX12-NEXT:    s_wait_expcnt 0x0
596; GFX12-NEXT:    s_wait_samplecnt 0x0
597; GFX12-NEXT:    s_wait_bvhcnt 0x0
598; GFX12-NEXT:    s_wait_kmcnt 0x0
599; GFX12-NEXT:    s_mov_b32 s0, 0x41000000
600; GFX12-NEXT:    s_wait_alu 0xfffe
601; GFX12-NEXT:    v_maximum3_f32 v0, v0, s0, 0x41800000
602; GFX12-NEXT:    s_setpc_b64 s[30:31]
603;
604; GFX940-LABEL: v_fmaximum3_f32_const1_const2:
605; GFX940:       ; %bb.0:
606; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607; GFX940-NEXT:    v_max_f32_e32 v1, 0x41000000, v0
608; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
609; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
610; GFX940-NEXT:    s_nop 1
611; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
612; GFX940-NEXT:    v_max_f32_e32 v1, 0x41800000, v0
613; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
614; GFX940-NEXT:    s_nop 1
615; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
616; GFX940-NEXT:    s_setpc_b64 s[30:31]
617;
618; GFX950-LABEL: v_fmaximum3_f32_const1_const2:
619; GFX950:       ; %bb.0:
620; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; GFX950-NEXT:    s_mov_b32 s0, 0x41000000
622; GFX950-NEXT:    v_mov_b32_e32 v1, 0x41800000
623; GFX950-NEXT:    v_maximum3_f32 v0, v0, s0, v1
624; GFX950-NEXT:    s_setpc_b64 s[30:31]
625  %max0 = call float @llvm.maximum.f32(float %a, float 8.0)
626  %max1 = call float @llvm.maximum.f32(float %max0, float 16.0)
627  ret float %max1
628}
629
; <2 x float> chained maximum, llvm.maximum(%c, llvm.maximum(%a, %b)).
; The gfx1200 and gfx950 checks expect one v_maximum3_f32 per element, with
; the %c element (v4 / v5) as the first source. The gfx940 checks expect
; the max/compare/select expansion once per element per llvm.maximum.
; NOTE(review): the outer call here has %c as its first operand, which is
; the operand order the scalar tests use for v_fmaximum3_f32_commute rather
; than v_fmaximum3_f32 - presumably the vector names are swapped; confirm
; before relying on the name.
630define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
631; GFX12-LABEL: v_fmaximum3_v2f32:
632; GFX12:       ; %bb.0:
633; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
634; GFX12-NEXT:    s_wait_expcnt 0x0
635; GFX12-NEXT:    s_wait_samplecnt 0x0
636; GFX12-NEXT:    s_wait_bvhcnt 0x0
637; GFX12-NEXT:    s_wait_kmcnt 0x0
638; GFX12-NEXT:    v_maximum3_f32 v0, v4, v0, v2
639; GFX12-NEXT:    v_maximum3_f32 v1, v5, v1, v3
640; GFX12-NEXT:    s_setpc_b64 s[30:31]
641;
642; GFX940-LABEL: v_fmaximum3_v2f32:
643; GFX940:       ; %bb.0:
644; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645; GFX940-NEXT:    v_max_f32_e32 v6, v1, v3
646; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
647; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
648; GFX940-NEXT:    v_max_f32_e32 v3, v0, v2
649; GFX940-NEXT:    s_nop 0
650; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
651; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
652; GFX940-NEXT:    s_nop 1
653; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
654; GFX940-NEXT:    v_max_f32_e32 v2, v4, v0
655; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v4, v0
656; GFX940-NEXT:    s_nop 1
657; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
658; GFX940-NEXT:    v_max_f32_e32 v2, v5, v1
659; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v5, v1
660; GFX940-NEXT:    s_nop 1
661; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
662; GFX940-NEXT:    s_setpc_b64 s[30:31]
663;
664; GFX950-LABEL: v_fmaximum3_v2f32:
665; GFX950:       ; %bb.0:
666; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667; GFX950-NEXT:    v_maximum3_f32 v0, v4, v0, v2
668; GFX950-NEXT:    v_maximum3_f32 v1, v5, v1, v3
669; GFX950-NEXT:    s_setpc_b64 s[30:31]
670  %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
671  %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0)
672  ret <2 x float> %max1
673}
674
; <2 x float> chained maximum, llvm.maximum(llvm.maximum(%a, %b), %c).
; The gfx1200 and gfx950 checks expect one v_maximum3_f32 per element, with
; the %c element (v4 / v5) as the last source. The gfx940 checks expect the
; max/compare/select expansion once per element per llvm.maximum.
; NOTE(review): despite the _commute suffix, the outer call here has %max0
; first, which is the operand order the scalar tests use for the
; non-commuted v_fmaximum3_f32 - presumably the vector names are swapped;
; confirm before relying on the name.
675define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
676; GFX12-LABEL: v_fmaximum3_v2f32_commute:
677; GFX12:       ; %bb.0:
678; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
679; GFX12-NEXT:    s_wait_expcnt 0x0
680; GFX12-NEXT:    s_wait_samplecnt 0x0
681; GFX12-NEXT:    s_wait_bvhcnt 0x0
682; GFX12-NEXT:    s_wait_kmcnt 0x0
683; GFX12-NEXT:    v_maximum3_f32 v0, v0, v2, v4
684; GFX12-NEXT:    v_maximum3_f32 v1, v1, v3, v5
685; GFX12-NEXT:    s_setpc_b64 s[30:31]
686;
687; GFX940-LABEL: v_fmaximum3_v2f32_commute:
688; GFX940:       ; %bb.0:
689; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690; GFX940-NEXT:    v_max_f32_e32 v6, v1, v3
691; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
692; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
693; GFX940-NEXT:    v_max_f32_e32 v3, v0, v2
694; GFX940-NEXT:    s_nop 0
695; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
696; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
697; GFX940-NEXT:    s_nop 1
698; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
699; GFX940-NEXT:    v_max_f32_e32 v2, v0, v4
700; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
701; GFX940-NEXT:    s_nop 1
702; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
703; GFX940-NEXT:    v_max_f32_e32 v2, v1, v5
704; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
705; GFX940-NEXT:    s_nop 1
706; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
707; GFX940-NEXT:    s_setpc_b64 s[30:31]
708;
709; GFX950-LABEL: v_fmaximum3_v2f32_commute:
710; GFX950:       ; %bb.0:
711; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
712; GFX950-NEXT:    v_maximum3_f32 v0, v0, v2, v4
713; GFX950-NEXT:    v_maximum3_f32 v1, v1, v3, v5
714; GFX950-NEXT:    s_setpc_b64 s[30:31]
715  %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
716  %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
717  ret <2 x float> %max1
718}
719
; <2 x float> chained maximum with llvm.fabs applied to all three inputs.
; The gfx1200 and gfx950 checks expect one v_maximum3_f32 per element with
; |vN| modifiers on every source. The gfx940 checks expect the per-element
; expansion with the modifiers on the _e64 max/compare instructions.
720define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
721; GFX12-LABEL: v_fmaximum3_v2f32__fabs_all:
722; GFX12:       ; %bb.0:
723; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
724; GFX12-NEXT:    s_wait_expcnt 0x0
725; GFX12-NEXT:    s_wait_samplecnt 0x0
726; GFX12-NEXT:    s_wait_bvhcnt 0x0
727; GFX12-NEXT:    s_wait_kmcnt 0x0
728; GFX12-NEXT:    v_maximum3_f32 v0, |v0|, |v2|, |v4|
729; GFX12-NEXT:    v_maximum3_f32 v1, |v1|, |v3|, |v5|
730; GFX12-NEXT:    s_setpc_b64 s[30:31]
731;
732; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all:
733; GFX940:       ; %bb.0:
734; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735; GFX940-NEXT:    v_max_f32_e64 v6, |v1|, |v3|
736; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
737; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v3|
738; GFX940-NEXT:    v_max_f32_e64 v3, |v0|, |v2|
739; GFX940-NEXT:    s_nop 0
740; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
741; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v2|
742; GFX940-NEXT:    s_nop 1
743; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
744; GFX940-NEXT:    v_max_f32_e64 v2, v0, |v4|
745; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v4|
746; GFX940-NEXT:    s_nop 1
747; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
748; GFX940-NEXT:    v_max_f32_e64 v2, v1, |v5|
749; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v5|
750; GFX940-NEXT:    s_nop 1
751; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
752; GFX940-NEXT:    s_setpc_b64 s[30:31]
753;
754; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all:
755; GFX950:       ; %bb.0:
756; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
757; GFX950-NEXT:    v_maximum3_f32 v0, |v0|, |v2|, |v4|
758; GFX950-NEXT:    v_maximum3_f32 v1, |v1|, |v3|, |v5|
759; GFX950-NEXT:    s_setpc_b64 s[30:31]
760  %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
761  %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
762  %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c)
763  %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a.fabs, <2 x float> %b.fabs)
764  %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c.fabs)
765  ret <2 x float> %max1
766}
767
; <2 x float> chained maximum with fneg applied to all three inputs.
; The gfx1200 and gfx950 checks expect one v_maximum3_f32 per element with
; -vN modifiers on every source. The gfx940 checks expect the per-element
; expansion with the modifiers on the _e64 max/compare instructions.
768define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
769; GFX12-LABEL: v_fmaximum3_v2f32__fneg_all:
770; GFX12:       ; %bb.0:
771; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
772; GFX12-NEXT:    s_wait_expcnt 0x0
773; GFX12-NEXT:    s_wait_samplecnt 0x0
774; GFX12-NEXT:    s_wait_bvhcnt 0x0
775; GFX12-NEXT:    s_wait_kmcnt 0x0
776; GFX12-NEXT:    v_maximum3_f32 v0, -v0, -v2, -v4
777; GFX12-NEXT:    v_maximum3_f32 v1, -v1, -v3, -v5
778; GFX12-NEXT:    s_setpc_b64 s[30:31]
779;
780; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all:
781; GFX940:       ; %bb.0:
782; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783; GFX940-NEXT:    v_max_f32_e64 v6, -v1, -v3
784; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
785; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v3
786; GFX940-NEXT:    v_max_f32_e64 v3, -v0, -v2
787; GFX940-NEXT:    s_nop 0
788; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
789; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v2
790; GFX940-NEXT:    s_nop 1
791; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
792; GFX940-NEXT:    v_max_f32_e64 v2, v0, -v4
793; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v4
794; GFX940-NEXT:    s_nop 1
795; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
796; GFX940-NEXT:    v_max_f32_e64 v2, v1, -v5
797; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v5
798; GFX940-NEXT:    s_nop 1
799; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
800; GFX940-NEXT:    s_setpc_b64 s[30:31]
801;
802; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all:
803; GFX950:       ; %bb.0:
804; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805; GFX950-NEXT:    v_maximum3_f32 v0, -v0, -v2, -v4
806; GFX950-NEXT:    v_maximum3_f32 v1, -v1, -v3, -v5
807; GFX950-NEXT:    s_setpc_b64 s[30:31]
808  %a.fneg = fneg <2 x float> %a
809  %b.fneg = fneg <2 x float> %b
810  %c.fneg = fneg <2 x float> %c
811  %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a.fneg, <2 x float> %b.fneg)
812  %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c.fneg)
813  ret <2 x float> %max1
814}
815
; <2 x float> maximum chain whose inner call takes a splat of 2.0 as its second
; operand. The gfx1200 and gfx950 checks expect 2.0 to appear directly as the
; middle source of each v_maximum3_f32. In the gfx940 expansion the constant
; stage compares each input against itself (v_cmp_o_f32 vN, vN).
816define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c) {
817; GFX12-LABEL: v_fmaximum3_v2f32__inlineimm1:
818; GFX12:       ; %bb.0:
819; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
820; GFX12-NEXT:    s_wait_expcnt 0x0
821; GFX12-NEXT:    s_wait_samplecnt 0x0
822; GFX12-NEXT:    s_wait_bvhcnt 0x0
823; GFX12-NEXT:    s_wait_kmcnt 0x0
824; GFX12-NEXT:    v_maximum3_f32 v0, v0, 2.0, v2
825; GFX12-NEXT:    v_maximum3_f32 v1, v1, 2.0, v3
826; GFX12-NEXT:    s_setpc_b64 s[30:31]
827;
828; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1:
829; GFX940:       ; %bb.0:
830; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831; GFX940-NEXT:    v_max_f32_e32 v4, 2.0, v1
832; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
833; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
834; GFX940-NEXT:    s_nop 1
835; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
836; GFX940-NEXT:    v_max_f32_e32 v4, 2.0, v0
837; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
838; GFX940-NEXT:    s_nop 1
839; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
840; GFX940-NEXT:    v_max_f32_e32 v4, v0, v2
841; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
842; GFX940-NEXT:    v_max_f32_e32 v2, v1, v3
843; GFX940-NEXT:    s_nop 0
844; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
845; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
846; GFX940-NEXT:    s_nop 1
847; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
848; GFX940-NEXT:    s_setpc_b64 s[30:31]
849;
850; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1:
851; GFX950:       ; %bb.0:
852; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
853; GFX950-NEXT:    v_maximum3_f32 v0, v0, 2.0, v2
854; GFX950-NEXT:    v_maximum3_f32 v1, v1, 2.0, v3
855; GFX950-NEXT:    s_setpc_b64 s[30:31]
856  %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> <float 2.0, float 2.0>)
857  %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c)
858  ret <2 x float> %max1
859}
860
; <2 x float> maximum chain whose outer call takes a splat of 4.0 as its second
; operand. The gfx1200 and gfx950 checks expect 4.0 to appear directly as the
; last source of each v_maximum3_f32. In the gfx940 expansion the constant
; stage compares the intermediate result against itself (v_cmp_o_f32 vN, vN).
861define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b) {
862; GFX12-LABEL: v_fmaximum3_v2f32__inlineimm2:
863; GFX12:       ; %bb.0:
864; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
865; GFX12-NEXT:    s_wait_expcnt 0x0
866; GFX12-NEXT:    s_wait_samplecnt 0x0
867; GFX12-NEXT:    s_wait_bvhcnt 0x0
868; GFX12-NEXT:    s_wait_kmcnt 0x0
869; GFX12-NEXT:    v_maximum3_f32 v0, v0, v2, 4.0
870; GFX12-NEXT:    v_maximum3_f32 v1, v1, v3, 4.0
871; GFX12-NEXT:    s_setpc_b64 s[30:31]
872;
873; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2:
874; GFX940:       ; %bb.0:
875; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876; GFX940-NEXT:    v_max_f32_e32 v4, v1, v3
877; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
878; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
879; GFX940-NEXT:    v_max_f32_e32 v3, v0, v2
880; GFX940-NEXT:    s_nop 0
881; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
882; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
883; GFX940-NEXT:    s_nop 1
884; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
885; GFX940-NEXT:    v_max_f32_e32 v2, 4.0, v0
886; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
887; GFX940-NEXT:    s_nop 1
888; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
889; GFX940-NEXT:    v_max_f32_e32 v2, 4.0, v1
890; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
891; GFX940-NEXT:    s_nop 1
892; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
893; GFX940-NEXT:    s_setpc_b64 s[30:31]
894;
895; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2:
896; GFX950:       ; %bb.0:
897; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
898; GFX950-NEXT:    v_maximum3_f32 v0, v0, v2, 4.0
899; GFX950-NEXT:    v_maximum3_f32 v1, v1, v3, 4.0
900; GFX950-NEXT:    s_setpc_b64 s[30:31]
901  %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
902  %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> <float 4.0, float 4.0>)
903  ret <2 x float> %max1
904}
905
; Two chained llvm.maximum calls over three <3 x float> values. The gfx1200 and
; gfx950 checks expect one v_maximum3_f32 per element, with the %c element as
; the first source. The gfx940 checks expect the v_max_f32 / v_cmp_o_f32 /
; v_cndmask_b32 expansion for each maximum.
; NOTE(review): the second call passes %c first (%c, %max0), which is the
; operand order the scalar v_fmaximum3_f16_commute test uses, although this
; function has no _commute suffix. Confirm the naming is intentional.
906define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
907; GFX12-LABEL: v_fmaximum3_v3f32:
908; GFX12:       ; %bb.0:
909; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
910; GFX12-NEXT:    s_wait_expcnt 0x0
911; GFX12-NEXT:    s_wait_samplecnt 0x0
912; GFX12-NEXT:    s_wait_bvhcnt 0x0
913; GFX12-NEXT:    s_wait_kmcnt 0x0
914; GFX12-NEXT:    v_maximum3_f32 v0, v6, v0, v3
915; GFX12-NEXT:    v_maximum3_f32 v1, v7, v1, v4
916; GFX12-NEXT:    v_maximum3_f32 v2, v8, v2, v5
917; GFX12-NEXT:    s_setpc_b64 s[30:31]
918;
919; GFX940-LABEL: v_fmaximum3_v3f32:
920; GFX940:       ; %bb.0:
921; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
922; GFX940-NEXT:    v_max_f32_e32 v9, v2, v5
923; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
924; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
925; GFX940-NEXT:    v_max_f32_e32 v5, v1, v4
926; GFX940-NEXT:    s_nop 0
927; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
928; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
929; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
930; GFX940-NEXT:    s_nop 0
931; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
932; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
933; GFX940-NEXT:    s_nop 1
934; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
935; GFX940-NEXT:    v_max_f32_e32 v3, v6, v0
936; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v6, v0
937; GFX940-NEXT:    s_nop 1
938; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
939; GFX940-NEXT:    v_max_f32_e32 v3, v7, v1
940; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v7, v1
941; GFX940-NEXT:    s_nop 1
942; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
943; GFX940-NEXT:    v_max_f32_e32 v3, v8, v2
944; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v8, v2
945; GFX940-NEXT:    s_nop 1
946; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
947; GFX940-NEXT:    s_setpc_b64 s[30:31]
948;
949; GFX950-LABEL: v_fmaximum3_v3f32:
950; GFX950:       ; %bb.0:
951; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952; GFX950-NEXT:    v_maximum3_f32 v0, v6, v0, v3
953; GFX950-NEXT:    v_maximum3_f32 v1, v7, v1, v4
954; GFX950-NEXT:    v_maximum3_f32 v2, v8, v2, v5
955; GFX950-NEXT:    s_setpc_b64 s[30:31]
956  %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
957  %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0)
958  ret <3 x float> %max1
959}
960
; Two chained llvm.maximum calls over three <3 x float> values. The gfx1200 and
; gfx950 checks expect one v_maximum3_f32 per element, with the %c element as
; the last source. The gfx940 checks expect the v_max_f32 / v_cmp_o_f32 /
; v_cndmask_b32 expansion for each maximum.
; NOTE(review): despite the _commute suffix, the second call here is
; (%max0, %c), the same operand order as the scalar v_fmaximum3_f16 test.
; Confirm the naming is intentional.
961define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
962; GFX12-LABEL: v_fmaximum3_v3f32_commute:
963; GFX12:       ; %bb.0:
964; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
965; GFX12-NEXT:    s_wait_expcnt 0x0
966; GFX12-NEXT:    s_wait_samplecnt 0x0
967; GFX12-NEXT:    s_wait_bvhcnt 0x0
968; GFX12-NEXT:    s_wait_kmcnt 0x0
969; GFX12-NEXT:    v_maximum3_f32 v0, v0, v3, v6
970; GFX12-NEXT:    v_maximum3_f32 v1, v1, v4, v7
971; GFX12-NEXT:    v_maximum3_f32 v2, v2, v5, v8
972; GFX12-NEXT:    s_setpc_b64 s[30:31]
973;
974; GFX940-LABEL: v_fmaximum3_v3f32_commute:
975; GFX940:       ; %bb.0:
976; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; GFX940-NEXT:    v_max_f32_e32 v9, v2, v5
978; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
979; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
980; GFX940-NEXT:    v_max_f32_e32 v5, v1, v4
981; GFX940-NEXT:    s_nop 0
982; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
983; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
984; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
985; GFX940-NEXT:    s_nop 0
986; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
987; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
988; GFX940-NEXT:    s_nop 1
989; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
990; GFX940-NEXT:    v_max_f32_e32 v3, v0, v6
991; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v6
992; GFX940-NEXT:    s_nop 1
993; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
994; GFX940-NEXT:    v_max_f32_e32 v3, v1, v7
995; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v7
996; GFX940-NEXT:    s_nop 1
997; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
998; GFX940-NEXT:    v_max_f32_e32 v3, v2, v8
999; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v8
1000; GFX940-NEXT:    s_nop 1
1001; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
1002; GFX940-NEXT:    s_setpc_b64 s[30:31]
1003;
1004; GFX950-LABEL: v_fmaximum3_v3f32_commute:
1005; GFX950:       ; %bb.0:
1006; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007; GFX950-NEXT:    v_maximum3_f32 v0, v0, v3, v6
1008; GFX950-NEXT:    v_maximum3_f32 v1, v1, v4, v7
1009; GFX950-NEXT:    v_maximum3_f32 v2, v2, v5, v8
1010; GFX950-NEXT:    s_setpc_b64 s[30:31]
1011  %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
1012  %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
1013  ret <3 x float> %max1
1014}
1015
; <3 x float> maximum chain where all three inputs go through llvm.fabs first.
; The gfx1200 and gfx950 checks expect one v_maximum3_f32 per element with
; |x| source modifiers on every operand. The gfx940 checks expect the
; v_max_f32 / v_cmp_o_f32 / v_cndmask_b32 expansion, with the modifiers applied
; only to the original inputs and not to the intermediate results.
1016define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
1017; GFX12-LABEL: v_fmaximum3_v3f32__fabs_all:
1018; GFX12:       ; %bb.0:
1019; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1020; GFX12-NEXT:    s_wait_expcnt 0x0
1021; GFX12-NEXT:    s_wait_samplecnt 0x0
1022; GFX12-NEXT:    s_wait_bvhcnt 0x0
1023; GFX12-NEXT:    s_wait_kmcnt 0x0
1024; GFX12-NEXT:    v_maximum3_f32 v0, |v0|, |v3|, |v6|
1025; GFX12-NEXT:    v_maximum3_f32 v1, |v1|, |v4|, |v7|
1026; GFX12-NEXT:    v_maximum3_f32 v2, |v2|, |v5|, |v8|
1027; GFX12-NEXT:    s_setpc_b64 s[30:31]
1028;
1029; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all:
1030; GFX940:       ; %bb.0:
1031; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1032; GFX940-NEXT:    v_max_f32_e64 v9, |v2|, |v5|
1033; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
1034; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v2|, |v5|
1035; GFX940-NEXT:    v_max_f32_e64 v5, |v1|, |v4|
1036; GFX940-NEXT:    s_nop 0
1037; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
1038; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v1|, |v4|
1039; GFX940-NEXT:    v_max_f32_e64 v4, |v0|, |v3|
1040; GFX940-NEXT:    s_nop 0
1041; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
1042; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, |v0|, |v3|
1043; GFX940-NEXT:    s_nop 1
1044; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
1045; GFX940-NEXT:    v_max_f32_e64 v3, v0, |v6|
1046; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, |v6|
1047; GFX940-NEXT:    s_nop 1
1048; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
1049; GFX940-NEXT:    v_max_f32_e64 v3, v1, |v7|
1050; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, |v7|
1051; GFX940-NEXT:    s_nop 1
1052; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
1053; GFX940-NEXT:    v_max_f32_e64 v3, v2, |v8|
1054; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v2, |v8|
1055; GFX940-NEXT:    s_nop 1
1056; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
1057; GFX940-NEXT:    s_setpc_b64 s[30:31]
1058;
1059; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all:
1060; GFX950:       ; %bb.0:
1061; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062; GFX950-NEXT:    v_maximum3_f32 v0, |v0|, |v3|, |v6|
1063; GFX950-NEXT:    v_maximum3_f32 v1, |v1|, |v4|, |v7|
1064; GFX950-NEXT:    v_maximum3_f32 v2, |v2|, |v5|, |v8|
1065; GFX950-NEXT:    s_setpc_b64 s[30:31]
1066  %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
1067  %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b)
1068  %c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c)
1069  %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a.fabs, <3 x float> %b.fabs)
1070  %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c.fabs)
1071  ret <3 x float> %max1
1072}
1073
; <3 x float> maximum chain where all three inputs are negated with fneg first.
; The gfx1200 and gfx950 checks expect one v_maximum3_f32 per element with a
; negation modifier on every source. The gfx940 checks expect the
; v_max_f32 / v_cmp_o_f32 / v_cndmask_b32 expansion, with the modifiers applied
; only to the original inputs.
1074define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
1075; GFX12-LABEL: v_fmaximum3_v3f32__fneg_all:
1076; GFX12:       ; %bb.0:
1077; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1078; GFX12-NEXT:    s_wait_expcnt 0x0
1079; GFX12-NEXT:    s_wait_samplecnt 0x0
1080; GFX12-NEXT:    s_wait_bvhcnt 0x0
1081; GFX12-NEXT:    s_wait_kmcnt 0x0
1082; GFX12-NEXT:    v_maximum3_f32 v0, -v0, -v3, -v6
1083; GFX12-NEXT:    v_maximum3_f32 v1, -v1, -v4, -v7
1084; GFX12-NEXT:    v_maximum3_f32 v2, -v2, -v5, -v8
1085; GFX12-NEXT:    s_setpc_b64 s[30:31]
1086;
1087; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all:
1088; GFX940:       ; %bb.0:
1089; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1090; GFX940-NEXT:    v_max_f32_e64 v9, -v2, -v5
1091; GFX940-NEXT:    v_mov_b32_e32 v10, 0x7fc00000
1092; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v2, -v5
1093; GFX940-NEXT:    v_max_f32_e64 v5, -v1, -v4
1094; GFX940-NEXT:    s_nop 0
1095; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
1096; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v1, -v4
1097; GFX940-NEXT:    v_max_f32_e64 v4, -v0, -v3
1098; GFX940-NEXT:    s_nop 0
1099; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v5, vcc
1100; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v3
1101; GFX940-NEXT:    s_nop 1
1102; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v4, vcc
1103; GFX940-NEXT:    v_max_f32_e64 v3, v0, -v6
1104; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v0, -v6
1105; GFX940-NEXT:    s_nop 1
1106; GFX940-NEXT:    v_cndmask_b32_e32 v0, v10, v3, vcc
1107; GFX940-NEXT:    v_max_f32_e64 v3, v1, -v7
1108; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v1, -v7
1109; GFX940-NEXT:    s_nop 1
1110; GFX940-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
1111; GFX940-NEXT:    v_max_f32_e64 v3, v2, -v8
1112; GFX940-NEXT:    v_cmp_o_f32_e64 vcc, v2, -v8
1113; GFX940-NEXT:    s_nop 1
1114; GFX940-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
1115; GFX940-NEXT:    s_setpc_b64 s[30:31]
1116;
1117; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all:
1118; GFX950:       ; %bb.0:
1119; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120; GFX950-NEXT:    v_maximum3_f32 v0, -v0, -v3, -v6
1121; GFX950-NEXT:    v_maximum3_f32 v1, -v1, -v4, -v7
1122; GFX950-NEXT:    v_maximum3_f32 v2, -v2, -v5, -v8
1123; GFX950-NEXT:    s_setpc_b64 s[30:31]
1124  %a.fneg = fneg <3 x float> %a
1125  %b.fneg = fneg <3 x float> %b
1126  %c.fneg = fneg <3 x float> %c
1127  %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a.fneg, <3 x float> %b.fneg)
1128  %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c.fneg)
1129  ret <3 x float> %max1
1130}
1131
; <3 x float> maximum chain whose inner call takes a splat of 2.0 as its second
; operand. The gfx1200 and gfx950 checks expect 2.0 to appear directly as the
; middle source of each v_maximum3_f32. In the gfx940 expansion the constant
; stage compares each input against itself (v_cmp_o_f32 vN, vN).
1132define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c) {
1133; GFX12-LABEL: v_fmaximum3_v3f32__inlineimm1:
1134; GFX12:       ; %bb.0:
1135; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1136; GFX12-NEXT:    s_wait_expcnt 0x0
1137; GFX12-NEXT:    s_wait_samplecnt 0x0
1138; GFX12-NEXT:    s_wait_bvhcnt 0x0
1139; GFX12-NEXT:    s_wait_kmcnt 0x0
1140; GFX12-NEXT:    v_maximum3_f32 v0, v0, 2.0, v3
1141; GFX12-NEXT:    v_maximum3_f32 v1, v1, 2.0, v4
1142; GFX12-NEXT:    v_maximum3_f32 v2, v2, 2.0, v5
1143; GFX12-NEXT:    s_setpc_b64 s[30:31]
1144;
1145; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm1:
1146; GFX940:       ; %bb.0:
1147; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1148; GFX940-NEXT:    v_max_f32_e32 v6, 2.0, v2
1149; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
1150; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
1151; GFX940-NEXT:    s_nop 1
1152; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
1153; GFX940-NEXT:    v_max_f32_e32 v6, 2.0, v1
1154; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
1155; GFX940-NEXT:    s_nop 1
1156; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
1157; GFX940-NEXT:    v_max_f32_e32 v6, 2.0, v0
1158; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
1159; GFX940-NEXT:    s_nop 1
1160; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
1161; GFX940-NEXT:    v_max_f32_e32 v6, v0, v3
1162; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
1163; GFX940-NEXT:    v_max_f32_e32 v3, v1, v4
1164; GFX940-NEXT:    s_nop 0
1165; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
1166; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
1167; GFX940-NEXT:    s_nop 1
1168; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1169; GFX940-NEXT:    v_max_f32_e32 v3, v2, v5
1170; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1171; GFX940-NEXT:    s_nop 1
1172; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1173; GFX940-NEXT:    s_setpc_b64 s[30:31]
1174;
1175; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1:
1176; GFX950:       ; %bb.0:
1177; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1178; GFX950-NEXT:    v_maximum3_f32 v0, v0, 2.0, v3
1179; GFX950-NEXT:    v_maximum3_f32 v1, v1, 2.0, v4
1180; GFX950-NEXT:    v_maximum3_f32 v2, v2, 2.0, v5
1181; GFX950-NEXT:    s_setpc_b64 s[30:31]
1182  %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> <float 2.0, float 2.0, float 2.0>)
1183  %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c)
1184  ret <3 x float> %max1
1185}
1186
; <3 x float> maximum chain whose outer call takes a splat of 4.0 as its second
; operand. The gfx1200 and gfx950 checks expect 4.0 to appear directly as the
; last source of each v_maximum3_f32. In the gfx940 expansion the constant
; stage compares the intermediate result against itself (v_cmp_o_f32 vN, vN).
1187define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b) {
1188; GFX12-LABEL: v_fmaximum3_v3f32__inlineimm2:
1189; GFX12:       ; %bb.0:
1190; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1191; GFX12-NEXT:    s_wait_expcnt 0x0
1192; GFX12-NEXT:    s_wait_samplecnt 0x0
1193; GFX12-NEXT:    s_wait_bvhcnt 0x0
1194; GFX12-NEXT:    s_wait_kmcnt 0x0
1195; GFX12-NEXT:    v_maximum3_f32 v0, v0, v3, 4.0
1196; GFX12-NEXT:    v_maximum3_f32 v1, v1, v4, 4.0
1197; GFX12-NEXT:    v_maximum3_f32 v2, v2, v5, 4.0
1198; GFX12-NEXT:    s_setpc_b64 s[30:31]
1199;
1200; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2:
1201; GFX940:       ; %bb.0:
1202; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203; GFX940-NEXT:    v_max_f32_e32 v6, v2, v5
1204; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
1205; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1206; GFX940-NEXT:    v_max_f32_e32 v5, v1, v4
1207; GFX940-NEXT:    s_nop 0
1208; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
1209; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
1210; GFX940-NEXT:    v_max_f32_e32 v4, v0, v3
1211; GFX940-NEXT:    s_nop 0
1212; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
1213; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
1214; GFX940-NEXT:    s_nop 1
1215; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
1216; GFX940-NEXT:    v_max_f32_e32 v3, 4.0, v0
1217; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
1218; GFX940-NEXT:    s_nop 1
1219; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1220; GFX940-NEXT:    v_max_f32_e32 v3, 4.0, v1
1221; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v1, v1
1222; GFX940-NEXT:    s_nop 1
1223; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1224; GFX940-NEXT:    v_max_f32_e32 v3, 4.0, v2
1225; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v2, v2
1226; GFX940-NEXT:    s_nop 1
1227; GFX940-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1228; GFX940-NEXT:    s_setpc_b64 s[30:31]
1229;
1230; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2:
1231; GFX950:       ; %bb.0:
1232; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233; GFX950-NEXT:    v_maximum3_f32 v0, v0, v3, 4.0
1234; GFX950-NEXT:    v_maximum3_f32 v1, v1, v4, 4.0
1235; GFX950-NEXT:    v_maximum3_f32 v2, v2, v5, 4.0
1236; GFX950-NEXT:    s_setpc_b64 s[30:31]
1237  %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b)
1238  %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> <float 4.0, float 4.0, float 4.0>)
1239  ret <3 x float> %max1
1240}
1241
1242
; Scalar half baseline: maximum(maximum(%a, %b), %c). The gfx1200 checks expect
; a single v_maximum3_f16. The checks shared by the gfx940 and gfx950 runs
; expect each maximum expanded into v_max_f16 / v_cmp_o_f16 / v_cndmask_b32,
; selecting 0x7e00 when the ordered compare fails.
1243define half @v_fmaximum3_f16(half %a, half %b, half %c) {
1244; GFX12-LABEL: v_fmaximum3_f16:
1245; GFX12:       ; %bb.0:
1246; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1247; GFX12-NEXT:    s_wait_expcnt 0x0
1248; GFX12-NEXT:    s_wait_samplecnt 0x0
1249; GFX12-NEXT:    s_wait_bvhcnt 0x0
1250; GFX12-NEXT:    s_wait_kmcnt 0x0
1251; GFX12-NEXT:    v_maximum3_f16 v0, v0, v1, v2
1252; GFX12-NEXT:    s_setpc_b64 s[30:31]
1253;
1254; GFX9-LABEL: v_fmaximum3_f16:
1255; GFX9:       ; %bb.0:
1256; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1257; GFX9-NEXT:    v_max_f16_e32 v3, v0, v1
1258; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1259; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1260; GFX9-NEXT:    s_nop 1
1261; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1262; GFX9-NEXT:    v_max_f16_e32 v1, v0, v2
1263; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1264; GFX9-NEXT:    s_nop 1
1265; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1266; GFX9-NEXT:    s_setpc_b64 s[30:31]
1267  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1268  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1269  ret half %max1
1270}
1271
; Commuted scalar half variant: the outer call is maximum(%c, maximum(%a, %b)),
; so the inner result is the second operand. The gfx1200 checks still expect a
; single v_maximum3_f16, with the %c register (v2) as the first source. The
; checks shared by the gfx940 and gfx950 runs expect the expanded
; v_max_f16 / v_cmp_o_f16 / v_cndmask_b32 sequence.
1272define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) {
1273; GFX12-LABEL: v_fmaximum3_f16_commute:
1274; GFX12:       ; %bb.0:
1275; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1276; GFX12-NEXT:    s_wait_expcnt 0x0
1277; GFX12-NEXT:    s_wait_samplecnt 0x0
1278; GFX12-NEXT:    s_wait_bvhcnt 0x0
1279; GFX12-NEXT:    s_wait_kmcnt 0x0
1280; GFX12-NEXT:    v_maximum3_f16 v0, v2, v0, v1
1281; GFX12-NEXT:    s_setpc_b64 s[30:31]
1282;
1283; GFX9-LABEL: v_fmaximum3_f16_commute:
1284; GFX9:       ; %bb.0:
1285; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1286; GFX9-NEXT:    v_max_f16_e32 v3, v0, v1
1287; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1288; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1289; GFX9-NEXT:    s_nop 1
1290; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1291; GFX9-NEXT:    v_max_f16_e32 v1, v2, v0
1292; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v2, v0
1293; GFX9-NEXT:    s_nop 1
1294; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1295; GFX9-NEXT:    s_setpc_b64 s[30:31]
1296  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1297  %max1 = call half @llvm.maximum.f16(half %c, half %max0)
1298  ret half %max1
1299}
1300
; amdgpu_ps variant with all three half inputs marked inreg; the checks show
; them arriving in s0, s1 and s2. The half result is bitcast to i16,
; zero-extended to i32 and passed through llvm.amdgcn.readfirstlane, which the
; checks show as v_and_b32 with 0xffff followed by v_readfirstlane_b32 into s0.
; The gfx1200 checks expect one v_maximum3_f16 (with s2 first copied to a
; VGPR); the checks shared by the gfx940 and gfx950 runs expect the expanded
; v_max_f16 / v_cmp_o_f16 / v_cndmask_b32 sequence.
1301define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %c) {
1302; GFX12-LABEL: s_fmaximum3_f16:
1303; GFX12:       ; %bb.0:
1304; GFX12-NEXT:    v_mov_b32_e32 v0, s2
1305; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1306; GFX12-NEXT:    v_maximum3_f16 v0, s0, s1, v0
1307; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1308; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1309; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
1310; GFX12-NEXT:    ; return to shader part epilog
1311;
1312; GFX9-LABEL: s_fmaximum3_f16:
1313; GFX9:       ; %bb.0:
1314; GFX9-NEXT:    v_mov_b32_e32 v0, s1
1315; GFX9-NEXT:    v_max_f16_e32 v1, s0, v0
1316; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
1317; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
1318; GFX9-NEXT:    s_nop 1
1319; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1320; GFX9-NEXT:    v_max_f16_e32 v1, s2, v0
1321; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
1322; GFX9-NEXT:    s_nop 1
1323; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1324; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1325; GFX9-NEXT:    s_nop 0
1326; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1327; GFX9-NEXT:    ; return to shader part epilog
1328  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1329  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1330  %cast = bitcast half %max1 to i16
1331  %zext = zext i16 %cast to i32
1332  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %zext)
1333  ret i32 %readfirstlane
1334}
1335
; Scalar half chain with llvm.fabs applied to %a only. The gfx1200 checks
; expect the |v0| modifier on the first source of v_maximum3_f16. In the
; expanded sequence checked for the gfx940 and gfx950 runs, the modifier
; appears on the first v_max_f16 / v_cmp_o_f16 pair only.
1336define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) {
1337; GFX12-LABEL: v_fmaximum3_f16_fabs0:
1338; GFX12:       ; %bb.0:
1339; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1340; GFX12-NEXT:    s_wait_expcnt 0x0
1341; GFX12-NEXT:    s_wait_samplecnt 0x0
1342; GFX12-NEXT:    s_wait_bvhcnt 0x0
1343; GFX12-NEXT:    s_wait_kmcnt 0x0
1344; GFX12-NEXT:    v_maximum3_f16 v0, |v0|, v1, v2
1345; GFX12-NEXT:    s_setpc_b64 s[30:31]
1346;
1347; GFX9-LABEL: v_fmaximum3_f16_fabs0:
1348; GFX9:       ; %bb.0:
1349; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1350; GFX9-NEXT:    v_max_f16_e64 v3, |v0|, v1
1351; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1352; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, v1
1353; GFX9-NEXT:    s_nop 1
1354; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1355; GFX9-NEXT:    v_max_f16_e32 v1, v0, v2
1356; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1357; GFX9-NEXT:    s_nop 1
1358; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1359; GFX9-NEXT:    s_setpc_b64 s[30:31]
1360  %a.fabs = call half @llvm.fabs.f16(half %a)
1361  %max0 = call half @llvm.maximum.f16(half %a.fabs, half %b)
1362  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1363  ret half %max1
1364}
1365
; Scalar half chain with llvm.fabs applied to %b only. The gfx1200 checks
; expect the |v1| modifier on the second source of v_maximum3_f16. In the
; expanded sequence checked for the gfx940 and gfx950 runs, the modifier
; appears on the first v_max_f16 / v_cmp_o_f16 pair only.
1366define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) {
1367; GFX12-LABEL: v_fmaximum3_f16_fabs1:
1368; GFX12:       ; %bb.0:
1369; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1370; GFX12-NEXT:    s_wait_expcnt 0x0
1371; GFX12-NEXT:    s_wait_samplecnt 0x0
1372; GFX12-NEXT:    s_wait_bvhcnt 0x0
1373; GFX12-NEXT:    s_wait_kmcnt 0x0
1374; GFX12-NEXT:    v_maximum3_f16 v0, v0, |v1|, v2
1375; GFX12-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; GFX9-LABEL: v_fmaximum3_f16_fabs1:
1378; GFX9:       ; %bb.0:
1379; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; GFX9-NEXT:    v_max_f16_e64 v3, v0, |v1|
1381; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1382; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v1|
1383; GFX9-NEXT:    s_nop 1
1384; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1385; GFX9-NEXT:    v_max_f16_e32 v1, v0, v2
1386; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1387; GFX9-NEXT:    s_nop 1
1388; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1389; GFX9-NEXT:    s_setpc_b64 s[30:31]
1390  %b.fabs = call half @llvm.fabs.f16(half %b)
1391  %max0 = call half @llvm.maximum.f16(half %a, half %b.fabs)
1392  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1393  ret half %max1
1394}
1395
; Scalar half chain with llvm.fabs applied to %c only. The gfx1200 checks
; expect the |v2| modifier on the third source of v_maximum3_f16. In the
; expanded sequence checked for the gfx940 and gfx950 runs, the modifier
; appears on the second v_max_f16 / v_cmp_o_f16 pair only.
1396define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) {
1397; GFX12-LABEL: v_fmaximum3_f16_fabs2:
1398; GFX12:       ; %bb.0:
1399; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1400; GFX12-NEXT:    s_wait_expcnt 0x0
1401; GFX12-NEXT:    s_wait_samplecnt 0x0
1402; GFX12-NEXT:    s_wait_bvhcnt 0x0
1403; GFX12-NEXT:    s_wait_kmcnt 0x0
1404; GFX12-NEXT:    v_maximum3_f16 v0, v0, v1, |v2|
1405; GFX12-NEXT:    s_setpc_b64 s[30:31]
1406;
1407; GFX9-LABEL: v_fmaximum3_f16_fabs2:
1408; GFX9:       ; %bb.0:
1409; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1410; GFX9-NEXT:    v_max_f16_e32 v3, v0, v1
1411; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1412; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1413; GFX9-NEXT:    s_nop 1
1414; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1415; GFX9-NEXT:    v_max_f16_e64 v1, v0, |v2|
1416; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
1417; GFX9-NEXT:    s_nop 1
1418; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1419; GFX9-NEXT:    s_setpc_b64 s[30:31]
1420  %c.fabs = call half @llvm.fabs.f16(half %c)
1421  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1422  %max1 = call half @llvm.maximum.f16(half %max0, half %c.fabs)
1423  ret half %max1
1424}
1425
; Scalar half chain with llvm.fabs applied to all three inputs. The gfx1200
; checks expect |x| modifiers on every source of v_maximum3_f16. In the
; expanded sequence checked for the gfx940 and gfx950 runs, the modifiers
; appear on the original inputs but not on the intermediate result.
1426define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) {
1427; GFX12-LABEL: v_fmaximum3_f16_fabs_all:
1428; GFX12:       ; %bb.0:
1429; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1430; GFX12-NEXT:    s_wait_expcnt 0x0
1431; GFX12-NEXT:    s_wait_samplecnt 0x0
1432; GFX12-NEXT:    s_wait_bvhcnt 0x0
1433; GFX12-NEXT:    s_wait_kmcnt 0x0
1434; GFX12-NEXT:    v_maximum3_f16 v0, |v0|, |v1|, |v2|
1435; GFX12-NEXT:    s_setpc_b64 s[30:31]
1436;
1437; GFX9-LABEL: v_fmaximum3_f16_fabs_all:
1438; GFX9:       ; %bb.0:
1439; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1440; GFX9-NEXT:    v_max_f16_e64 v3, |v0|, |v1|
1441; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1442; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
1443; GFX9-NEXT:    s_nop 1
1444; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1445; GFX9-NEXT:    v_max_f16_e64 v1, v0, |v2|
1446; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
1447; GFX9-NEXT:    s_nop 1
1448; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1449; GFX9-NEXT:    s_setpc_b64 s[30:31]
1450  %a.fabs = call half @llvm.fabs.f16(half %a)
1451  %b.fabs = call half @llvm.fabs.f16(half %b)
1452  %c.fabs = call half @llvm.fabs.f16(half %c)
1453  %max0 = call half @llvm.maximum.f16(half %a.fabs, half %b.fabs)
1454  %max1 = call half @llvm.maximum.f16(half %max0, half %c.fabs)
1455  ret half %max1
1456}
1457
; Scalar half chain with fneg applied to all three inputs. The gfx1200 checks
; expect a negation modifier on every source of v_maximum3_f16. In the
; expanded sequence checked for the gfx940 and gfx950 runs, the modifiers
; appear on the original inputs but not on the intermediate result.
1458define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) {
1459; GFX12-LABEL: v_fmaximum3_f16_fneg_all:
1460; GFX12:       ; %bb.0:
1461; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1462; GFX12-NEXT:    s_wait_expcnt 0x0
1463; GFX12-NEXT:    s_wait_samplecnt 0x0
1464; GFX12-NEXT:    s_wait_bvhcnt 0x0
1465; GFX12-NEXT:    s_wait_kmcnt 0x0
1466; GFX12-NEXT:    v_maximum3_f16 v0, -v0, -v1, -v2
1467; GFX12-NEXT:    s_setpc_b64 s[30:31]
1468;
1469; GFX9-LABEL: v_fmaximum3_f16_fneg_all:
1470; GFX9:       ; %bb.0:
1471; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472; GFX9-NEXT:    v_max_f16_e64 v3, -v0, -v1
1473; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1474; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
1475; GFX9-NEXT:    s_nop 1
1476; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1477; GFX9-NEXT:    v_max_f16_e64 v1, v0, -v2
1478; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v2
1479; GFX9-NEXT:    s_nop 1
1480; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1481; GFX9-NEXT:    s_setpc_b64 s[30:31]
1482  %a.fneg = fneg half %a
1483  %b.fneg = fneg half %b
1484  %c.fneg = fneg half %c
1485  %max0 = call half @llvm.maximum.f16(half %a.fneg, half %b.fneg)
1486  %max1 = call half @llvm.maximum.f16(half %max0, half %c.fneg)
1487  ret half %max1
1488}
1489
; Scalar half chain where every input is fneg(fabs(x)). The gfx1200 checks
; expect the combined -|x| modifier on every source of v_maximum3_f16. In the
; expanded sequence checked for the gfx940 and gfx950 runs, the combined
; modifier appears on the original inputs but not on the intermediate result.
1490define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
1491; GFX12-LABEL: v_fmaximum3_f16_fneg_fabs_all:
1492; GFX12:       ; %bb.0:
1493; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1494; GFX12-NEXT:    s_wait_expcnt 0x0
1495; GFX12-NEXT:    s_wait_samplecnt 0x0
1496; GFX12-NEXT:    s_wait_bvhcnt 0x0
1497; GFX12-NEXT:    s_wait_kmcnt 0x0
1498; GFX12-NEXT:    v_maximum3_f16 v0, -|v0|, -|v1|, -|v2|
1499; GFX12-NEXT:    s_setpc_b64 s[30:31]
1500;
1501; GFX9-LABEL: v_fmaximum3_f16_fneg_fabs_all:
1502; GFX9:       ; %bb.0:
1503; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1504; GFX9-NEXT:    v_max_f16_e64 v3, -|v0|, -|v1|
1505; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1506; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -|v0|, -|v1|
1507; GFX9-NEXT:    s_nop 1
1508; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1509; GFX9-NEXT:    v_max_f16_e64 v1, v0, -|v2|
1510; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, -|v2|
1511; GFX9-NEXT:    s_nop 1
1512; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1513; GFX9-NEXT:    s_setpc_b64 s[30:31]
1514  %a.fabs = call half @llvm.fabs.f16(half %a)
1515  %b.fabs = call half @llvm.fabs.f16(half %b)
1516  %c.fabs = call half @llvm.fabs.f16(half %c)
1517  %a.fneg.fabs = fneg half %a.fabs
1518  %b.fneg.fabs = fneg half %b.fabs
1519  %c.fneg.fabs = fneg half %c.fabs
1520  %max0 = call half @llvm.maximum.f16(half %a.fneg.fabs, half %b.fneg.fabs)
1521  %max1 = call half @llvm.maximum.f16(half %max0, half %c.fneg.fabs)
1522  ret half %max1
1523}
1524
; Scalar f16 case with only the first operand negated:
; maximum(maximum(-a, b), c).
; The gfx1200 checks expect one v_maximum3_f16 with the neg modifier on the
; first source only. In the checks shared by the gfx940 and gfx950 runs, the
; first max/compare pair uses the e64 form with -v0, and the second pair uses
; the plain e32 form.
1525define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) {
1526; GFX12-LABEL: v_fmaximum3_f16_fneg0:
1527; GFX12:       ; %bb.0:
1528; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1529; GFX12-NEXT:    s_wait_expcnt 0x0
1530; GFX12-NEXT:    s_wait_samplecnt 0x0
1531; GFX12-NEXT:    s_wait_bvhcnt 0x0
1532; GFX12-NEXT:    s_wait_kmcnt 0x0
1533; GFX12-NEXT:    v_maximum3_f16 v0, -v0, v1, v2
1534; GFX12-NEXT:    s_setpc_b64 s[30:31]
1535;
1536; GFX9-LABEL: v_fmaximum3_f16_fneg0:
1537; GFX9:       ; %bb.0:
1538; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1539; GFX9-NEXT:    v_max_f16_e64 v3, -v0, v1
1540; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1541; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, v1
1542; GFX9-NEXT:    s_nop 1
1543; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1544; GFX9-NEXT:    v_max_f16_e32 v1, v0, v2
1545; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1546; GFX9-NEXT:    s_nop 1
1547; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1548; GFX9-NEXT:    s_setpc_b64 s[30:31]
1549  %a.fneg = fneg half %a
1550  %max0 = call half @llvm.maximum.f16(half %a.fneg, half %b)
1551  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1552  ret half %max1
1553}
1554
; Scalar f16 case with only the second operand negated:
; maximum(maximum(a, -b), c).
; The gfx1200 checks expect one v_maximum3_f16 with the neg modifier on the
; second source only. In the checks shared by the gfx940 and gfx950 runs, the
; first max/compare pair uses the e64 form with -v1, and the second pair uses
; the plain e32 form.
1555define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) {
1556; GFX12-LABEL: v_fmaximum3_f16_fneg1:
1557; GFX12:       ; %bb.0:
1558; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1559; GFX12-NEXT:    s_wait_expcnt 0x0
1560; GFX12-NEXT:    s_wait_samplecnt 0x0
1561; GFX12-NEXT:    s_wait_bvhcnt 0x0
1562; GFX12-NEXT:    s_wait_kmcnt 0x0
1563; GFX12-NEXT:    v_maximum3_f16 v0, v0, -v1, v2
1564; GFX12-NEXT:    s_setpc_b64 s[30:31]
1565;
1566; GFX9-LABEL: v_fmaximum3_f16_fneg1:
1567; GFX9:       ; %bb.0:
1568; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1569; GFX9-NEXT:    v_max_f16_e64 v3, v0, -v1
1570; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1571; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v1
1572; GFX9-NEXT:    s_nop 1
1573; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1574; GFX9-NEXT:    v_max_f16_e32 v1, v0, v2
1575; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1576; GFX9-NEXT:    s_nop 1
1577; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1578; GFX9-NEXT:    s_setpc_b64 s[30:31]
1579  %b.fneg = fneg half %b
1580  %max0 = call half @llvm.maximum.f16(half %a, half %b.fneg)
1581  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1582  ret half %max1
1583}
1584
; Scalar f16 case with only the third operand negated:
; maximum(maximum(a, b), -c).
; The gfx1200 checks expect one v_maximum3_f16 with the neg modifier on the
; third source only. In the checks shared by the gfx940 and gfx950 runs, the
; first max/compare pair uses the plain e32 form, and the second pair uses the
; e64 form with -v2.
1585define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) {
1586; GFX12-LABEL: v_fmaximum3_f16_fneg2:
1587; GFX12:       ; %bb.0:
1588; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1589; GFX12-NEXT:    s_wait_expcnt 0x0
1590; GFX12-NEXT:    s_wait_samplecnt 0x0
1591; GFX12-NEXT:    s_wait_bvhcnt 0x0
1592; GFX12-NEXT:    s_wait_kmcnt 0x0
1593; GFX12-NEXT:    v_maximum3_f16 v0, v0, v1, -v2
1594; GFX12-NEXT:    s_setpc_b64 s[30:31]
1595;
1596; GFX9-LABEL: v_fmaximum3_f16_fneg2:
1597; GFX9:       ; %bb.0:
1598; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599; GFX9-NEXT:    v_max_f16_e32 v3, v0, v1
1600; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
1601; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1602; GFX9-NEXT:    s_nop 1
1603; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1604; GFX9-NEXT:    v_max_f16_e64 v1, v0, -v2
1605; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, -v2
1606; GFX9-NEXT:    s_nop 1
1607; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1608; GFX9-NEXT:    s_setpc_b64 s[30:31]
1609  %c.fneg = fneg half %c
1610  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1611  %max1 = call half @llvm.maximum.f16(half %max0, half %c.fneg)
1612  ret half %max1
1613}
1614
; Scalar f16 case with the constant 8.0 as the first operand of the inner call:
; maximum(maximum(8.0, b), c). Note the function takes only %b and %c.
; The gfx1200 checks expect one v_maximum3_f16 with the literal 0x4800 in the
; second source slot. In the checks shared by the gfx940 and gfx950 runs, the
; first round is v_max_f16 against 0x4800 plus an ordered compare of v0 with
; itself; the second round is the usual register/register pair.
1615define half @v_fmaximum3_f16_const0(half %b, half %c) {
1616; GFX12-LABEL: v_fmaximum3_f16_const0:
1617; GFX12:       ; %bb.0:
1618; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1619; GFX12-NEXT:    s_wait_expcnt 0x0
1620; GFX12-NEXT:    s_wait_samplecnt 0x0
1621; GFX12-NEXT:    s_wait_bvhcnt 0x0
1622; GFX12-NEXT:    s_wait_kmcnt 0x0
1623; GFX12-NEXT:    v_maximum3_f16 v0, v0, 0x4800, v1
1624; GFX12-NEXT:    s_setpc_b64 s[30:31]
1625;
1626; GFX9-LABEL: v_fmaximum3_f16_const0:
1627; GFX9:       ; %bb.0:
1628; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1629; GFX9-NEXT:    v_max_f16_e32 v2, 0x4800, v0
1630; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
1631; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
1632; GFX9-NEXT:    s_nop 1
1633; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
1634; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
1635; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1636; GFX9-NEXT:    s_nop 1
1637; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
1638; GFX9-NEXT:    s_setpc_b64 s[30:31]
1639  %max0 = call half @llvm.maximum.f16(half 8.0, half %b)
1640  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1641  ret half %max1
1642}
1643
; Scalar f16 case with the constant 8.0 as the second operand of the outer
; call: maximum(maximum(a, b), 8.0).
; The gfx1200 checks expect one v_maximum3_f16 with the literal 0x4800 in the
; third source slot. In the checks shared by the gfx940 and gfx950 runs, the
; first round is the register/register pair; the second round is v_max_f16
; against 0x4800 plus an ordered compare of v0 with itself.
1644define half @v_fmaximum3_f16__const2(half %a, half %b) {
1645; GFX12-LABEL: v_fmaximum3_f16__const2:
1646; GFX12:       ; %bb.0:
1647; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1648; GFX12-NEXT:    s_wait_expcnt 0x0
1649; GFX12-NEXT:    s_wait_samplecnt 0x0
1650; GFX12-NEXT:    s_wait_bvhcnt 0x0
1651; GFX12-NEXT:    s_wait_kmcnt 0x0
1652; GFX12-NEXT:    v_maximum3_f16 v0, v0, v1, 0x4800
1653; GFX12-NEXT:    s_setpc_b64 s[30:31]
1654;
1655; GFX9-LABEL: v_fmaximum3_f16__const2:
1656; GFX9:       ; %bb.0:
1657; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1658; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
1659; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
1660; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1661; GFX9-NEXT:    s_nop 1
1662; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
1663; GFX9-NEXT:    v_max_f16_e32 v1, 0x4800, v0
1664; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
1665; GFX9-NEXT:    s_nop 1
1666; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1667; GFX9-NEXT:    s_setpc_b64 s[30:31]
1668  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1669  %max1 = call half @llvm.maximum.f16(half %max0, half 8.0)
1670  ret half %max1
1671}
1672
; Scalar f16 case with the constant 4.0 as the first operand of the inner call:
; maximum(maximum(4.0, b), c). Note the function takes only %b and %c.
; Unlike the 8.0 tests, the expected instructions show the constant as the
; operand 4.0 rather than as a hex literal. The gfx1200 checks place it in the
; second source slot of v_maximum3_f16; the checks shared by the gfx940 and
; gfx950 runs use it in the first v_max_f16 and compare v0 with itself.
1673define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) {
1674; GFX12-LABEL: v_fmaximum3_f16_inlineimm0:
1675; GFX12:       ; %bb.0:
1676; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1677; GFX12-NEXT:    s_wait_expcnt 0x0
1678; GFX12-NEXT:    s_wait_samplecnt 0x0
1679; GFX12-NEXT:    s_wait_bvhcnt 0x0
1680; GFX12-NEXT:    s_wait_kmcnt 0x0
1681; GFX12-NEXT:    v_maximum3_f16 v0, v0, 4.0, v1
1682; GFX12-NEXT:    s_setpc_b64 s[30:31]
1683;
1684; GFX9-LABEL: v_fmaximum3_f16_inlineimm0:
1685; GFX9:       ; %bb.0:
1686; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1687; GFX9-NEXT:    v_max_f16_e32 v2, 4.0, v0
1688; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
1689; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
1690; GFX9-NEXT:    s_nop 1
1691; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
1692; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
1693; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1694; GFX9-NEXT:    s_nop 1
1695; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
1696; GFX9-NEXT:    s_setpc_b64 s[30:31]
1697  %max0 = call half @llvm.maximum.f16(half 4.0, half %b)
1698  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
1699  ret half %max1
1700}
1701
; Scalar f16 case with the constant 4.0 as the second operand of the outer
; call: maximum(maximum(a, b), 4.0).
; The expected instructions show the constant as the operand 4.0 rather than as
; a hex literal. The gfx1200 checks place it in the third source slot of
; v_maximum3_f16; the checks shared by the gfx940 and gfx950 runs use it in the
; second v_max_f16 and compare v0 with itself in that round.
1702define half @v_fmaximum3_f16__inlineimm(half %a, half %b) {
1703; GFX12-LABEL: v_fmaximum3_f16__inlineimm:
1704; GFX12:       ; %bb.0:
1705; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1706; GFX12-NEXT:    s_wait_expcnt 0x0
1707; GFX12-NEXT:    s_wait_samplecnt 0x0
1708; GFX12-NEXT:    s_wait_bvhcnt 0x0
1709; GFX12-NEXT:    s_wait_kmcnt 0x0
1710; GFX12-NEXT:    v_maximum3_f16 v0, v0, v1, 4.0
1711; GFX12-NEXT:    s_setpc_b64 s[30:31]
1712;
1713; GFX9-LABEL: v_fmaximum3_f16__inlineimm:
1714; GFX9:       ; %bb.0:
1715; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1716; GFX9-NEXT:    v_max_f16_e32 v2, v0, v1
1717; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
1718; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1719; GFX9-NEXT:    s_nop 1
1720; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
1721; GFX9-NEXT:    v_max_f16_e32 v1, 4.0, v0
1722; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
1723; GFX9-NEXT:    s_nop 1
1724; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1725; GFX9-NEXT:    s_setpc_b64 s[30:31]
1726  %max0 = call half @llvm.maximum.f16(half %a, half %b)
1727  %max1 = call half @llvm.maximum.f16(half %max0, half 4.0)
1728  ret half %max1
1729}
1730
; Scalar f16 case with two constant operands:
; maximum(maximum(a, 8.0), 16.0).
; The gfx1200 checks expect 0x4800 to be moved into s0 with s_movk_i32 and then
; one v_maximum3_f16 taking s0 as the second source and the literal 0x4c00 as
; the third. The checks shared by the gfx940 and gfx950 runs expect two rounds,
; each a v_max_f16 against a literal (0x4800, then 0x4c00) with an ordered
; compare of v0 with itself.
1731define half @v_fmaximum3_f16_const1_const2(half %a) {
1732; GFX12-LABEL: v_fmaximum3_f16_const1_const2:
1733; GFX12:       ; %bb.0:
1734; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1735; GFX12-NEXT:    s_wait_expcnt 0x0
1736; GFX12-NEXT:    s_wait_samplecnt 0x0
1737; GFX12-NEXT:    s_wait_bvhcnt 0x0
1738; GFX12-NEXT:    s_wait_kmcnt 0x0
1739; GFX12-NEXT:    s_movk_i32 s0, 0x4800
1740; GFX12-NEXT:    s_wait_alu 0xfffe
1741; GFX12-NEXT:    v_maximum3_f16 v0, v0, s0, 0x4c00
1742; GFX12-NEXT:    s_setpc_b64 s[30:31]
1743;
1744; GFX9-LABEL: v_fmaximum3_f16_const1_const2:
1745; GFX9:       ; %bb.0:
1746; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747; GFX9-NEXT:    v_max_f16_e32 v1, 0x4800, v0
1748; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
1749; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
1750; GFX9-NEXT:    s_nop 1
1751; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1752; GFX9-NEXT:    v_max_f16_e32 v1, 0x4c00, v0
1753; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
1754; GFX9-NEXT:    s_nop 1
1755; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1756; GFX9-NEXT:    s_setpc_b64 s[30:31]
1757  %max0 = call half @llvm.maximum.f16(half %a, half 8.0)
1758  %max1 = call half @llvm.maximum.f16(half %max0, half 16.0)
1759  ret half %max1
1760}
1761
; <2 x half> case where the outer call takes %c as its first operand:
; maximum(c, maximum(a, b)).
; The gfx1200 checks expect two dependent v_pk_maximum_f16 instructions (no
; three-operand form). The gfx940 checks expect each round to be expanded into
; v_pk_max_f16 plus a per-half ordered compare (e32 for the low half, sdwa for
; the high half), v_cndmask_b32 against 0x7e00, and a v_perm_b32 repack. The
; gfx950 checks expect two v_pk_maximum3_f16 instructions, each repeating its
; second source as the third.
1762define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
1763; GFX12-LABEL: v_fmaximum3_v2f16:
1764; GFX12:       ; %bb.0:
1765; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1766; GFX12-NEXT:    s_wait_expcnt 0x0
1767; GFX12-NEXT:    s_wait_samplecnt 0x0
1768; GFX12-NEXT:    s_wait_bvhcnt 0x0
1769; GFX12-NEXT:    s_wait_kmcnt 0x0
1770; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
1771; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1772; GFX12-NEXT:    v_pk_maximum_f16 v0, v2, v0
1773; GFX12-NEXT:    s_setpc_b64 s[30:31]
1774;
1775; GFX940-LABEL: v_fmaximum3_v2f16:
1776; GFX940:       ; %bb.0:
1777; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1778; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
1779; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
1780; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1781; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
1782; GFX940-NEXT:    s_nop 0
1783; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
1784; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1785; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
1786; GFX940-NEXT:    s_nop 1
1787; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1788; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
1789; GFX940-NEXT:    v_pk_max_f16 v1, v2, v1
1790; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
1791; GFX940-NEXT:    s_nop 1
1792; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
1793; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
1794; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
1795; GFX940-NEXT:    s_nop 1
1796; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1797; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
1798; GFX940-NEXT:    s_setpc_b64 s[30:31]
1799;
1800; GFX950-LABEL: v_fmaximum3_v2f16:
1801; GFX950:       ; %bb.0:
1802; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1803; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
1804; GFX950-NEXT:    s_nop 0
1805; GFX950-NEXT:    v_pk_maximum3_f16 v0, v2, v0, v0
1806; GFX950-NEXT:    s_setpc_b64 s[30:31]
1807  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
1808  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %max0)
1809  ret <2 x half> %max1
1810}
1811
; <2 x half> case with the outer call's operands in the other order compared
; with v_fmaximum3_v2f16: maximum(maximum(a, b), c).
; The expected code has the same shape as that test on every target; only the
; operand order of the second-round instructions differs (for example
; "v0, v2" instead of "v2, v0" in the gfx1200 checks).
1812define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
1813; GFX12-LABEL: v_fmaximum3_v2f16_commute:
1814; GFX12:       ; %bb.0:
1815; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1816; GFX12-NEXT:    s_wait_expcnt 0x0
1817; GFX12-NEXT:    s_wait_samplecnt 0x0
1818; GFX12-NEXT:    s_wait_bvhcnt 0x0
1819; GFX12-NEXT:    s_wait_kmcnt 0x0
1820; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
1821; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1822; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1823; GFX12-NEXT:    s_setpc_b64 s[30:31]
1824;
1825; GFX940-LABEL: v_fmaximum3_v2f16_commute:
1826; GFX940:       ; %bb.0:
1827; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1828; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
1829; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
1830; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
1831; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
1832; GFX940-NEXT:    s_nop 0
1833; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
1834; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1835; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
1836; GFX940-NEXT:    s_nop 1
1837; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1838; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
1839; GFX940-NEXT:    v_pk_max_f16 v1, v1, v2
1840; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
1841; GFX940-NEXT:    s_nop 1
1842; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
1843; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
1844; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
1845; GFX940-NEXT:    s_nop 1
1846; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1847; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
1848; GFX940-NEXT:    s_setpc_b64 s[30:31]
1849;
1850; GFX950-LABEL: v_fmaximum3_v2f16_commute:
1851; GFX950:       ; %bb.0:
1852; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1853; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
1854; GFX950-NEXT:    s_nop 0
1855; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1856; GFX950-NEXT:    s_setpc_b64 s[30:31]
1857  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
1858  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
1859  ret <2 x half> %max1
1860}
1861
; <2 x half> case with llvm.fabs.v2f16 applied to all three operands:
; maximum(maximum(|a|, |b|), |c|).
; In the gfx1200 and gfx950 checks the fabs is not folded into the packed max:
; each operand is first masked with v_and_b32 0x7fff7fff, then fed to two
; v_pk_maximum_f16 (gfx1200) or two v_pk_maximum3_f16 (gfx950) instructions.
; The gfx940 checks mask the inputs for v_pk_max_f16 but use |vN| source
; modifiers on the v_cmp_o_f16 compares of the original registers.
1862define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
1863; GFX12-LABEL: v_fmaximum3_v2f16__fabs_all:
1864; GFX12:       ; %bb.0:
1865; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1866; GFX12-NEXT:    s_wait_expcnt 0x0
1867; GFX12-NEXT:    s_wait_samplecnt 0x0
1868; GFX12-NEXT:    s_wait_bvhcnt 0x0
1869; GFX12-NEXT:    s_wait_kmcnt 0x0
1870; GFX12-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
1871; GFX12-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
1872; GFX12-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
1873; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1874; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
1875; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1876; GFX12-NEXT:    s_setpc_b64 s[30:31]
1877;
1878; GFX940-LABEL: v_fmaximum3_v2f16__fabs_all:
1879; GFX940:       ; %bb.0:
1880; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1881; GFX940-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
1882; GFX940-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
1883; GFX940-NEXT:    v_pk_max_f16 v3, v3, v4
1884; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
1885; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
1886; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
1887; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
1888; GFX940-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
1889; GFX940-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
1890; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
1891; GFX940-NEXT:    s_nop 1
1892; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
1893; GFX940-NEXT:    v_perm_b32 v1, v4, v0, s0
1894; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5
1895; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
1896; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1897; GFX940-NEXT:    s_nop 0
1898; GFX940-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1899; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
1900; GFX940-NEXT:    s_nop 1
1901; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
1902; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
1903; GFX940-NEXT:    s_setpc_b64 s[30:31]
1904;
1905; GFX950-LABEL: v_fmaximum3_v2f16__fabs_all:
1906; GFX950:       ; %bb.0:
1907; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1908; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
1909; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
1910; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
1911; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
1912; GFX950-NEXT:    s_nop 0
1913; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1914; GFX950-NEXT:    s_setpc_b64 s[30:31]
1915  %a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
1916  %b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
1917  %c.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %c)
1918  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a.fabs, <2 x half> %b.fabs)
1919  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c.fabs)
1920  ret <2 x half> %max1
1921}
1922
; <2 x half> case with fneg applied to all three operands:
; maximum(maximum(-a, -b), -c).
; Unlike the fabs variant, the expected code folds the negations into the
; packed instructions as neg_lo/neg_hi modifiers: [1,1] then [0,1] on
; v_pk_maximum_f16 (gfx1200) and v_pk_max_f16 (gfx940), and [1,1,1] then
; [0,1,1] on v_pk_maximum3_f16 (gfx950), where the second source is repeated
; as the third. The gfx940 compares use -vN source modifiers.
1923define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
1924; GFX12-LABEL: v_fmaximum3_v2f16__fneg_all:
1925; GFX12:       ; %bb.0:
1926; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1927; GFX12-NEXT:    s_wait_expcnt 0x0
1928; GFX12-NEXT:    s_wait_samplecnt 0x0
1929; GFX12-NEXT:    s_wait_bvhcnt 0x0
1930; GFX12-NEXT:    s_wait_kmcnt 0x0
1931; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
1932; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1933; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
1934; GFX12-NEXT:    s_setpc_b64 s[30:31]
1935;
1936; GFX940-LABEL: v_fmaximum3_v2f16__fneg_all:
1937; GFX940:       ; %bb.0:
1938; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
1940; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
1941; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
1942; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
1943; GFX940-NEXT:    s_nop 0
1944; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
1945; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1946; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
1947; GFX940-NEXT:    s_nop 1
1948; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
1949; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
1950; GFX940-NEXT:    v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
1951; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
1952; GFX940-NEXT:    s_nop 1
1953; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
1954; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
1955; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
1956; GFX940-NEXT:    s_nop 1
1957; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1958; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
1959; GFX940-NEXT:    s_setpc_b64 s[30:31]
1960;
1961; GFX950-LABEL: v_fmaximum3_v2f16__fneg_all:
1962; GFX950:       ; %bb.0:
1963; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
1965; GFX950-NEXT:    s_nop 0
1966; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[0,1,1] neg_hi:[0,1,1]
1967; GFX950-NEXT:    s_setpc_b64 s[30:31]
1968  %a.fneg = fneg <2 x half> %a
1969  %b.fneg = fneg <2 x half> %b
1970  %c.fneg = fneg <2 x half> %c
1971  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a.fneg, <2 x half> %b.fneg)
1972  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c.fneg)
1973  ret <2 x half> %max1
1974}
1975
; <2 x half> case with the splat constant <2.0, 2.0> as the second operand of
; the inner call: maximum(maximum(a, <2.0, 2.0>), c).
; The expected first-round instruction on every target takes the operand 2.0
; with an op_sel_hi modifier ([1,0] for the two-source forms, [1,0,0] for
; v_pk_maximum3_f16 on gfx950, where 2.0 is repeated as the third source). In
; the gfx940 checks the first-round compares are of v0 with itself.
1976define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
1977; GFX12-LABEL: v_fmaximum3_v2f16__inlineimm1:
1978; GFX12:       ; %bb.0:
1979; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1980; GFX12-NEXT:    s_wait_expcnt 0x0
1981; GFX12-NEXT:    s_wait_samplecnt 0x0
1982; GFX12-NEXT:    s_wait_bvhcnt 0x0
1983; GFX12-NEXT:    s_wait_kmcnt 0x0
1984; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
1985; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1986; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
1987; GFX12-NEXT:    s_setpc_b64 s[30:31]
1988;
1989; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm1:
1990; GFX940:       ; %bb.0:
1991; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992; GFX940-NEXT:    v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
1993; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
1994; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
1995; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
1996; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
1997; GFX940-NEXT:    s_nop 0
1998; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
1999; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
2000; GFX940-NEXT:    s_nop 1
2001; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
2002; GFX940-NEXT:    v_perm_b32 v2, v3, v0, s0
2003; GFX940-NEXT:    v_pk_max_f16 v2, v2, v1
2004; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
2005; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
2006; GFX940-NEXT:    s_nop 0
2007; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
2008; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
2009; GFX940-NEXT:    s_nop 1
2010; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
2011; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
2012; GFX940-NEXT:    s_setpc_b64 s[30:31]
2013;
2014; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm1:
2015; GFX950:       ; %bb.0:
2016; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2017; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
2018; GFX950-NEXT:    s_nop 0
2019; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
2020; GFX950-NEXT:    s_setpc_b64 s[30:31]
2021  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
2022  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
2023  ret <2 x half> %max1
2024}
2025
; <2 x half> case with the splat constant <4.0, 4.0> as the second operand of
; the outer call: maximum(maximum(a, b), <4.0, 4.0>).
; The expected second-round instruction on every target takes the operand 4.0
; with an op_sel_hi modifier ([1,0] for the two-source forms, [1,0,0] for
; v_pk_maximum3_f16 on gfx950, where 4.0 is repeated as the third source). In
; the gfx940 checks both second-round compares are plain e32 compares of a
; register with itself.
2026define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
2027; GFX12-LABEL: v_fmaximum3_v2f16__inlineimm2:
2028; GFX12:       ; %bb.0:
2029; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2030; GFX12-NEXT:    s_wait_expcnt 0x0
2031; GFX12-NEXT:    s_wait_samplecnt 0x0
2032; GFX12-NEXT:    s_wait_bvhcnt 0x0
2033; GFX12-NEXT:    s_wait_kmcnt 0x0
2034; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
2035; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2036; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
2037; GFX12-NEXT:    s_setpc_b64 s[30:31]
2038;
2039; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm2:
2040; GFX940:       ; %bb.0:
2041; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2042; GFX940-NEXT:    v_pk_max_f16 v2, v0, v1
2043; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7e00
2044; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
2045; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2046; GFX940-NEXT:    s_nop 0
2047; GFX940-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
2048; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2049; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
2050; GFX940-NEXT:    s_nop 1
2051; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
2052; GFX940-NEXT:    v_perm_b32 v1, v0, v4, s0
2053; GFX940-NEXT:    v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
2054; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
2055; GFX940-NEXT:    s_nop 1
2056; GFX940-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
2057; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
2058; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
2059; GFX940-NEXT:    s_nop 1
2060; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
2061; GFX940-NEXT:    v_perm_b32 v0, v0, v2, s0
2062; GFX940-NEXT:    s_setpc_b64 s[30:31]
2063;
2064; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm2:
2065; GFX950:       ; %bb.0:
2066; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2067; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
2068; GFX950-NEXT:    s_nop 0
2069; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
2070; GFX950-NEXT:    s_setpc_b64 s[30:31]
2071  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
2072  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
2073  ret <2 x half> %max1
2074}
2075
; <3 x half> case where the outer call takes %c as its first operand:
; maximum(c, maximum(a, b)).
; In the expected code each vector occupies a register pair (v0/v1 for %a,
; v2/v3 for %b, v4/v5 for %c) and both rounds are done once per register. The
; gfx1200 checks expect four v_pk_maximum_f16 and the gfx950 checks four
; v_pk_maximum3_f16 (second source repeated as third). The gfx940 checks
; expect the v_pk_max_f16 / v_cmp_o_f16 / v_cndmask_b32 / v_perm_b32
; expansion; in the outer round the second register (v1) gets only a single
; e32 compare/select, with no sdwa compare or repack.
2076define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
2077; GFX12-LABEL: v_fmaximum3_v3f16:
2078; GFX12:       ; %bb.0:
2079; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2080; GFX12-NEXT:    s_wait_expcnt 0x0
2081; GFX12-NEXT:    s_wait_samplecnt 0x0
2082; GFX12-NEXT:    s_wait_bvhcnt 0x0
2083; GFX12-NEXT:    s_wait_kmcnt 0x0
2084; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2085; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2086; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2087; GFX12-NEXT:    v_pk_maximum_f16 v0, v4, v0
2088; GFX12-NEXT:    v_pk_maximum_f16 v1, v5, v1
2089; GFX12-NEXT:    s_setpc_b64 s[30:31]
2090;
2091; GFX940-LABEL: v_fmaximum3_v3f16:
2092; GFX940:       ; %bb.0:
2093; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2094; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
2095; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
2096; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2097; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2098; GFX940-NEXT:    s_nop 0
2099; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
2100; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
2101; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
2102; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
2103; GFX940-NEXT:    s_nop 0
2104; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
2105; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2106; GFX940-NEXT:    s_nop 1
2107; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
2108; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2109; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
2110; GFX940-NEXT:    s_nop 1
2111; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2112; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
2113; GFX940-NEXT:    v_pk_max_f16 v1, v5, v1
2114; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
2115; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
2116; GFX940-NEXT:    v_pk_max_f16 v2, v4, v2
2117; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
2118; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
2119; GFX940-NEXT:    s_nop 1
2120; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
2121; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2122; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
2123; GFX940-NEXT:    s_nop 1
2124; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
2125; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
2126; GFX940-NEXT:    s_setpc_b64 s[30:31]
2127;
2128; GFX950-LABEL: v_fmaximum3_v3f16:
2129; GFX950:       ; %bb.0:
2130; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2131; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2132; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2133; GFX950-NEXT:    v_pk_maximum3_f16 v1, v5, v1, v1
2134; GFX950-NEXT:    v_pk_maximum3_f16 v0, v4, v0, v0
2135; GFX950-NEXT:    s_setpc_b64 s[30:31]
2136  %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
2137  %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %c, <3 x half> %max0)
2138  ret <3 x half> %max1
2139}
2140
; <3 x half> case with the outer call's operands in the other order compared
; with v_fmaximum3_v3f16: maximum(maximum(a, b), c).
; The expected code has the same shape as that test on every target (one
; packed operation per register of each v0/v1, v2/v3, v4/v5 pair); only the
; operand order of the second-round instructions differs (for example
; "v0, v4" instead of "v4, v0" in the gfx1200 checks).
2141define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
2142; GFX12-LABEL: v_fmaximum3_v3f16_commute:
2143; GFX12:       ; %bb.0:
2144; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2145; GFX12-NEXT:    s_wait_expcnt 0x0
2146; GFX12-NEXT:    s_wait_samplecnt 0x0
2147; GFX12-NEXT:    s_wait_bvhcnt 0x0
2148; GFX12-NEXT:    s_wait_kmcnt 0x0
2149; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2150; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2151; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2152; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4
2153; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
2154; GFX12-NEXT:    s_setpc_b64 s[30:31]
2155;
2156; GFX940-LABEL: v_fmaximum3_v3f16_commute:
2157; GFX940:       ; %bb.0:
2158; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
2160; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
2161; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2162; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2163; GFX940-NEXT:    s_nop 0
2164; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
2165; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
2166; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
2167; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
2168; GFX940-NEXT:    s_nop 0
2169; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
2170; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2171; GFX940-NEXT:    s_nop 1
2172; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
2173; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2174; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
2175; GFX940-NEXT:    s_nop 1
2176; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2177; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
2178; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5
2179; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
2180; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
2181; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
2182; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
2183; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
2184; GFX940-NEXT:    s_nop 1
2185; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
2186; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2187; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
2188; GFX940-NEXT:    s_nop 1
2189; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
2190; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
2191; GFX940-NEXT:    s_setpc_b64 s[30:31]
2192;
2193; GFX950-LABEL: v_fmaximum3_v3f16_commute:
2194; GFX950:       ; %bb.0:
2195; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2196; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2197; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2198; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
2199; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
2200; GFX950-NEXT:    s_setpc_b64 s[30:31]
2201  %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
2202  %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
2203  ret <3 x half> %max1
2204}
2205
; Chained llvm.maximum on <3 x half> where %a, %b and %c each go through
; llvm.fabs first, i.e. maximum(maximum(|a|, |b|), |c|).
; The gfx1200 and gfx950 checks expect every fabs as an explicit v_and_b32
; with the mask 0x7fff7fff ahead of the packed maximum instructions. The
; gfx940 checks expect v_pk_max_f16 followed by v_cmp_o_f16 / v_cndmask_b32
; selects of the constant 0x7e00, with the fabs of the original inputs also
; showing up as |..| source modifiers on the compares.
2206define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
2207; GFX12-LABEL: v_fmaximum3_v3f16__fabs_all:
2208; GFX12:       ; %bb.0:
2209; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2210; GFX12-NEXT:    s_wait_expcnt 0x0
2211; GFX12-NEXT:    s_wait_samplecnt 0x0
2212; GFX12-NEXT:    s_wait_bvhcnt 0x0
2213; GFX12-NEXT:    s_wait_kmcnt 0x0
2214; GFX12-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2215; GFX12-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
2216; GFX12-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
2217; GFX12-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
2218; GFX12-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
2219; GFX12-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
2220; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2221; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2222; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2223; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2224; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4
2225; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
2226; GFX12-NEXT:    s_setpc_b64 s[30:31]
2227;
2228; GFX940-LABEL: v_fmaximum3_v3f16__fabs_all:
2229; GFX940:       ; %bb.0:
2230; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2231; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
2232; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
2233; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
2234; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
2235; GFX940-NEXT:    v_pk_max_f16 v7, v7, v9
2236; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
2237; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
2238; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
2239; GFX940-NEXT:    v_pk_max_f16 v6, v6, v8
2240; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2241; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
2242; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
2243; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
2244; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
2245; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
2246; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
2247; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
2248; GFX940-NEXT:    s_nop 1
2249; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
2250; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
2251; GFX940-NEXT:    s_nop 1
2252; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
2253; GFX940-NEXT:    v_perm_b32 v2, v8, v0, s0
2254; GFX940-NEXT:    v_pk_max_f16 v2, v2, v11
2255; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
2256; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
2257; GFX940-NEXT:    v_perm_b32 v6, v9, v1, s0
2258; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
2259; GFX940-NEXT:    v_pk_max_f16 v6, v6, v10
2260; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
2261; GFX940-NEXT:    s_nop 1
2262; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
2263; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
2264; GFX940-NEXT:    s_nop 1
2265; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
2266; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
2267; GFX940-NEXT:    s_setpc_b64 s[30:31]
2268;
2269; GFX950-LABEL: v_fmaximum3_v3f16__fabs_all:
2270; GFX950:       ; %bb.0:
2271; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2272; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2273; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
2274; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
2275; GFX950-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
2276; GFX950-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
2277; GFX950-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
2278; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2279; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2280; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
2281; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
2282; GFX950-NEXT:    s_setpc_b64 s[30:31]
2283  %a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
2284  %b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
2285  %c.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %c)
2286  %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a.fabs, <3 x half> %b.fabs)
2287  %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c.fabs)
2288  ret <3 x half> %max1
2289}
2290
; Chained llvm.maximum on <3 x half> where %a, %b and %c are each negated
; with fneg first, i.e. maximum(maximum(-a, -b), -c).
; None of the three check sequences contains a standalone negate instruction.
; The negations are expected as neg_lo / neg_hi modifiers on the packed
; instructions, and as -v source modifiers on the gfx940 v_cmp_o_f16
; compares. In the second stage only the %c operand carries the modifier
; (the gfx950 form lists it twice because that operand is repeated).
2291define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 x half> %c) {
2292; GFX12-LABEL: v_fmaximum3_v3f16__fneg_all:
2293; GFX12:       ; %bb.0:
2294; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2295; GFX12-NEXT:    s_wait_expcnt 0x0
2296; GFX12-NEXT:    s_wait_samplecnt 0x0
2297; GFX12-NEXT:    s_wait_bvhcnt 0x0
2298; GFX12-NEXT:    s_wait_kmcnt 0x0
2299; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
2300; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
2301; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2302; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
2303; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
2304; GFX12-NEXT:    s_setpc_b64 s[30:31]
2305;
2306; GFX940-LABEL: v_fmaximum3_v3f16__fneg_all:
2307; GFX940:       ; %bb.0:
2308; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2309; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
2310; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
2311; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
2312; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2313; GFX940-NEXT:    s_nop 0
2314; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
2315; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
2316; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
2317; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
2318; GFX940-NEXT:    s_nop 0
2319; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
2320; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
2321; GFX940-NEXT:    s_nop 1
2322; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
2323; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2324; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
2325; GFX940-NEXT:    s_nop 1
2326; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2327; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
2328; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
2329; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
2330; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
2331; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
2332; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
2333; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
2334; GFX940-NEXT:    s_nop 1
2335; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
2336; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2337; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
2338; GFX940-NEXT:    s_nop 1
2339; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
2340; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
2341; GFX940-NEXT:    s_setpc_b64 s[30:31]
2342;
2343; GFX950-LABEL: v_fmaximum3_v3f16__fneg_all:
2344; GFX950:       ; %bb.0:
2345; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2346; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
2347; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
2348; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
2349; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
2350; GFX950-NEXT:    s_setpc_b64 s[30:31]
2351  %a.fneg = fneg <3 x half> %a
2352  %b.fneg = fneg <3 x half> %b
2353  %c.fneg = fneg <3 x half> %c
2354  %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a.fneg, <3 x half> %b.fneg)
2355  %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c.fneg)
2356  ret <3 x half> %max1
2357}
2358
; Chained llvm.maximum on <3 x half> where the inner call pairs %a with a
; splat of half 2.0 and the outer call then takes %c,
; i.e. maximum(maximum(a, 2.0), c).
; All three check sequences expect 2.0 to appear directly as an instruction
; operand of the packed max for the first stage. In the gfx940 sequence the
; first-stage v_cmp_o_f16 compares each %a element with itself before
; selecting between the max result and the constant 0x7e00.
2359define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
2360; GFX12-LABEL: v_fmaximum3_v3f16__inlineimm1:
2361; GFX12:       ; %bb.0:
2362; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2363; GFX12-NEXT:    s_wait_expcnt 0x0
2364; GFX12-NEXT:    s_wait_samplecnt 0x0
2365; GFX12-NEXT:    s_wait_bvhcnt 0x0
2366; GFX12-NEXT:    s_wait_kmcnt 0x0
2367; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
2368; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 2.0
2369; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2370; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2371; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2372; GFX12-NEXT:    s_setpc_b64 s[30:31]
2373;
2374; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm1:
2375; GFX940:       ; %bb.0:
2376; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377; GFX940-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
2378; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
2379; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
2380; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
2381; GFX940-NEXT:    v_pk_max_f16 v7, v1, 2.0
2382; GFX940-NEXT:    s_mov_b32 s1, 0x5040100
2383; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
2384; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
2385; GFX940-NEXT:    s_movk_i32 s0, 0x7e00
2386; GFX940-NEXT:    s_nop 0
2387; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
2388; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
2389; GFX940-NEXT:    s_nop 1
2390; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
2391; GFX940-NEXT:    v_perm_b32 v4, v5, v0, s1
2392; GFX940-NEXT:    v_pk_max_f16 v4, v4, v2
2393; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
2394; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
2395; GFX940-NEXT:    s_nop 0
2396; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
2397; GFX940-NEXT:    v_pack_b32_f16 v7, v1, s0
2398; GFX940-NEXT:    v_pk_max_f16 v7, v7, v3
2399; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2400; GFX940-NEXT:    s_nop 1
2401; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
2402; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2403; GFX940-NEXT:    s_nop 1
2404; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
2405; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s1
2406; GFX940-NEXT:    s_setpc_b64 s[30:31]
2407;
2408; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm1:
2409; GFX950:       ; %bb.0:
2410; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 2.0, 2.0
2412; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
2413; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2414; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2415; GFX950-NEXT:    s_setpc_b64 s[30:31]
2416  %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
2417  %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
2418  ret <3 x half> %max1
2419}
2420
; Chained llvm.maximum on <3 x half> where the inner call takes %a and %b and
; the outer call pairs that result with a splat of half 4.0,
; i.e. maximum(maximum(a, b), 4.0).
; All three check sequences expect 4.0 to appear directly as an instruction
; operand of the packed max for the second stage. In the gfx940 sequence the
; second-stage v_cmp_o_f16 compares each intermediate element with itself
; before selecting between the max result and the constant 0x7e00.
2421define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
2422; GFX12-LABEL: v_fmaximum3_v3f16__inlineimm2:
2423; GFX12:       ; %bb.0:
2424; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2425; GFX12-NEXT:    s_wait_expcnt 0x0
2426; GFX12-NEXT:    s_wait_samplecnt 0x0
2427; GFX12-NEXT:    s_wait_bvhcnt 0x0
2428; GFX12-NEXT:    s_wait_kmcnt 0x0
2429; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2430; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2431; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2432; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
2433; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 4.0
2434; GFX12-NEXT:    s_setpc_b64 s[30:31]
2435;
2436; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm2:
2437; GFX940:       ; %bb.0:
2438; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2439; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
2440; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
2441; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2442; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2443; GFX940-NEXT:    s_nop 0
2444; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
2445; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
2446; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
2447; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
2448; GFX940-NEXT:    s_nop 0
2449; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
2450; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2451; GFX940-NEXT:    s_nop 1
2452; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
2453; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2454; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
2455; GFX940-NEXT:    s_nop 1
2456; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
2457; GFX940-NEXT:    v_perm_b32 v1, v1, v4, s0
2458; GFX940-NEXT:    v_pk_max_f16 v1, v1, 4.0
2459; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
2460; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
2461; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
2462; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
2463; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
2464; GFX940-NEXT:    s_nop 1
2465; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
2466; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2467; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
2468; GFX940-NEXT:    s_nop 1
2469; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
2470; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
2471; GFX940-NEXT:    s_setpc_b64 s[30:31]
2472;
2473; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm2:
2474; GFX950:       ; %bb.0:
2475; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2476; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2477; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2478; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 4.0, 4.0
2479; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
2480; GFX950-NEXT:    s_setpc_b64 s[30:31]
2481  %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
2482  %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
2483  ret <3 x half> %max1
2484}
2485
; Chained llvm.maximum on <4 x half> where the outer call takes %c as its
; first operand and the inner result as its second,
; i.e. maximum(c, maximum(a, b)).
; The checks keep that operand order in the second stage, with the %c
; registers (v4 / v5) in the first source position. The gfx1200 checks use
; two v_pk_maximum_f16 per stage, the gfx950 checks use v_pk_maximum3_f16
; with the last source repeated, and the gfx940 checks use v_pk_max_f16 plus
; v_cmp_o_f16 / v_cndmask_b32 selects of the constant 0x7e00 per half.
2486define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
2487; GFX12-LABEL: v_fmaximum3_v4f16:
2488; GFX12:       ; %bb.0:
2489; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2490; GFX12-NEXT:    s_wait_expcnt 0x0
2491; GFX12-NEXT:    s_wait_samplecnt 0x0
2492; GFX12-NEXT:    s_wait_bvhcnt 0x0
2493; GFX12-NEXT:    s_wait_kmcnt 0x0
2494; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2495; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2496; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2497; GFX12-NEXT:    v_pk_maximum_f16 v0, v4, v0
2498; GFX12-NEXT:    v_pk_maximum_f16 v1, v5, v1
2499; GFX12-NEXT:    s_setpc_b64 s[30:31]
2500;
2501; GFX940-LABEL: v_fmaximum3_v4f16:
2502; GFX940:       ; %bb.0:
2503; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2504; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
2505; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
2506; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2507; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2508; GFX940-NEXT:    s_nop 0
2509; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
2510; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
2511; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
2512; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
2513; GFX940-NEXT:    s_nop 0
2514; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
2515; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2516; GFX940-NEXT:    s_nop 1
2517; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
2518; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2519; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
2520; GFX940-NEXT:    s_nop 1
2521; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2522; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
2523; GFX940-NEXT:    v_pk_max_f16 v2, v5, v2
2524; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
2525; GFX940-NEXT:    s_nop 1
2526; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
2527; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2528; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
2529; GFX940-NEXT:    s_nop 1
2530; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2531; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
2532; GFX940-NEXT:    v_pk_max_f16 v2, v4, v2
2533; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
2534; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
2535; GFX940-NEXT:    s_nop 0
2536; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
2537; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2538; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
2539; GFX940-NEXT:    s_nop 1
2540; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
2541; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
2542; GFX940-NEXT:    s_setpc_b64 s[30:31]
2543;
2544; GFX950-LABEL: v_fmaximum3_v4f16:
2545; GFX950:       ; %bb.0:
2546; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2547; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2548; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2549; GFX950-NEXT:    v_pk_maximum3_f16 v1, v5, v1, v1
2550; GFX950-NEXT:    v_pk_maximum3_f16 v0, v4, v0, v0
2551; GFX950-NEXT:    s_setpc_b64 s[30:31]
2552  %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
2553  %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %c, <4 x half> %max0)
2554  ret <4 x half> %max1
2555}
2556
; Chained llvm.maximum on <4 x half> where the outer call takes the inner
; result as its first operand and %c as its second,
; i.e. maximum(maximum(a, b), c).
; The checks keep that operand order in the second stage, with the %c
; registers (v4 / v5) in the second source position. Instruction selection
; per target is otherwise the same shape as shown in each check sequence
; below (v_pk_maximum_f16 on gfx1200, v_pk_max_f16 plus compare and select
; on gfx940, v_pk_maximum3_f16 with a repeated last source on gfx950).
2557define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
2558; GFX12-LABEL: v_fmaximum3_v4f16_commute:
2559; GFX12:       ; %bb.0:
2560; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2561; GFX12-NEXT:    s_wait_expcnt 0x0
2562; GFX12-NEXT:    s_wait_samplecnt 0x0
2563; GFX12-NEXT:    s_wait_bvhcnt 0x0
2564; GFX12-NEXT:    s_wait_kmcnt 0x0
2565; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2566; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2567; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2568; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4
2569; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
2570; GFX12-NEXT:    s_setpc_b64 s[30:31]
2571;
2572; GFX940-LABEL: v_fmaximum3_v4f16_commute:
2573; GFX940:       ; %bb.0:
2574; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
2576; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
2577; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2578; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2579; GFX940-NEXT:    s_nop 0
2580; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
2581; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
2582; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
2583; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
2584; GFX940-NEXT:    s_nop 0
2585; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
2586; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2587; GFX940-NEXT:    s_nop 1
2588; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
2589; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2590; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
2591; GFX940-NEXT:    s_nop 1
2592; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2593; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
2594; GFX940-NEXT:    v_pk_max_f16 v2, v2, v5
2595; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
2596; GFX940-NEXT:    s_nop 1
2597; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
2598; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2599; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
2600; GFX940-NEXT:    s_nop 1
2601; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2602; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
2603; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
2604; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
2605; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
2606; GFX940-NEXT:    s_nop 0
2607; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
2608; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2609; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
2610; GFX940-NEXT:    s_nop 1
2611; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
2612; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
2613; GFX940-NEXT:    s_setpc_b64 s[30:31]
2614;
2615; GFX950-LABEL: v_fmaximum3_v4f16_commute:
2616; GFX950:       ; %bb.0:
2617; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2618; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2619; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2620; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
2621; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
2622; GFX950-NEXT:    s_setpc_b64 s[30:31]
2623  %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
2624  %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
2625  ret <4 x half> %max1
2626}
2627
; Chained llvm.maximum on <4 x half> where %a, %b and %c each go through
; llvm.fabs first, i.e. maximum(maximum(|a|, |b|), |c|).
; The gfx1200 and gfx950 checks expect every fabs as an explicit v_and_b32
; with the mask 0x7fff7fff ahead of the packed maximum instructions. The
; gfx940 checks expect v_pk_max_f16 followed by v_cmp_o_f16 / v_cndmask_b32
; selects of the constant 0x7e00 for each of the four halves in both stages,
; with the fabs of the original inputs also showing up as |..| source
; modifiers on the compares.
2628define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
2629; GFX12-LABEL: v_fmaximum3_v4f16__fabs_all:
2630; GFX12:       ; %bb.0:
2631; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2632; GFX12-NEXT:    s_wait_expcnt 0x0
2633; GFX12-NEXT:    s_wait_samplecnt 0x0
2634; GFX12-NEXT:    s_wait_bvhcnt 0x0
2635; GFX12-NEXT:    s_wait_kmcnt 0x0
2636; GFX12-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2637; GFX12-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
2638; GFX12-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
2639; GFX12-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
2640; GFX12-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
2641; GFX12-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
2642; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2643; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2644; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2645; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2646; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4
2647; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
2648; GFX12-NEXT:    s_setpc_b64 s[30:31]
2649;
2650; GFX940-LABEL: v_fmaximum3_v4f16__fabs_all:
2651; GFX940:       ; %bb.0:
2652; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2653; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
2654; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
2655; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
2656; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
2657; GFX940-NEXT:    v_pk_max_f16 v7, v7, v9
2658; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
2659; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
2660; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
2661; GFX940-NEXT:    v_pk_max_f16 v6, v6, v8
2662; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2663; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
2664; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
2665; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
2666; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
2667; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
2668; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
2669; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
2670; GFX940-NEXT:    s_nop 1
2671; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
2672; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
2673; GFX940-NEXT:    s_nop 1
2674; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
2675; GFX940-NEXT:    v_perm_b32 v2, v8, v1, s0
2676; GFX940-NEXT:    v_pk_max_f16 v2, v2, v11
2677; GFX940-NEXT:    v_perm_b32 v6, v9, v0, s0
2678; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
2679; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
2680; GFX940-NEXT:    v_pk_max_f16 v6, v6, v10
2681; GFX940-NEXT:    s_nop 0
2682; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
2683; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
2684; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
2685; GFX940-NEXT:    s_nop 1
2686; GFX940-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
2687; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
2688; GFX940-NEXT:    s_nop 1
2689; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
2690; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
2691; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s0
2692; GFX940-NEXT:    s_nop 0
2693; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
2694; GFX940-NEXT:    v_perm_b32 v0, v7, v0, s0
2695; GFX940-NEXT:    s_setpc_b64 s[30:31]
2696;
2697; GFX950-LABEL: v_fmaximum3_v4f16__fabs_all:
2698; GFX950:       ; %bb.0:
2699; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2700; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2701; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
2702; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
2703; GFX950-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
2704; GFX950-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
2705; GFX950-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
2706; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2707; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2708; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
2709; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
2710; GFX950-NEXT:    s_setpc_b64 s[30:31]
2711  %a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
2712  %b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
2713  %c.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %c)
2714  %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a.fabs, <4 x half> %b.fabs)
2715  %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c.fabs)
2716  ret <4 x half> %max1
2717}
2718
; Chained llvm.maximum on <4 x half> where %a, %b and %c are each negated
; with fneg first, i.e. maximum(maximum(-a, -b), -c).
; None of the three check sequences contains a standalone negate instruction.
; The negations are expected as neg_lo / neg_hi modifiers on the packed
; instructions, and as -v source modifiers on the gfx940 v_cmp_o_f16
; compares. In the second stage only the %c operand carries the modifier
; (the gfx950 form lists it twice because that operand is repeated).
2719define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
2720; GFX12-LABEL: v_fmaximum3_v4f16__fneg_all:
2721; GFX12:       ; %bb.0:
2722; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2723; GFX12-NEXT:    s_wait_expcnt 0x0
2724; GFX12-NEXT:    s_wait_samplecnt 0x0
2725; GFX12-NEXT:    s_wait_bvhcnt 0x0
2726; GFX12-NEXT:    s_wait_kmcnt 0x0
2727; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
2728; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
2729; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2730; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
2731; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
2732; GFX12-NEXT:    s_setpc_b64 s[30:31]
2733;
2734; GFX940-LABEL: v_fmaximum3_v4f16__fneg_all:
2735; GFX940:       ; %bb.0:
2736; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2737; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
2738; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
2739; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
2740; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2741; GFX940-NEXT:    s_nop 0
2742; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
2743; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
2744; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
2745; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
2746; GFX940-NEXT:    s_nop 0
2747; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
2748; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
2749; GFX940-NEXT:    s_nop 1
2750; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
2751; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2752; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
2753; GFX940-NEXT:    s_nop 1
2754; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2755; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
2756; GFX940-NEXT:    v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
2757; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
2758; GFX940-NEXT:    s_nop 1
2759; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
2760; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2761; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
2762; GFX940-NEXT:    s_nop 1
2763; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
2764; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
2765; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
2766; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
2767; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
2768; GFX940-NEXT:    s_nop 0
2769; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
2770; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2771; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
2772; GFX940-NEXT:    s_nop 1
2773; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
2774; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
2775; GFX940-NEXT:    s_setpc_b64 s[30:31]
2776;
2777; GFX950-LABEL: v_fmaximum3_v4f16__fneg_all:
2778; GFX950:       ; %bb.0:
2779; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2780; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
2781; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
2782; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
2783; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
2784; GFX950-NEXT:    s_setpc_b64 s[30:31]
2785  %a.fneg = fneg <4 x half> %a
2786  %b.fneg = fneg <4 x half> %b
2787  %c.fneg = fneg <4 x half> %c
2788  %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a.fneg, <4 x half> %b.fneg)
2789  %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c.fneg)
2790  ret <4 x half> %max1
2791}
2792
; Chained llvm.maximum on <4 x half> where the inner call pairs %a with a
; splat of half 2.0 and the outer call then takes %c,
; i.e. maximum(maximum(a, 2.0), c).
; All three check sequences expect 2.0 to appear directly as an instruction
; operand of the packed max for the first stage, with op_sel_hi:[1,0] (or
; [1,0,0] on gfx950) on both result registers. In the gfx940 sequence the
; first-stage v_cmp_o_f16 compares each %a element with itself before
; selecting between the max result and the constant 0x7e00.
2793define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
2794; GFX12-LABEL: v_fmaximum3_v4f16__inlineimm1:
2795; GFX12:       ; %bb.0:
2796; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2797; GFX12-NEXT:    s_wait_expcnt 0x0
2798; GFX12-NEXT:    s_wait_samplecnt 0x0
2799; GFX12-NEXT:    s_wait_bvhcnt 0x0
2800; GFX12-NEXT:    s_wait_kmcnt 0x0
2801; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
2802; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 2.0 op_sel_hi:[1,0]
2803; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2804; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2805; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2806; GFX12-NEXT:    s_setpc_b64 s[30:31]
2807;
2808; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm1:
2809; GFX940:       ; %bb.0:
2810; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811; GFX940-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
2812; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
2813; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
2814; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
2815; GFX940-NEXT:    v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
2816; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2817; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
2818; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
2819; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
2820; GFX940-NEXT:    s_nop 1
2821; GFX940-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
2822; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
2823; GFX940-NEXT:    s_nop 1
2824; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
2825; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
2826; GFX940-NEXT:    s_nop 1
2827; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
2828; GFX940-NEXT:    v_perm_b32 v4, v8, v1, s0
2829; GFX940-NEXT:    v_pk_max_f16 v4, v4, v3
2830; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
2831; GFX940-NEXT:    v_perm_b32 v8, v5, v0, s0
2832; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
2833; GFX940-NEXT:    v_pk_max_f16 v8, v8, v2
2834; GFX940-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
2835; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
2836; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
2837; GFX940-NEXT:    s_nop 1
2838; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
2839; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2840; GFX940-NEXT:    s_nop 1
2841; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
2842; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2843; GFX940-NEXT:    v_perm_b32 v1, v7, v1, s0
2844; GFX940-NEXT:    s_nop 0
2845; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
2846; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s0
2847; GFX940-NEXT:    s_setpc_b64 s[30:31]
2848;
2849; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm1:
2850; GFX950:       ; %bb.0:
2851; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2852; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 2.0, 2.0 op_sel_hi:[1,0,0]
2853; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
2854; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2855; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2856; GFX950-NEXT:    s_setpc_b64 s[30:31]
2857  %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
2858  %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
2859  ret <4 x half> %max1
2860}
2861
; Two chained llvm.maximum.v4f16 calls where the second operand of the outer
; call is a splat of the constant 4.0. All three expected outputs encode 4.0
; directly as an instruction operand. The expected gfx940 output additionally
; pairs each v_pk_max_f16 with ordered compares and selects against 0x7e00
; (the f16 quiet-NaN bit pattern), while gfx1200 and gfx950 expect only
; packed maximum instructions.
2862define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
2863; GFX12-LABEL: v_fmaximum3_v4f16__inlineimm2:
2864; GFX12:       ; %bb.0:
2865; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2866; GFX12-NEXT:    s_wait_expcnt 0x0
2867; GFX12-NEXT:    s_wait_samplecnt 0x0
2868; GFX12-NEXT:    s_wait_bvhcnt 0x0
2869; GFX12-NEXT:    s_wait_kmcnt 0x0
2870; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
2871; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
2872; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2873; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
2874; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
2875; GFX12-NEXT:    s_setpc_b64 s[30:31]
2876;
2877; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm2:
2878; GFX940:       ; %bb.0:
2879; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2880; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
2881; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
2882; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
2883; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
2884; GFX940-NEXT:    s_nop 0
2885; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
2886; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
2887; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
2888; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
2889; GFX940-NEXT:    s_nop 0
2890; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
2891; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
2892; GFX940-NEXT:    s_nop 1
2893; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
2894; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2895; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
2896; GFX940-NEXT:    s_nop 1
2897; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
2898; GFX940-NEXT:    v_perm_b32 v2, v1, v4, s0
2899; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
2900; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
2901; GFX940-NEXT:    s_nop 1
2902; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
2903; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2904; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
2905; GFX940-NEXT:    s_nop 1
2906; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
2907; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
2908; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
2909; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
2910; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
2911; GFX940-NEXT:    s_nop 0
2912; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
2913; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2914; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
2915; GFX940-NEXT:    s_nop 1
2916; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
2917; GFX940-NEXT:    v_perm_b32 v0, v0, v4, s0
2918; GFX940-NEXT:    s_setpc_b64 s[30:31]
2919;
2920; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm2:
2921; GFX950:       ; %bb.0:
2922; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2923; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
2924; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
2925; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 4.0, 4.0 op_sel_hi:[1,0,0]
2926; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
2927; GFX950-NEXT:    s_setpc_b64 s[30:31]
2928  %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
2929  %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
2930  ret <4 x half> %max1
2931}
2932
; Two chained llvm.maximum.f64 calls with all-register operands. The expected
; gfx1200 output is two v_maximum_f64 instructions. The expected gfx940 and
; gfx950 output (they share one check prefix) is, per call, a v_max_f64 plus
; an unordered compare that selects 0x7ff80000 for the high dword and 0 for
; the low dword (an f64 quiet NaN) when the compare is true.
2933define double @v_fmaximum3_f64(double %a, double %b, double %c) {
2934; GFX12-LABEL: v_fmaximum3_f64:
2935; GFX12:       ; %bb.0:
2936; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2937; GFX12-NEXT:    s_wait_expcnt 0x0
2938; GFX12-NEXT:    s_wait_samplecnt 0x0
2939; GFX12-NEXT:    s_wait_bvhcnt 0x0
2940; GFX12-NEXT:    s_wait_kmcnt 0x0
2941; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
2942; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2943; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[4:5]
2944; GFX12-NEXT:    s_setpc_b64 s[30:31]
2945;
2946; GFX9-LABEL: v_fmaximum3_f64:
2947; GFX9:       ; %bb.0:
2948; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2949; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
2950; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
2951; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
2952; GFX9-NEXT:    s_nop 1
2953; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
2954; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
2955; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
2956; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
2957; GFX9-NEXT:    s_nop 1
2958; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
2959; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
2960; GFX9-NEXT:    s_setpc_b64 s[30:31]
2961  %max0 = call double @llvm.maximum.f64(double %a, double %b)
2962  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
2963  ret double %max1
2964}
2965
; Same chain as the plain f64 case, but the inner result %max0 is passed as
; the second operand of the outer llvm.maximum.f64 call. The expected output
; on every target shows v[4:5] (the %c argument) as the first source of the
; second maximum and, on gfx940 and gfx950, of the second unordered compare.
2966define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
2967; GFX12-LABEL: v_fmaximum3_f64_commute:
2968; GFX12:       ; %bb.0:
2969; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2970; GFX12-NEXT:    s_wait_expcnt 0x0
2971; GFX12-NEXT:    s_wait_samplecnt 0x0
2972; GFX12-NEXT:    s_wait_bvhcnt 0x0
2973; GFX12-NEXT:    s_wait_kmcnt 0x0
2974; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
2975; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2976; GFX12-NEXT:    v_maximum_f64 v[0:1], v[4:5], v[0:1]
2977; GFX12-NEXT:    s_setpc_b64 s[30:31]
2978;
2979; GFX9-LABEL: v_fmaximum3_f64_commute:
2980; GFX9:       ; %bb.0:
2981; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2982; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
2983; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
2984; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
2985; GFX9-NEXT:    s_nop 1
2986; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
2987; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
2988; GFX9-NEXT:    v_max_f64 v[2:3], v[4:5], v[0:1]
2989; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
2990; GFX9-NEXT:    s_nop 1
2991; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
2992; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
2993; GFX9-NEXT:    s_setpc_b64 s[30:31]
2994  %max0 = call double @llvm.maximum.f64(double %a, double %b)
2995  %max1 = call double @llvm.maximum.f64(double %c, double %max0)
2996  ret double %max1
2997}
2998
; amdgpu_ps variant of the chained f64 maximum with all three arguments marked
; inreg. The f64 result is bitcast to <2 x i32> and each element is passed
; through llvm.amdgcn.readfirstlane before being returned. The expected output
; on every target still performs the maximum with vector instructions reading
; SGPR pairs, followed by two v_readfirstlane_b32 into s0 and s1.
2999define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, double inreg %c) {
3000; GFX12-LABEL: s_fmaximum3_f64:
3001; GFX12:       ; %bb.0:
3002; GFX12-NEXT:    v_maximum_f64 v[0:1], s[0:1], s[2:3]
3003; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3004; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], s[4:5]
3005; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
3006; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3007; GFX12-NEXT:    v_readfirstlane_b32 s1, v1
3008; GFX12-NEXT:    ; return to shader part epilog
3009;
3010; GFX9-LABEL: s_fmaximum3_f64:
3011; GFX9:       ; %bb.0:
3012; GFX9-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
3013; GFX9-NEXT:    v_max_f64 v[2:3], s[0:1], v[0:1]
3014; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
3015; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, s[0:1], v[0:1]
3016; GFX9-NEXT:    s_nop 1
3017; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
3018; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3019; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[4:5]
3020; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, s[4:5], v[0:1]
3021; GFX9-NEXT:    s_nop 1
3022; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
3023; GFX9-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc
3024; GFX9-NEXT:    v_readfirstlane_b32 s1, v0
3025; GFX9-NEXT:    v_readfirstlane_b32 s0, v1
3026; GFX9-NEXT:    ; return to shader part epilog
3027  %max0 = call double @llvm.maximum.f64(double %a, double %b)
3028  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3029  %cast = bitcast double %max1 to <2 x i32>
3030  %elt0 = extractelement <2 x i32> %cast, i32 0
3031  %elt1 = extractelement <2 x i32> %cast, i32 1
3032  %readlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt0)
3033  %readlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt1)
3034  %insert.0 = insertelement <2 x i32> poison, i32 %readlane0, i32 0
3035  %insert.1 = insertelement <2 x i32> %insert.0, i32 %readlane1, i32 1
3036  ret <2 x i32> %insert.1
3037}
3038
; Chained f64 maximum where llvm.fabs.f64 is applied to %a only. The expected
; output shows the absolute value as a |v[0:1]| source modifier on the first
; maximum (and on the matching unordered compare for gfx940 and gfx950), with
; no separate instruction for the fabs.
3039define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
3040; GFX12-LABEL: v_fmaximum3_f64_fabs0:
3041; GFX12:       ; %bb.0:
3042; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3043; GFX12-NEXT:    s_wait_expcnt 0x0
3044; GFX12-NEXT:    s_wait_samplecnt 0x0
3045; GFX12-NEXT:    s_wait_bvhcnt 0x0
3046; GFX12-NEXT:    s_wait_kmcnt 0x0
3047; GFX12-NEXT:    v_maximum_f64 v[0:1], |v[0:1]|, v[2:3]
3048; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3049; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[4:5]
3050; GFX12-NEXT:    s_setpc_b64 s[30:31]
3051;
3052; GFX9-LABEL: v_fmaximum3_f64_fabs0:
3053; GFX9:       ; %bb.0:
3054; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3055; GFX9-NEXT:    v_max_f64 v[6:7], |v[0:1]|, v[2:3]
3056; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3057; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
3058; GFX9-NEXT:    s_nop 1
3059; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3060; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3061; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
3062; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3063; GFX9-NEXT:    s_nop 1
3064; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3065; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3066; GFX9-NEXT:    s_setpc_b64 s[30:31]
3067  %a.fabs = call double @llvm.fabs.f64(double %a)
3068  %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b)
3069  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3070  ret double %max1
3071}
3072
; Chained f64 maximum where llvm.fabs.f64 is applied to %b only. The expected
; output shows the absolute value as a |v[2:3]| source modifier on the second
; source of the first maximum (and of the matching unordered compare for
; gfx940 and gfx950).
3073define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
3074; GFX12-LABEL: v_fmaximum3_f64_fabs1:
3075; GFX12:       ; %bb.0:
3076; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3077; GFX12-NEXT:    s_wait_expcnt 0x0
3078; GFX12-NEXT:    s_wait_samplecnt 0x0
3079; GFX12-NEXT:    s_wait_bvhcnt 0x0
3080; GFX12-NEXT:    s_wait_kmcnt 0x0
3081; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], |v[2:3]|
3082; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3083; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[4:5]
3084; GFX12-NEXT:    s_setpc_b64 s[30:31]
3085;
3086; GFX9-LABEL: v_fmaximum3_f64_fabs1:
3087; GFX9:       ; %bb.0:
3088; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3089; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], |v[2:3]|
3090; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3091; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
3092; GFX9-NEXT:    s_nop 1
3093; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3094; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3095; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
3096; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3097; GFX9-NEXT:    s_nop 1
3098; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3099; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3100; GFX9-NEXT:    s_setpc_b64 s[30:31]
3101  %b.fabs = call double @llvm.fabs.f64(double %b)
3102  %max0 = call double @llvm.maximum.f64(double %a, double %b.fabs)
3103  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3104  ret double %max1
3105}
3106
; Chained f64 maximum where llvm.fabs.f64 is applied to %c only. The expected
; output shows the absolute value as a |v[4:5]| source modifier on the second
; maximum (and on the second unordered compare for gfx940 and gfx950).
3107define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
3108; GFX12-LABEL: v_fmaximum3_f64_fabs2:
3109; GFX12:       ; %bb.0:
3110; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3111; GFX12-NEXT:    s_wait_expcnt 0x0
3112; GFX12-NEXT:    s_wait_samplecnt 0x0
3113; GFX12-NEXT:    s_wait_bvhcnt 0x0
3114; GFX12-NEXT:    s_wait_kmcnt 0x0
3115; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3116; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3117; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], |v[4:5]|
3118; GFX12-NEXT:    s_setpc_b64 s[30:31]
3119;
3120; GFX9-LABEL: v_fmaximum3_f64_fabs2:
3121; GFX9:       ; %bb.0:
3122; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3123; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
3124; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3125; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3126; GFX9-NEXT:    s_nop 1
3127; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3128; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3129; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], |v[4:5]|
3130; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
3131; GFX9-NEXT:    s_nop 1
3132; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3133; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3134; GFX9-NEXT:    s_setpc_b64 s[30:31]
3135  %c.fabs = call double @llvm.fabs.f64(double %c)
3136  %max0 = call double @llvm.maximum.f64(double %a, double %b)
3137  %max1 = call double @llvm.maximum.f64(double %max0, double %c.fabs)
3138  ret double %max1
3139}
3140
; Chained f64 maximum where llvm.fabs.f64 is applied to all three arguments.
; The expected output carries the absolute value as a source modifier on each
; argument operand. The intermediate result in v[0:1] is used unmodified by
; the second maximum.
3141define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
3142; GFX12-LABEL: v_fmaximum3_f64_fabs_all:
3143; GFX12:       ; %bb.0:
3144; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3145; GFX12-NEXT:    s_wait_expcnt 0x0
3146; GFX12-NEXT:    s_wait_samplecnt 0x0
3147; GFX12-NEXT:    s_wait_bvhcnt 0x0
3148; GFX12-NEXT:    s_wait_kmcnt 0x0
3149; GFX12-NEXT:    v_maximum_f64 v[0:1], |v[0:1]|, |v[2:3]|
3150; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3151; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], |v[4:5]|
3152; GFX12-NEXT:    s_setpc_b64 s[30:31]
3153;
3154; GFX9-LABEL: v_fmaximum3_f64_fabs_all:
3155; GFX9:       ; %bb.0:
3156; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3157; GFX9-NEXT:    v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
3158; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3159; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
3160; GFX9-NEXT:    s_nop 1
3161; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3162; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3163; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], |v[4:5]|
3164; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
3165; GFX9-NEXT:    s_nop 1
3166; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3167; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3168; GFX9-NEXT:    s_setpc_b64 s[30:31]
3169  %a.fabs = call double @llvm.fabs.f64(double %a)
3170  %b.fabs = call double @llvm.fabs.f64(double %b)
3171  %c.fabs = call double @llvm.fabs.f64(double %c)
3172  %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b.fabs)
3173  %max1 = call double @llvm.maximum.f64(double %max0, double %c.fabs)
3174  ret double %max1
3175}
3176
; Chained f64 maximum where fneg is applied to all three arguments. The
; expected output carries each negation as a minus source modifier on the
; argument operands of the maximum and compare instructions. The intermediate
; result in v[0:1] is used unmodified by the second maximum.
3177define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
3178; GFX12-LABEL: v_fmaximum3_f64_fneg_all:
3179; GFX12:       ; %bb.0:
3180; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3181; GFX12-NEXT:    s_wait_expcnt 0x0
3182; GFX12-NEXT:    s_wait_samplecnt 0x0
3183; GFX12-NEXT:    s_wait_bvhcnt 0x0
3184; GFX12-NEXT:    s_wait_kmcnt 0x0
3185; GFX12-NEXT:    v_maximum_f64 v[0:1], -v[0:1], -v[2:3]
3186; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3187; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], -v[4:5]
3188; GFX12-NEXT:    s_setpc_b64 s[30:31]
3189;
3190; GFX9-LABEL: v_fmaximum3_f64_fneg_all:
3191; GFX9:       ; %bb.0:
3192; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3193; GFX9-NEXT:    v_max_f64 v[6:7], -v[0:1], -v[2:3]
3194; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3195; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
3196; GFX9-NEXT:    s_nop 1
3197; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3198; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3199; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -v[4:5]
3200; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
3201; GFX9-NEXT:    s_nop 1
3202; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3203; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3204; GFX9-NEXT:    s_setpc_b64 s[30:31]
3205  %a.fneg = fneg double %a
3206  %b.fneg = fneg double %b
3207  %c.fneg = fneg double %c
3208  %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b.fneg)
3209  %max1 = call double @llvm.maximum.f64(double %max0, double %c.fneg)
3210  ret double %max1
3211}
3212
; Chained f64 maximum where every argument is passed through llvm.fabs.f64 and
; then fneg. The expected output carries the combination as a -|...| source
; modifier on each argument operand of the maximum and compare instructions.
3213define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
3214; GFX12-LABEL: v_fmaximum3_f64_fneg_fabs_all:
3215; GFX12:       ; %bb.0:
3216; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3217; GFX12-NEXT:    s_wait_expcnt 0x0
3218; GFX12-NEXT:    s_wait_samplecnt 0x0
3219; GFX12-NEXT:    s_wait_bvhcnt 0x0
3220; GFX12-NEXT:    s_wait_kmcnt 0x0
3221; GFX12-NEXT:    v_maximum_f64 v[0:1], -|v[0:1]|, -|v[2:3]|
3222; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3223; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], -|v[4:5]|
3224; GFX12-NEXT:    s_setpc_b64 s[30:31]
3225;
3226; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all:
3227; GFX9:       ; %bb.0:
3228; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3229; GFX9-NEXT:    v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
3230; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3231; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
3232; GFX9-NEXT:    s_nop 1
3233; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3234; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3235; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -|v[4:5]|
3236; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
3237; GFX9-NEXT:    s_nop 1
3238; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3239; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3240; GFX9-NEXT:    s_setpc_b64 s[30:31]
3241  %a.fabs = call double @llvm.fabs.f64(double %a)
3242  %b.fabs = call double @llvm.fabs.f64(double %b)
3243  %c.fabs = call double @llvm.fabs.f64(double %c)
3244  %a.fneg.fabs = fneg double %a.fabs
3245  %b.fneg.fabs = fneg double %b.fabs
3246  %c.fneg.fabs = fneg double %c.fabs
3247  %max0 = call double @llvm.maximum.f64(double %a.fneg.fabs, double %b.fneg.fabs)
3248  %max1 = call double @llvm.maximum.f64(double %max0, double %c.fneg.fabs)
3249  ret double %max1
3250}
3251
; Chained f64 maximum where fneg is applied to %a only. The expected output
; shows the negation as a -v[0:1] source modifier on the first maximum (and on
; the matching unordered compare for gfx940 and gfx950).
3252define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
3253; GFX12-LABEL: v_fmaximum3_f64_fneg0:
3254; GFX12:       ; %bb.0:
3255; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3256; GFX12-NEXT:    s_wait_expcnt 0x0
3257; GFX12-NEXT:    s_wait_samplecnt 0x0
3258; GFX12-NEXT:    s_wait_bvhcnt 0x0
3259; GFX12-NEXT:    s_wait_kmcnt 0x0
3260; GFX12-NEXT:    v_maximum_f64 v[0:1], -v[0:1], v[2:3]
3261; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3262; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[4:5]
3263; GFX12-NEXT:    s_setpc_b64 s[30:31]
3264;
3265; GFX9-LABEL: v_fmaximum3_f64_fneg0:
3266; GFX9:       ; %bb.0:
3267; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3268; GFX9-NEXT:    v_max_f64 v[6:7], -v[0:1], v[2:3]
3269; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3270; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
3271; GFX9-NEXT:    s_nop 1
3272; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3273; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3274; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
3275; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3276; GFX9-NEXT:    s_nop 1
3277; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3278; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3279; GFX9-NEXT:    s_setpc_b64 s[30:31]
3280  %a.fneg = fneg double %a
3281  %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b)
3282  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3283  ret double %max1
3284}
3285
; Chained f64 maximum where fneg is applied to %b only. The expected output
; shows the negation as a -v[2:3] source modifier on the second source of the
; first maximum (and of the matching unordered compare for gfx940 and gfx950).
3286define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
3287; GFX12-LABEL: v_fmaximum3_f64_fneg1:
3288; GFX12:       ; %bb.0:
3289; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3290; GFX12-NEXT:    s_wait_expcnt 0x0
3291; GFX12-NEXT:    s_wait_samplecnt 0x0
3292; GFX12-NEXT:    s_wait_bvhcnt 0x0
3293; GFX12-NEXT:    s_wait_kmcnt 0x0
3294; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], -v[2:3]
3295; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3296; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[4:5]
3297; GFX12-NEXT:    s_setpc_b64 s[30:31]
3298;
3299; GFX9-LABEL: v_fmaximum3_f64_fneg1:
3300; GFX9:       ; %bb.0:
3301; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3302; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], -v[2:3]
3303; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3304; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
3305; GFX9-NEXT:    s_nop 1
3306; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3307; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3308; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
3309; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3310; GFX9-NEXT:    s_nop 1
3311; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3312; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3313; GFX9-NEXT:    s_setpc_b64 s[30:31]
3314  %b.fneg = fneg double %b
3315  %max0 = call double @llvm.maximum.f64(double %a, double %b.fneg)
3316  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3317  ret double %max1
3318}
3319
; Chained f64 maximum where fneg is applied to %c only. The expected output
; shows the negation as a -v[4:5] source modifier on the second maximum (and
; on the second unordered compare for gfx940 and gfx950).
3320define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
3321; GFX12-LABEL: v_fmaximum3_f64_fneg2:
3322; GFX12:       ; %bb.0:
3323; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3324; GFX12-NEXT:    s_wait_expcnt 0x0
3325; GFX12-NEXT:    s_wait_samplecnt 0x0
3326; GFX12-NEXT:    s_wait_bvhcnt 0x0
3327; GFX12-NEXT:    s_wait_kmcnt 0x0
3328; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3329; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3330; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], -v[4:5]
3331; GFX12-NEXT:    s_setpc_b64 s[30:31]
3332;
3333; GFX9-LABEL: v_fmaximum3_f64_fneg2:
3334; GFX9:       ; %bb.0:
3335; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3336; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
3337; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3338; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3339; GFX9-NEXT:    s_nop 1
3340; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3341; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3342; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -v[4:5]
3343; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
3344; GFX9-NEXT:    s_nop 1
3345; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3346; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
3347; GFX9-NEXT:    s_setpc_b64 s[30:31]
3348  %c.fneg = fneg double %c
3349  %max0 = call double @llvm.maximum.f64(double %a, double %b)
3350  %max1 = call double @llvm.maximum.f64(double %max0, double %c.fneg)
3351  ret double %max1
3352}
3353
; Chained f64 maximum where the first operand of the inner call is the
; constant 8.0. The expected gfx1200 output passes it as the literal
; 0x40200000 on the first v_maximum_f64. The expected gfx940 and gfx950 output
; builds it in s[0:1] with two s_mov_b32 (low dword 0, high dword 0x40200000)
; and compares v[0:1] against itself for the first unordered check.
3354define double @v_fmaximum3_f64_const0(double %b, double %c) {
3355; GFX12-LABEL: v_fmaximum3_f64_const0:
3356; GFX12:       ; %bb.0:
3357; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3358; GFX12-NEXT:    s_wait_expcnt 0x0
3359; GFX12-NEXT:    s_wait_samplecnt 0x0
3360; GFX12-NEXT:    s_wait_bvhcnt 0x0
3361; GFX12-NEXT:    s_wait_kmcnt 0x0
3362; GFX12-NEXT:    v_maximum_f64 v[0:1], 0x40200000, v[0:1]
3363; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3364; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3365; GFX12-NEXT:    s_setpc_b64 s[30:31]
3366;
3367; GFX9-LABEL: v_fmaximum3_f64_const0:
3368; GFX9:       ; %bb.0:
3369; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3370; GFX9-NEXT:    s_mov_b32 s0, 0
3371; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
3372; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], s[0:1]
3373; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
3374; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
3375; GFX9-NEXT:    s_nop 1
3376; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
3377; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
3378; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
3379; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3380; GFX9-NEXT:    s_nop 1
3381; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
3382; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
3383; GFX9-NEXT:    s_setpc_b64 s[30:31]
3384  %max0 = call double @llvm.maximum.f64(double 8.0, double %b)
3385  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3386  ret double %max1
3387}
3388
; Chained f64 maximum where the second operand of the outer call is the
; constant 8.0. The expected gfx1200 output passes it as the literal
; 0x40200000 on the second v_maximum_f64. The expected gfx940 and gfx950
; output builds it in s[0:1] before the second v_max_f64 and compares v[0:1]
; against itself for the second unordered check.
3389define double @v_fmaximum3_f64__const2(double %a, double %b) {
3390; GFX12-LABEL: v_fmaximum3_f64__const2:
3391; GFX12:       ; %bb.0:
3392; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3393; GFX12-NEXT:    s_wait_expcnt 0x0
3394; GFX12-NEXT:    s_wait_samplecnt 0x0
3395; GFX12-NEXT:    s_wait_bvhcnt 0x0
3396; GFX12-NEXT:    s_wait_kmcnt 0x0
3397; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3398; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3399; GFX12-NEXT:    v_maximum_f64 v[0:1], 0x40200000, v[0:1]
3400; GFX12-NEXT:    s_setpc_b64 s[30:31]
3401;
3402; GFX9-LABEL: v_fmaximum3_f64__const2:
3403; GFX9:       ; %bb.0:
3404; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3405; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
3406; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
3407; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3408; GFX9-NEXT:    s_mov_b32 s0, 0
3409; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
3410; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
3411; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
3412; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
3413; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
3414; GFX9-NEXT:    s_nop 1
3415; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3416; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
3417; GFX9-NEXT:    s_setpc_b64 s[30:31]
3418  %max0 = call double @llvm.maximum.f64(double %a, double %b)
3419  %max1 = call double @llvm.maximum.f64(double %max0, double 8.0)
3420  ret double %max1
3421}
3422
; Chained f64 maximum where the first operand of the inner call is the
; constant 4.0. The expected output on every target writes 4.0 directly as the
; second source of the first maximum instruction, with no register setup for
; the constant.
3423define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) {
3424; GFX12-LABEL: v_fmaximum3_f64_inlineimm0:
3425; GFX12:       ; %bb.0:
3426; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3427; GFX12-NEXT:    s_wait_expcnt 0x0
3428; GFX12-NEXT:    s_wait_samplecnt 0x0
3429; GFX12-NEXT:    s_wait_bvhcnt 0x0
3430; GFX12-NEXT:    s_wait_kmcnt 0x0
3431; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], 4.0
3432; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3433; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3434; GFX12-NEXT:    s_setpc_b64 s[30:31]
3435;
3436; GFX9-LABEL: v_fmaximum3_f64_inlineimm0:
3437; GFX9:       ; %bb.0:
3438; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3439; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], 4.0
3440; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
3441; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
3442; GFX9-NEXT:    s_nop 1
3443; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
3444; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
3445; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
3446; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3447; GFX9-NEXT:    s_nop 1
3448; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
3449; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
3450; GFX9-NEXT:    s_setpc_b64 s[30:31]
3451  %max0 = call double @llvm.maximum.f64(double 4.0, double %b)
3452  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3453  ret double %max1
3454}
3455
; Chained f64 maximum where the second operand of the outer call is the
; constant 4.0. The expected output on every target writes 4.0 directly as the
; second source of the second maximum instruction, with no register setup for
; the constant.
3456define double @v_fmaximum3_f64__inlineimm(double %a, double %b) {
3457; GFX12-LABEL: v_fmaximum3_f64__inlineimm:
3458; GFX12:       ; %bb.0:
3459; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3460; GFX12-NEXT:    s_wait_expcnt 0x0
3461; GFX12-NEXT:    s_wait_samplecnt 0x0
3462; GFX12-NEXT:    s_wait_bvhcnt 0x0
3463; GFX12-NEXT:    s_wait_kmcnt 0x0
3464; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3465; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3466; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], 4.0
3467; GFX12-NEXT:    s_setpc_b64 s[30:31]
3468;
3469; GFX9-LABEL: v_fmaximum3_f64__inlineimm:
3470; GFX9:       ; %bb.0:
3471; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3472; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
3473; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
3474; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3475; GFX9-NEXT:    s_nop 1
3476; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
3477; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
3478; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], 4.0
3479; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
3480; GFX9-NEXT:    s_nop 1
3481; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3482; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
3483; GFX9-NEXT:    s_setpc_b64 s[30:31]
3484  %max0 = call double @llvm.maximum.f64(double %a, double %b)
3485  %max1 = call double @llvm.maximum.f64(double %max0, double 4.0)
3486  ret double %max1
3487}
3488
; Chained f64 maximum of %a with the constants 8.0 and then 16.0. The expected
; gfx1200 output uses the literals 0x40200000 and 0x40300000 on two
; v_maximum_f64 instructions. The expected gfx940 and gfx950 output loads each
; constant into s[0:1] in turn and keeps both maximum and compare sequences,
; so the checks show no folding of the two constants into one.
3489define double @v_fmaximum3_f64_const1_const2(double %a) {
3490; GFX12-LABEL: v_fmaximum3_f64_const1_const2:
3491; GFX12:       ; %bb.0:
3492; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3493; GFX12-NEXT:    s_wait_expcnt 0x0
3494; GFX12-NEXT:    s_wait_samplecnt 0x0
3495; GFX12-NEXT:    s_wait_bvhcnt 0x0
3496; GFX12-NEXT:    s_wait_kmcnt 0x0
3497; GFX12-NEXT:    v_maximum_f64 v[0:1], 0x40200000, v[0:1]
3498; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3499; GFX12-NEXT:    v_maximum_f64 v[0:1], 0x40300000, v[0:1]
3500; GFX12-NEXT:    s_setpc_b64 s[30:31]
3501;
3502; GFX9-LABEL: v_fmaximum3_f64_const1_const2:
3503; GFX9:       ; %bb.0:
3504; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3505; GFX9-NEXT:    s_mov_b32 s0, 0
3506; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
3507; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
3508; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
3509; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
3510; GFX9-NEXT:    s_mov_b32 s0, 0
3511; GFX9-NEXT:    s_mov_b32 s1, 0x40300000
3512; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
3513; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3514; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
3515; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
3516; GFX9-NEXT:    s_nop 1
3517; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
3518; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
3519; GFX9-NEXT:    s_setpc_b64 s[30:31]
3520  %max0 = call double @llvm.maximum.f64(double %a, double 8.0)
3521  %max1 = call double @llvm.maximum.f64(double %max0, double 16.0)
3522  ret double %max1
3523}
3524
; Chained f32 maximum where the inner result %max0 has a second use. It is
; inserted into the returned <2 x float> alongside %max1. The expected output
; on every target keeps two separate maximum operations writing v0 and v1.
; On gfx950 each one is a v_maximum3_f32 with its last source repeated, rather
; than a single three-input instruction covering %a, %b and %c.
3525define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) {
3526; GFX12-LABEL: v_no_fmaximum3_f32__multi_use:
3527; GFX12:       ; %bb.0:
3528; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3529; GFX12-NEXT:    s_wait_expcnt 0x0
3530; GFX12-NEXT:    s_wait_samplecnt 0x0
3531; GFX12-NEXT:    s_wait_bvhcnt 0x0
3532; GFX12-NEXT:    s_wait_kmcnt 0x0
3533; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
3534; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3535; GFX12-NEXT:    v_maximum_f32 v1, v0, v2
3536; GFX12-NEXT:    s_setpc_b64 s[30:31]
3537;
3538; GFX940-LABEL: v_no_fmaximum3_f32__multi_use:
3539; GFX940:       ; %bb.0:
3540; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3541; GFX940-NEXT:    v_max_f32_e32 v3, v0, v1
3542; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
3543; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
3544; GFX940-NEXT:    s_nop 1
3545; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
3546; GFX940-NEXT:    v_max_f32_e32 v1, v0, v2
3547; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
3548; GFX940-NEXT:    s_nop 1
3549; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
3550; GFX940-NEXT:    s_setpc_b64 s[30:31]
3551;
3552; GFX950-LABEL: v_no_fmaximum3_f32__multi_use:
3553; GFX950:       ; %bb.0:
3554; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3555; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
3556; GFX950-NEXT:    v_maximum3_f32 v1, v0, v2, v2
3557; GFX950-NEXT:    s_setpc_b64 s[30:31]
3558  %max0 = call float @llvm.maximum.f32(float %a, float %b)
3559  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
3560  %insert.0 = insertelement <2 x float> poison, float %max0, i32 0
3561  %insert.1 = insertelement <2 x float> %insert.0, float %max1, i32 1
3562  ret <2 x float> %insert.1
3563}
3564
; amdgpu_ps variant of the multi-use f32 case with all arguments marked inreg.
; Both %max0 and %max1 are bitcast to i32, passed through
; llvm.amdgcn.readfirstlane and returned as <2 x i32>. The expected gfx1200
; output is two s_maximum_f32 instructions. The expected gfx940 and gfx950
; output computes both values in VGPRs and then reads them back with
; v_readfirstlane_b32 into s0 and s1.
3565define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) {
3566; GFX12-LABEL: s_no_fmaximum3_f32__multi_use:
3567; GFX12:       ; %bb.0:
3568; GFX12-NEXT:    s_maximum_f32 s0, s0, s1
3569; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
3570; GFX12-NEXT:    s_maximum_f32 s1, s0, s2
3571; GFX12-NEXT:    ; return to shader part epilog
3572;
3573; GFX940-LABEL: s_no_fmaximum3_f32__multi_use:
3574; GFX940:       ; %bb.0:
3575; GFX940-NEXT:    v_mov_b32_e32 v0, s1
3576; GFX940-NEXT:    v_max_f32_e32 v1, s0, v0
3577; GFX940-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
3578; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
3579; GFX940-NEXT:    s_nop 1
3580; GFX940-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
3581; GFX940-NEXT:    v_max_f32_e32 v1, s2, v0
3582; GFX940-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
3583; GFX940-NEXT:    v_readfirstlane_b32 s0, v0
3584; GFX940-NEXT:    s_nop 0
3585; GFX940-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
3586; GFX940-NEXT:    s_nop 0
3587; GFX940-NEXT:    v_readfirstlane_b32 s1, v1
3588; GFX940-NEXT:    ; return to shader part epilog
3589;
3590; GFX950-LABEL: s_no_fmaximum3_f32__multi_use:
3591; GFX950:       ; %bb.0:
3592; GFX950-NEXT:    v_mov_b32_e32 v0, s0
3593; GFX950-NEXT:    v_maximum3_f32 v0, v0, s1, s1
3594; GFX950-NEXT:    v_maximum3_f32 v1, v0, s2, s2
3595; GFX950-NEXT:    v_readfirstlane_b32 s0, v0
3596; GFX950-NEXT:    v_readfirstlane_b32 s1, v1
3597; GFX950-NEXT:    ; return to shader part epilog
3598  %max0 = call float @llvm.maximum.f32(float %a, float %b)
3599  %max1 = call float @llvm.maximum.f32(float %max0, float %c)
3600  %cast0 = bitcast float %max0 to i32
3601  %cast1 = bitcast float %max1 to i32
3602  %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0)
3603  %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast1)
3604  %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0
3605  %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1
3606  ret <2 x i32> %insert.1
3607}
3608
; Vector-register f16 case in which the first llvm.maximum result has two
; users: it feeds the second llvm.maximum and is also element 0 of the
; returned <2 x half> (the second result is element 1).
; The expected output keeps two separate maximum operations rather than one
; three-input maximum of %a, %b and %c, then packs both halves together:
;   - gfx1200:       two v_maximum_f16 followed by v_pack_b32_f16.
;   - gfx940/gfx950: (shared check prefix) each maximum is a v_max_f16 followed
;                    by a v_cmp_o_f16/v_cndmask pair that substitutes the
;                    constant 0x7e00 (an f16 quiet NaN bit pattern) when the
;                    compare is not ordered, followed by v_pack_b32_f16.
3609define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) {
3610; GFX12-LABEL: v_no_fmaximum3_f16__multi_use:
3611; GFX12:       ; %bb.0:
3612; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3613; GFX12-NEXT:    s_wait_expcnt 0x0
3614; GFX12-NEXT:    s_wait_samplecnt 0x0
3615; GFX12-NEXT:    s_wait_bvhcnt 0x0
3616; GFX12-NEXT:    s_wait_kmcnt 0x0
3617; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
3618; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3619; GFX12-NEXT:    v_maximum_f16 v1, v0, v2
3620; GFX12-NEXT:    v_pack_b32_f16 v0, v0, v1
3621; GFX12-NEXT:    s_setpc_b64 s[30:31]
3622;
3623; GFX9-LABEL: v_no_fmaximum3_f16__multi_use:
3624; GFX9:       ; %bb.0:
3625; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3626; GFX9-NEXT:    v_max_f16_e32 v3, v0, v1
3627; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
3628; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
3629; GFX9-NEXT:    s_nop 1
3630; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
3631; GFX9-NEXT:    v_max_f16_e32 v1, v0, v2
3632; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
3633; GFX9-NEXT:    s_nop 1
3634; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
3635; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
3636; GFX9-NEXT:    s_setpc_b64 s[30:31]
3637  %max0 = call half @llvm.maximum.f16(half %a, half %b)
3638  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
3639  %insert.0 = insertelement <2 x half> poison, half %max0, i32 0
3640  %insert.1 = insertelement <2 x half> %insert.0, half %max1, i32 1
3641  ret <2 x half> %insert.1
3642}
3643
; Scalar (inreg arguments, amdgpu_ps) f16 case in which the first llvm.maximum
; result has two users: it feeds the second llvm.maximum and is also returned
; next to the second result. Each result is bitcast to i16, zero-extended to
; i32 and passed through readfirstlane before being placed in the <2 x i32>.
; The expected output keeps two separate maximum operations rather than one
; three-input maximum of %a, %b and %c:
;   - gfx1200:       two s_maximum_f16, then an s_and_b32 with 0xffff on each
;                    result for the zero-extension.
;   - gfx940/gfx950: (shared check prefix) each maximum is a v_max_f16 followed
;                    by a v_cmp_o_f16/v_cndmask pair that substitutes the
;                    constant 0x7e00 (an f16 quiet NaN bit pattern) when the
;                    compare is not ordered; results are masked with 0xffff and
;                    moved to SGPRs with v_readfirstlane_b32.
3644define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) {
3645; GFX12-LABEL: s_no_fmaximum3_f16__multi_use:
3646; GFX12:       ; %bb.0:
3647; GFX12-NEXT:    s_maximum_f16 s0, s0, s1
3648; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
3649; GFX12-NEXT:    s_maximum_f16 s1, s0, s2
3650; GFX12-NEXT:    s_and_b32 s0, 0xffff, s0
3651; GFX12-NEXT:    s_and_b32 s1, 0xffff, s1
3652; GFX12-NEXT:    ; return to shader part epilog
3653;
3654; GFX9-LABEL: s_no_fmaximum3_f16__multi_use:
3655; GFX9:       ; %bb.0:
3656; GFX9-NEXT:    v_mov_b32_e32 v0, s1
3657; GFX9-NEXT:    v_max_f16_e32 v1, s0, v0
3658; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7e00
3659; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
3660; GFX9-NEXT:    s_nop 1
3661; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
3662; GFX9-NEXT:    v_max_f16_e32 v1, s2, v0
3663; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
3664; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3665; GFX9-NEXT:    s_nop 0
3666; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
3667; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3668; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
3669; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
3670; GFX9-NEXT:    ; return to shader part epilog
3671  %max0 = call half @llvm.maximum.f16(half %a, half %b)
3672  %max1 = call half @llvm.maximum.f16(half %max0, half %c)
3673  %cast0 = bitcast half %max0 to i16
3674  %cast1 = bitcast half %max1 to i16
3675  %ext0 = zext i16 %cast0 to i32
3676  %ext1 = zext i16 %cast1 to i32
3677  %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext0)
3678  %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext1)
3679  %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0
3680  %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1
3681  ret <2 x i32> %insert.1
3682}
3683
; Packed <2 x half> case in which the first llvm.maximum result has two users:
; it feeds the second llvm.maximum and also forms the low half of the returned
; <4 x half> (the second result forms the high half).
; The expected output keeps two separate packed maximum operations rather than
; one three-input maximum of %a, %b and %c:
;   - gfx1200: two v_pk_maximum_f16.
;   - gfx940:  each maximum is a v_pk_max_f16 whose two 16-bit lanes are fixed
;              up separately with v_cmp_o_f16/v_cndmask (substituting 0x7e00
;              when the compare is not ordered) and recombined with v_perm_b32.
;   - gfx950:  two v_pk_maximum3_f16, each with its last source operand repeated.
; The intrinsic is spelled with the .v2f16 suffix so the name matches the
; <2 x half> overload directly instead of relying on the IR reader to remangle
; a mismatched .f16 suffix.
3684define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
3685; GFX12-LABEL: v_no_fmaximum3_v2f16__multi_use:
3686; GFX12:       ; %bb.0:
3687; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3688; GFX12-NEXT:    s_wait_expcnt 0x0
3689; GFX12-NEXT:    s_wait_samplecnt 0x0
3690; GFX12-NEXT:    s_wait_bvhcnt 0x0
3691; GFX12-NEXT:    s_wait_kmcnt 0x0
3692; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
3693; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3694; GFX12-NEXT:    v_pk_maximum_f16 v1, v0, v2
3695; GFX12-NEXT:    s_setpc_b64 s[30:31]
3696;
3697; GFX940-LABEL: v_no_fmaximum3_v2f16__multi_use:
3698; GFX940:       ; %bb.0:
3699; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3700; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
3701; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
3702; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
3703; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
3704; GFX940-NEXT:    s_nop 0
3705; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
3706; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
3707; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
3708; GFX940-NEXT:    s_nop 1
3709; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
3710; GFX940-NEXT:    v_perm_b32 v0, v1, v5, s0
3711; GFX940-NEXT:    v_pk_max_f16 v3, v0, v2
3712; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
3713; GFX940-NEXT:    s_nop 1
3714; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
3715; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
3716; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
3717; GFX940-NEXT:    s_nop 1
3718; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
3719; GFX940-NEXT:    v_perm_b32 v1, v1, v5, s0
3720; GFX940-NEXT:    s_setpc_b64 s[30:31]
3721;
3722; GFX950-LABEL: v_no_fmaximum3_v2f16__multi_use:
3723; GFX950:       ; %bb.0:
3724; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3725; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
3726; GFX950-NEXT:    s_nop 0
3727; GFX950-NEXT:    v_pk_maximum3_f16 v1, v0, v2, v2
3728; GFX950-NEXT:    s_setpc_b64 s[30:31]
3729  %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
3730  %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
3731  %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3732  ret <4 x half> %concat
3733}
3734
; f64 case in which the first llvm.maximum result has two users: it feeds the
; second llvm.maximum and is also element 0 of the returned <2 x double> (the
; second result is element 1).
; The expected output uses two separate two-input maximum sequences on every
; tested target:
;   - gfx1200:       two v_maximum_f64.
;   - gfx940/gfx950: (shared check prefix) each maximum is a v_max_f64 followed
;                    by a v_cmp_u_f64 and two v_cndmask instructions that, when
;                    the compare is unordered, replace the high dword with
;                    0x7ff80000 and the low dword with 0 (together an f64 quiet
;                    NaN bit pattern).
3735define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double %c) {
3736; GFX12-LABEL: v_no_fmaximum3_f64__multi_use:
3737; GFX12:       ; %bb.0:
3738; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
3739; GFX12-NEXT:    s_wait_expcnt 0x0
3740; GFX12-NEXT:    s_wait_samplecnt 0x0
3741; GFX12-NEXT:    s_wait_bvhcnt 0x0
3742; GFX12-NEXT:    s_wait_kmcnt 0x0
3743; GFX12-NEXT:    v_maximum_f64 v[0:1], v[0:1], v[2:3]
3744; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3745; GFX12-NEXT:    v_maximum_f64 v[2:3], v[0:1], v[4:5]
3746; GFX12-NEXT:    s_setpc_b64 s[30:31]
3747;
3748; GFX9-LABEL: v_no_fmaximum3_f64__multi_use:
3749; GFX9:       ; %bb.0:
3750; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3751; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
3752; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
3753; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
3754; GFX9-NEXT:    s_nop 1
3755; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
3756; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
3757; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
3758; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
3759; GFX9-NEXT:    s_nop 1
3760; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
3761; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
3762; GFX9-NEXT:    s_setpc_b64 s[30:31]
3763  %max0 = call double @llvm.maximum.f64(double %a, double %b)
3764  %max1 = call double @llvm.maximum.f64(double %max0, double %c)
3765  %insert.0 = insertelement <2 x double> poison, double %max0, i32 0
3766  %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1
3767  ret <2 x double> %insert.1
3768}
3769