xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll (revision f2c164c8150548d983565c4ddc0fde790f9e2a5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
3; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s
4
5; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
6; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s
7
8; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
9; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s
10
11; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SAFE %s
12; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-NNAN %s
13
; Scalar half case: returns %a when `fcmp ugt` (unordered or greater than)
; holds for (%a, %b), otherwise returns %b.
; Expected codegen, per the autogenerated checks below:
;  - SAFE prefixes on gfx900/fiji/gfx1100 expect v_cmp_nle_f16 + v_cndmask_b32;
;    the SAFE prefix of the llc invocation without -mcpu expects
;    v_max_legacy_f32 after f16<->f32 conversions.
;  - NNAN prefixes (llc invoked with -enable-no-nans-fp-math and
;    -enable-no-signed-zeros-fp-math) expect v_max_f16, or v_max_f32 for the
;    invocation without -mcpu.
14define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
15; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
16; GFX9-SAFE:       ; %bb.0:
17; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
19; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
20; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
23; GFX9-NNAN:       ; %bb.0:
24; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX9-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
26; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
27;
28; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
29; VI-SAFE:       ; %bb.0:
30; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
32; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
33; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
34;
35; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
36; VI-NNAN:       ; %bb.0:
37; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
39; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
40;
41; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
42; SI-SAFE:       ; %bb.0:
43; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
45; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
46; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
47; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
48; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v1, v0
49; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
50;
51; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
52; SI-NNAN:       ; %bb.0:
53; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
55; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
56; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
57; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
58; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v1
59; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
60;
61; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_f16:
62; GFX11-SAFE:       ; %bb.0:
63; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v1
65; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
66; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
67;
68; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_f16:
69; GFX11-NNAN:       ; %bb.0:
70; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX11-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
72; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: compare-and-select pattern (select %a if %a ugt %b, else %b).
73  %cmp = fcmp ugt half %a, %b
74  %val = select i1 %cmp, half %a, half %b
75  ret half %val
76}
77
; <2 x half> case: element-wise select of %a where `fcmp ugt` (unordered or
; greater than) holds for (%a, %b), otherwise %b.
; Expected codegen, per the autogenerated checks below:
;  - SAFE prefixes on gfx900/fiji/gfx1100 expect the high halves to be extracted
;    with v_lshrrev_b32, one v_cmp_nle_f16 + v_cndmask_b32 pair per element, and
;    the result repacked with v_perm_b32 (gfx900, gfx1100) or
;    v_lshlrev_b32 + v_or_b32_sdwa (fiji).
;  - NNAN prefixes expect a single v_pk_max_f16 on gfx900/gfx1100, and
;    v_max_f16_sdwa + v_max_f16 + v_or_b32 on fiji.
;  - The llc invocations without -mcpu expect each element to be handled as f32
;    (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN) after f16<->f32 conversions.
78define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
79; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
80; GFX9-SAFE:       ; %bb.0:
81; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
83; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
84; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
85; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
86; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
87; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
88; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
89; GFX9-SAFE-NEXT:    v_perm_b32 v0, v2, v0, s4
90; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
93; GFX9-NNAN:       ; %bb.0:
94; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
96; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
97;
98; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
99; VI-SAFE:       ; %bb.0:
100; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
102; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
103; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
104; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
105; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
106; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
107; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
108; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
109; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
110;
111; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
112; VI-NNAN:       ; %bb.0:
113; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; VI-NNAN-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
115; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
116; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v2
117; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
118;
119; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
120; SI-SAFE:       ; %bb.0:
121; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
123; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
124; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
125; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
126; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
127; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
128; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
129; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
130; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
131; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
132; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
133;
134; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
135; SI-NNAN:       ; %bb.0:
136; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
138; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
139; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
140; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
141; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
142; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
143; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
144; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
145; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v2
146; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v3
147; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
148;
149; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
150; GFX11-SAFE:       ; %bb.0:
151; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
153; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
154; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
155; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v3, v2
156; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
157; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v1
158; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
159; GFX11-SAFE-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
160; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
163; GFX11-NNAN:       ; %bb.0:
164; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
166; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: element-wise compare-and-select (select %a lane if ugt, else %b lane).
167  %cmp = fcmp ugt <2 x half> %a, %b
168  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
169  ret <2 x half> %val
170}
171
; <3 x half> case (odd element count): element-wise select of %a where
; `fcmp ugt` (unordered or greater than) holds for (%a, %b), otherwise %b.
; Expected codegen, per the autogenerated checks below:
;  - SAFE prefixes on gfx900/fiji/gfx1100 expect three v_cmp_nle_f16 +
;    v_cndmask_b32 pairs (high half of v0/v2, then v1/v3, then low half of
;    v0/v2) and a single repack of v0 via v_perm_b32 (gfx900, gfx1100) or
;    v_lshlrev_b32 + v_or_b32_sdwa (fiji).
;  - NNAN prefixes expect two v_pk_max_f16 on gfx900/gfx1100, and
;    v_max_f16_sdwa + two v_max_f16 + v_or_b32 on fiji.
;  - The llc invocations without -mcpu expect three f32 max operations
;    (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN) after f16<->f32 conversions.
172define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
173; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
174; GFX9-SAFE:       ; %bb.0:
175; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
177; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
178; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
179; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
180; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
181; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
182; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
183; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
184; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
185; GFX9-SAFE-NEXT:    v_perm_b32 v0, v4, v0, s4
186; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
187;
188; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
189; GFX9-NNAN:       ; %bb.0:
190; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
192; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
193; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
194;
195; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
196; VI-SAFE:       ; %bb.0:
197; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
199; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
200; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
201; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
202; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
203; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
204; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
205; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
206; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
207; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
208; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
209;
210; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
211; VI-NNAN:       ; %bb.0:
212; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
214; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
215; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
216; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v4
217; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
218;
219; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
220; SI-SAFE:       ; %bb.0:
221; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
223; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
224; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
225; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
226; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
227; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
228; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
229; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
230; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
231; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
232; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
233; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
234; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v3, v0
235; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v4, v1
236; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v5, v2
237; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
238;
239; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
240; SI-NNAN:       ; %bb.0:
241; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
243; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
244; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
245; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
246; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
247; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
248; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
249; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
250; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
251; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
252; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
253; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
254; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v3
255; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v4
256; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v5
257; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
258;
259; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
260; GFX11-SAFE:       ; %bb.0:
261; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
263; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
264; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v2
265; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
266; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
267; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v5, v4
268; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc_lo
269; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v3
270; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
271; GFX11-SAFE-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
272; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
273;
274; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
275; GFX11-NNAN:       ; %bb.0:
276; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
278; GFX11-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
279; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: element-wise compare-and-select (select %a lane if ugt, else %b lane).
280  %cmp = fcmp ugt <3 x half> %a, %b
281  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
282  ret <3 x half> %val
283}
284
; <4 x half> case: element-wise select of %a where `fcmp ugt` (unordered or
; greater than) holds for (%a, %b), otherwise %b.
; Expected codegen, per the autogenerated checks below:
;  - SAFE prefixes on gfx900/fiji/gfx1100 expect four v_cmp_nle_f16 +
;    v_cndmask_b32 pairs and two repacks via v_perm_b32 (gfx900, gfx1100) or
;    v_lshlrev_b32 + v_or_b32_sdwa (fiji).
;  - NNAN prefixes expect two v_pk_max_f16 on gfx900/gfx1100, and two
;    v_max_f16_sdwa + two v_max_f16 + two v_or_b32 on fiji.
;  - The llc invocations without -mcpu expect four f32 max operations
;    (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN) after f16<->f32 conversions.
285define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
286; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
287; GFX9-SAFE:       ; %bb.0:
288; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
290; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
291; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
292; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
293; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
294; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
295; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
296; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
297; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
298; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
299; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
300; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
301; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
302; GFX9-SAFE-NEXT:    v_perm_b32 v0, v4, v0, s4
303; GFX9-SAFE-NEXT:    v_perm_b32 v1, v6, v1, s4
304; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
305;
306; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
307; GFX9-NNAN:       ; %bb.0:
308; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
310; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
311; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
312;
313; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
314; VI-SAFE:       ; %bb.0:
315; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
317; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
318; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
319; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
320; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
321; VI-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
322; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
323; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
324; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
325; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
326; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
327; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
328; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
329; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
330; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
331; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
332; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
333;
334; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
335; VI-NNAN:       ; %bb.0:
336; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
338; VI-NNAN-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
339; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
340; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
341; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v5
342; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v4
343; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
344;
345; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
346; SI-SAFE:       ; %bb.0:
347; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
349; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
350; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
351; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
352; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
353; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
354; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
355; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
356; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
357; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
358; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
359; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
360; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
361; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
362; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
363; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
364; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
365; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
366; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
367; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
368; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
369;
370; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
371; SI-NNAN:       ; %bb.0:
372; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
374; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
375; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
376; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
377; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
378; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
379; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
380; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
381; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
382; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
383; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
384; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
385; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
386; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
387; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
388; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
389; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
390; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
391; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
392; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
393; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
394;
395; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
396; GFX11-SAFE:       ; %bb.0:
397; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
399; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
400; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
401; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
402; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
403; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v5, v4
404; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
405; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v7, v6
406; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc_lo
407; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v2
408; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
409; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v3
410; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
411; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
412; GFX11-SAFE-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
413; GFX11-SAFE-NEXT:    v_perm_b32 v1, v4, v1, 0x5040100
414; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
415;
416; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
417; GFX11-NNAN:       ; %bb.0:
418; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
420; GFX11-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
421; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: element-wise compare-and-select (select %a lane if ugt, else %b lane).
422  %cmp = fcmp ugt <4 x half> %a, %b
423  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
424  ret <4 x half> %val
425}
426
; <8 x half> case: element-wise select of %a where `fcmp ugt` (unordered or
; greater than) holds for (%a, %b), otherwise %b.
; Expected codegen, per the autogenerated checks below:
;  - SAFE prefixes on gfx900/fiji/gfx1100 expect eight v_cmp_nle_f16 +
;    v_cndmask_b32 pairs and four repacks via v_perm_b32 (gfx900, gfx1100) or
;    v_lshlrev_b32 + v_or_b32_sdwa (fiji).
;  - NNAN prefixes expect four v_pk_max_f16 on gfx900/gfx1100, and four
;    v_max_f16_sdwa + four v_max_f16 + four v_or_b32 on fiji.
;  - The llc invocations without -mcpu expect eight f32 max operations
;    (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN) after f16<->f32 conversions.
427define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
428; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
429; GFX9-SAFE:       ; %bb.0:
430; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
432; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
433; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
434; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
435; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
436; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
437; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
438; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
439; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
440; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
441; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
442; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
443; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
444; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
445; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
446; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
447; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
448; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
449; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
450; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
451; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
452; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
453; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
454; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
455; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
456; GFX9-SAFE-NEXT:    v_perm_b32 v0, v8, v0, s4
457; GFX9-SAFE-NEXT:    v_perm_b32 v1, v10, v1, s4
458; GFX9-SAFE-NEXT:    v_perm_b32 v2, v12, v2, s4
459; GFX9-SAFE-NEXT:    v_perm_b32 v3, v14, v3, s4
460; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
461;
462; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
463; GFX9-NNAN:       ; %bb.0:
464; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
466; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
467; GFX9-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
468; GFX9-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
469; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
470;
471; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
472; VI-SAFE:       ; %bb.0:
473; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
475; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
476; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
477; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
478; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
479; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
480; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
481; VI-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
482; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
483; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
484; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
485; VI-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
486; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
487; VI-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
488; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
489; VI-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
490; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
491; VI-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
492; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
493; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
494; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
495; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
496; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
497; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
498; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
499; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
500; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
501; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
502; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
503; VI-SAFE-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
504; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v14
505; VI-SAFE-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
506; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
507;
508; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
509; VI-NNAN:       ; %bb.0:
510; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511; VI-NNAN-NEXT:    v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
512; VI-NNAN-NEXT:    v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
513; VI-NNAN-NEXT:    v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
514; VI-NNAN-NEXT:    v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
515; VI-NNAN-NEXT:    v_max_f16_e32 v3, v3, v7
516; VI-NNAN-NEXT:    v_max_f16_e32 v2, v2, v6
517; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v5
518; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v4
519; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v11
520; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v10
521; VI-NNAN-NEXT:    v_or_b32_e32 v2, v2, v9
522; VI-NNAN-NEXT:    v_or_b32_e32 v3, v3, v8
523; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
524;
525; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
526; SI-SAFE:       ; %bb.0:
527; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
529; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
530; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
531; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
532; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
533; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
534; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
535; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
536; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
537; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
538; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
539; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
540; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
541; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
542; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
543; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
544; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
545; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
546; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
547; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
548; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
549; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
550; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
551; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
552; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
553; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
554; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
555; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
556; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
557; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
558; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
559; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
560; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
561; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
562; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
563; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
564; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
565; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
566; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
567; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
568; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
569;
570; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
571; SI-NNAN:       ; %bb.0:
572; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
573; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
574; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
575; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
576; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
577; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
578; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
579; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
580; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
581; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
582; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
583; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
584; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
585; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
586; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
587; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
588; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
589; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
590; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
591; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
592; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
593; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
594; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
595; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
596; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
597; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
598; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
599; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
600; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
601; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
602; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
603; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
604; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
605; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
606; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
607; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
608; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
609; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
610; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
611; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
612; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
613; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
614;
615; GFX11-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
616; GFX11-SAFE:       ; %bb.0:
617; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v7
619; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
620; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
621; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
622; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v5
623; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v1
624; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v11, v10
625; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
626; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
627; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
628; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v13, v12
629; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v11, v12, v13, vcc_lo
630; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v15, v14
631; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v12, v14, v15, vcc_lo
632; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v9, v8
633; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
634; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v2, v6
635; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
636; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v4
637; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
638; GFX11-SAFE-NEXT:    v_perm_b32 v2, v11, v2, 0x5040100
639; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
640; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v1, v5
641; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
642; GFX11-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v3, v7
643; GFX11-SAFE-NEXT:    v_perm_b32 v1, v12, v1, 0x5040100
644; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc_lo
645; GFX11-SAFE-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
646; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2)
647; GFX11-SAFE-NEXT:    v_perm_b32 v3, v10, v3, 0x5040100
648; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
649;
650; GFX11-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
651; GFX11-NNAN:       ; %bb.0:
652; GFX11-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653; GFX11-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
654; GFX11-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
655; GFX11-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
656; GFX11-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
657; GFX11-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: element-wise compare-and-select (select %a lane if ugt, else %b lane).
658  %cmp = fcmp ugt <8 x half> %a, %b
659  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
660  ret <8 x half> %val
661}
662
; Attribute group #0, referenced by each test function above: marks them nounwind.
663attributes #0 = { nounwind }
664