xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
9
; Scalar half llvm.maximum with no fast-math flags on the call.
; The GFX7 through GFX11 checks expect a max instruction followed by an
; ordered compare and a select of a NaN bit pattern (0x7fc00000 on GFX7, where
; the operation is carried out in f32, and 0x7e00 on the f16-capable targets).
; The GFX12 checks expect a single v_maximum_f16 instead.
10define half @v_maximum_f16(half %src0, half %src1) {
11; GFX7-LABEL: v_maximum_f16:
12; GFX7:       ; %bb.0:
13; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
15; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
16; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
17; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
18; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
19; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
20; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
21; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
22; GFX7-NEXT:    s_setpc_b64 s[30:31]
23;
24; GFX8-LABEL: v_maximum_f16:
25; GFX8:       ; %bb.0:
26; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
28; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
29; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
30; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
31; GFX8-NEXT:    s_setpc_b64 s[30:31]
32;
33; GFX900-LABEL: v_maximum_f16:
34; GFX900:       ; %bb.0:
35; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
37; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
38; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
39; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
40; GFX900-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX950-LABEL: v_maximum_f16:
43; GFX950:       ; %bb.0:
44; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX950-NEXT:    v_max_f16_e32 v2, v0, v1
46; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
47; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
48; GFX950-NEXT:    s_nop 1
49; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
50; GFX950-NEXT:    s_setpc_b64 s[30:31]
51;
52; GFX10-LABEL: v_maximum_f16:
53; GFX10:       ; %bb.0:
54; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
56; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
57; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
58; GFX10-NEXT:    s_setpc_b64 s[30:31]
59;
60; GFX11-LABEL: v_maximum_f16:
61; GFX11:       ; %bb.0:
62; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
64; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
65; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
66; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
67; GFX11-NEXT:    s_setpc_b64 s[30:31]
68;
69; GFX12-LABEL: v_maximum_f16:
70; GFX12:       ; %bb.0:
71; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
72; GFX12-NEXT:    s_wait_expcnt 0x0
73; GFX12-NEXT:    s_wait_samplecnt 0x0
74; GFX12-NEXT:    s_wait_bvhcnt 0x0
75; GFX12-NEXT:    s_wait_kmcnt 0x0
76; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
77; GFX12-NEXT:    s_setpc_b64 s[30:31]
78  %op = call half @llvm.maximum.f16(half %src0, half %src1)
79  ret half %op
80}
81
; Scalar half llvm.maximum with the nnan flag on the call.
; With nnan, the GFX7 through GFX11 checks contain only the max instruction
; (no ordered compare or NaN select). gfx900 and gfx950 produce the same
; output here, so they share the common GFX9 prefix. The GFX12 checks still
; expect v_maximum_f16.
82define half @v_maximum_f16__nnan(half %src0, half %src1) {
83; GFX7-LABEL: v_maximum_f16__nnan:
84; GFX7:       ; %bb.0:
85; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
87; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
88; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
89; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
90; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
91; GFX7-NEXT:    s_setpc_b64 s[30:31]
92;
93; GFX8-LABEL: v_maximum_f16__nnan:
94; GFX8:       ; %bb.0:
95; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
97; GFX8-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX9-LABEL: v_maximum_f16__nnan:
100; GFX9:       ; %bb.0:
101; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
103; GFX9-NEXT:    s_setpc_b64 s[30:31]
104;
105; GFX10-LABEL: v_maximum_f16__nnan:
106; GFX10:       ; %bb.0:
107; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
109; GFX10-NEXT:    s_setpc_b64 s[30:31]
110;
111; GFX11-LABEL: v_maximum_f16__nnan:
112; GFX11:       ; %bb.0:
113; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
115; GFX11-NEXT:    s_setpc_b64 s[30:31]
116;
117; GFX12-LABEL: v_maximum_f16__nnan:
118; GFX12:       ; %bb.0:
119; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
120; GFX12-NEXT:    s_wait_expcnt 0x0
121; GFX12-NEXT:    s_wait_samplecnt 0x0
122; GFX12-NEXT:    s_wait_bvhcnt 0x0
123; GFX12-NEXT:    s_wait_kmcnt 0x0
124; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
125; GFX12-NEXT:    s_setpc_b64 s[30:31]
126  %op = call nnan half @llvm.maximum.f16(half %src0, half %src1)
127  ret half %op
128}
129
; Scalar half llvm.maximum with only the nsz flag on the call.
; The expected instruction sequences are the same as for the flag-free
; v_maximum_f16 test: the max + ordered compare + NaN select sequence in the
; GFX7 through GFX11 checks, and v_maximum_f16 in the GFX12 checks.
130define half @v_maximum_f16__nsz(half %src0, half %src1) {
131; GFX7-LABEL: v_maximum_f16__nsz:
132; GFX7:       ; %bb.0:
133; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
135; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
136; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
137; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
138; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
139; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
140; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
141; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
142; GFX7-NEXT:    s_setpc_b64 s[30:31]
143;
144; GFX8-LABEL: v_maximum_f16__nsz:
145; GFX8:       ; %bb.0:
146; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
148; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
149; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
150; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
151; GFX8-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX900-LABEL: v_maximum_f16__nsz:
154; GFX900:       ; %bb.0:
155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
157; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
158; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
159; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
160; GFX900-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX950-LABEL: v_maximum_f16__nsz:
163; GFX950:       ; %bb.0:
164; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX950-NEXT:    v_max_f16_e32 v2, v0, v1
166; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
167; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
168; GFX950-NEXT:    s_nop 1
169; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
170; GFX950-NEXT:    s_setpc_b64 s[30:31]
171;
172; GFX10-LABEL: v_maximum_f16__nsz:
173; GFX10:       ; %bb.0:
174; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
176; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
177; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
178; GFX10-NEXT:    s_setpc_b64 s[30:31]
179;
180; GFX11-LABEL: v_maximum_f16__nsz:
181; GFX11:       ; %bb.0:
182; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
184; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
185; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
186; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
187; GFX11-NEXT:    s_setpc_b64 s[30:31]
188;
189; GFX12-LABEL: v_maximum_f16__nsz:
190; GFX12:       ; %bb.0:
191; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
192; GFX12-NEXT:    s_wait_expcnt 0x0
193; GFX12-NEXT:    s_wait_samplecnt 0x0
194; GFX12-NEXT:    s_wait_bvhcnt 0x0
195; GFX12-NEXT:    s_wait_kmcnt 0x0
196; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
197; GFX12-NEXT:    s_setpc_b64 s[30:31]
198  %op = call nsz half @llvm.maximum.f16(half %src0, half %src1)
199  ret half %op
200}
201
; Scalar half llvm.maximum with both nnan and nsz on the call.
; The expected instruction sequences match the nnan-only test: a bare max
; instruction in the GFX7 through GFX11 checks (shared GFX9 prefix for gfx900
; and gfx950), and v_maximum_f16 in the GFX12 checks.
202define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
203; GFX7-LABEL: v_maximum_f16__nnan_nsz:
204; GFX7:       ; %bb.0:
205; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
207; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
208; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
209; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
210; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
211; GFX7-NEXT:    s_setpc_b64 s[30:31]
212;
213; GFX8-LABEL: v_maximum_f16__nnan_nsz:
214; GFX8:       ; %bb.0:
215; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
217; GFX8-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX9-LABEL: v_maximum_f16__nnan_nsz:
220; GFX9:       ; %bb.0:
221; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
223; GFX9-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX10-LABEL: v_maximum_f16__nnan_nsz:
226; GFX10:       ; %bb.0:
227; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
229; GFX10-NEXT:    s_setpc_b64 s[30:31]
230;
231; GFX11-LABEL: v_maximum_f16__nnan_nsz:
232; GFX11:       ; %bb.0:
233; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
235; GFX11-NEXT:    s_setpc_b64 s[30:31]
236;
237; GFX12-LABEL: v_maximum_f16__nnan_nsz:
238; GFX12:       ; %bb.0:
239; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
240; GFX12-NEXT:    s_wait_expcnt 0x0
241; GFX12-NEXT:    s_wait_samplecnt 0x0
242; GFX12-NEXT:    s_wait_bvhcnt 0x0
243; GFX12-NEXT:    s_wait_kmcnt 0x0
244; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
245; GFX12-NEXT:    s_setpc_b64 s[30:31]
246  %op = call nnan nsz half @llvm.maximum.f16(half %src0, half %src1)
247  ret half %op
248}
249
; Scalar half llvm.maximum where only the first operand is produced by an
; nnan instruction (fadd nnan %arg0, 1.0); the maximum call itself carries no
; fast-math flags.
; The GFX7 through GFX11 checks still contain the ordered compare and NaN
; select after the add, as in the flag-free test. The GFX12 checks expect the
; add followed by v_maximum_f16.
250define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
251; GFX7-LABEL: v_maximum_f16__nnan_src0:
252; GFX7:       ; %bb.0:
253; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
255; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
256; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
257; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
258; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
259; GFX7-NEXT:    v_add_f32_e32 v0, 1.0, v0
260; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
261; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
262; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
263; GFX7-NEXT:    s_setpc_b64 s[30:31]
264;
265; GFX8-LABEL: v_maximum_f16__nnan_src0:
266; GFX8:       ; %bb.0:
267; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GFX8-NEXT:    v_add_f16_e32 v0, 1.0, v0
269; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
270; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
271; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
272; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
273; GFX8-NEXT:    s_setpc_b64 s[30:31]
274;
275; GFX900-LABEL: v_maximum_f16__nnan_src0:
276; GFX900:       ; %bb.0:
277; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278; GFX900-NEXT:    v_add_f16_e32 v0, 1.0, v0
279; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
280; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
281; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
282; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
283; GFX900-NEXT:    s_setpc_b64 s[30:31]
284;
285; GFX950-LABEL: v_maximum_f16__nnan_src0:
286; GFX950:       ; %bb.0:
287; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; GFX950-NEXT:    v_add_f16_e32 v0, 1.0, v0
289; GFX950-NEXT:    v_max_f16_e32 v2, v0, v1
290; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
291; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
292; GFX950-NEXT:    s_nop 1
293; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
294; GFX950-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX10-LABEL: v_maximum_f16__nnan_src0:
297; GFX10:       ; %bb.0:
298; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX10-NEXT:    v_add_f16_e32 v0, 1.0, v0
300; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
301; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
302; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
303; GFX10-NEXT:    s_setpc_b64 s[30:31]
304;
305; GFX11-LABEL: v_maximum_f16__nnan_src0:
306; GFX11:       ; %bb.0:
307; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX11-NEXT:    v_add_f16_e32 v0, 1.0, v0
309; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
310; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
311; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
312; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
313; GFX11-NEXT:    s_setpc_b64 s[30:31]
314;
315; GFX12-LABEL: v_maximum_f16__nnan_src0:
316; GFX12:       ; %bb.0:
317; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
318; GFX12-NEXT:    s_wait_expcnt 0x0
319; GFX12-NEXT:    s_wait_samplecnt 0x0
320; GFX12-NEXT:    s_wait_bvhcnt 0x0
321; GFX12-NEXT:    s_wait_kmcnt 0x0
322; GFX12-NEXT:    v_add_f16_e32 v0, 1.0, v0
323; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
324; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
325; GFX12-NEXT:    s_setpc_b64 s[30:31]
326  %src0 = fadd nnan half %arg0, 1.0
327  %op = call half @llvm.maximum.f16(half %src0, half %src1)
328  ret half %op
329}
330
; Mirror of the nnan_src0 test: only the second operand is produced by an
; nnan instruction (fadd nnan %arg1, 1.0); the maximum call itself carries no
; fast-math flags.
; The GFX7 through GFX11 checks still contain the ordered compare and NaN
; select after the add. The GFX12 checks expect the add followed by
; v_maximum_f16.
331define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
332; GFX7-LABEL: v_maximum_f16__nnan_src1:
333; GFX7:       ; %bb.0:
334; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
336; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
337; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
338; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
339; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
340; GFX7-NEXT:    v_add_f32_e32 v1, 1.0, v1
341; GFX7-NEXT:    v_max_f32_e32 v3, v0, v1
342; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
343; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
344; GFX7-NEXT:    s_setpc_b64 s[30:31]
345;
346; GFX8-LABEL: v_maximum_f16__nnan_src1:
347; GFX8:       ; %bb.0:
348; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX8-NEXT:    v_add_f16_e32 v1, 1.0, v1
350; GFX8-NEXT:    v_max_f16_e32 v2, v0, v1
351; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
352; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
353; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
354; GFX8-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX900-LABEL: v_maximum_f16__nnan_src1:
357; GFX900:       ; %bb.0:
358; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359; GFX900-NEXT:    v_add_f16_e32 v1, 1.0, v1
360; GFX900-NEXT:    v_max_f16_e32 v2, v0, v1
361; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
362; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
363; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
364; GFX900-NEXT:    s_setpc_b64 s[30:31]
365;
366; GFX950-LABEL: v_maximum_f16__nnan_src1:
367; GFX950:       ; %bb.0:
368; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369; GFX950-NEXT:    v_add_f16_e32 v1, 1.0, v1
370; GFX950-NEXT:    v_max_f16_e32 v2, v0, v1
371; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
372; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
373; GFX950-NEXT:    s_nop 1
374; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
375; GFX950-NEXT:    s_setpc_b64 s[30:31]
376;
377; GFX10-LABEL: v_maximum_f16__nnan_src1:
378; GFX10:       ; %bb.0:
379; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380; GFX10-NEXT:    v_add_f16_e32 v1, 1.0, v1
381; GFX10-NEXT:    v_max_f16_e32 v2, v0, v1
382; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
383; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
384; GFX10-NEXT:    s_setpc_b64 s[30:31]
385;
386; GFX11-LABEL: v_maximum_f16__nnan_src1:
387; GFX11:       ; %bb.0:
388; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389; GFX11-NEXT:    v_add_f16_e32 v1, 1.0, v1
390; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
391; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
392; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
393; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
394; GFX11-NEXT:    s_setpc_b64 s[30:31]
395;
396; GFX12-LABEL: v_maximum_f16__nnan_src1:
397; GFX12:       ; %bb.0:
398; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
399; GFX12-NEXT:    s_wait_expcnt 0x0
400; GFX12-NEXT:    s_wait_samplecnt 0x0
401; GFX12-NEXT:    s_wait_bvhcnt 0x0
402; GFX12-NEXT:    s_wait_kmcnt 0x0
403; GFX12-NEXT:    v_add_f16_e32 v1, 1.0, v1
404; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
405; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
406; GFX12-NEXT:    s_setpc_b64 s[30:31]
407  %src1 = fadd nnan half %arg1, 1.0
408  %op = call half @llvm.maximum.f16(half %src0, half %src1)
409  ret half %op
410}
411
; Scalar half llvm.maximum on inreg arguments. The result is bitcast to i16,
; zero-extended to i32 and consumed by a side-effecting inline asm, so the
; function returns void.
; The GFX12 checks expect the scalar s_maximum_f16 instruction with the asm
; using s0. The GFX7 through GFX11 checks expect vector instructions reading
; the SGPR inputs (s16/s17 or s0/s1 depending on the target).
; NOTE(review): the inline asm constraint is "s", yet the GFX7 through GFX11
; checks show the asm operand printed as v0 -- confirm this is the intended
; codegen before relying on these lines.
412define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
413; GFX7-LABEL: s_maximum_f16:
414; GFX7:       ; %bb.0:
415; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, s17
417; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s16
418; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
419; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
420; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
421; GFX7-NEXT:    v_max_f32_e32 v3, v1, v0
422; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v0
423; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
424; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
425; GFX7-NEXT:    ;;#ASMSTART
426; GFX7-NEXT:    ; use v0
427; GFX7-NEXT:    ;;#ASMEND
428; GFX7-NEXT:    s_setpc_b64 s[30:31]
429;
430; GFX8-LABEL: s_maximum_f16:
431; GFX8:       ; %bb.0:
432; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433; GFX8-NEXT:    v_mov_b32_e32 v0, s17
434; GFX8-NEXT:    v_max_f16_e32 v1, s16, v0
435; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
436; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
437; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
438; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
439; GFX8-NEXT:    ;;#ASMSTART
440; GFX8-NEXT:    ; use v0
441; GFX8-NEXT:    ;;#ASMEND
442; GFX8-NEXT:    s_setpc_b64 s[30:31]
443;
444; GFX900-LABEL: s_maximum_f16:
445; GFX900:       ; %bb.0:
446; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447; GFX900-NEXT:    v_mov_b32_e32 v0, s17
448; GFX900-NEXT:    v_max_f16_e32 v1, s16, v0
449; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
450; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
451; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
452; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
453; GFX900-NEXT:    ;;#ASMSTART
454; GFX900-NEXT:    ; use v0
455; GFX900-NEXT:    ;;#ASMEND
456; GFX900-NEXT:    s_setpc_b64 s[30:31]
457;
458; GFX950-LABEL: s_maximum_f16:
459; GFX950:       ; %bb.0:
460; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461; GFX950-NEXT:    v_mov_b32_e32 v0, s1
462; GFX950-NEXT:    v_max_f16_e32 v1, s0, v0
463; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7e00
464; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
465; GFX950-NEXT:    s_nop 1
466; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
467; GFX950-NEXT:    v_and_b32_e32 v0, 0xffff, v0
468; GFX950-NEXT:    ;;#ASMSTART
469; GFX950-NEXT:    ; use v0
470; GFX950-NEXT:    ;;#ASMEND
471; GFX950-NEXT:    s_setpc_b64 s[30:31]
472;
473; GFX10-LABEL: s_maximum_f16:
474; GFX10:       ; %bb.0:
475; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476; GFX10-NEXT:    v_max_f16_e64 v0, s16, s17
477; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
478; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
479; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
480; GFX10-NEXT:    ;;#ASMSTART
481; GFX10-NEXT:    ; use v0
482; GFX10-NEXT:    ;;#ASMEND
483; GFX10-NEXT:    s_setpc_b64 s[30:31]
484;
485; GFX11-LABEL: s_maximum_f16:
486; GFX11:       ; %bb.0:
487; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; GFX11-NEXT:    v_max_f16_e64 v0, s0, s1
489; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
490; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
491; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
492; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
493; GFX11-NEXT:    ;;#ASMSTART
494; GFX11-NEXT:    ; use v0
495; GFX11-NEXT:    ;;#ASMEND
496; GFX11-NEXT:    s_setpc_b64 s[30:31]
497;
498; GFX12-LABEL: s_maximum_f16:
499; GFX12:       ; %bb.0:
500; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
501; GFX12-NEXT:    s_wait_expcnt 0x0
502; GFX12-NEXT:    s_wait_samplecnt 0x0
503; GFX12-NEXT:    s_wait_bvhcnt 0x0
504; GFX12-NEXT:    s_wait_kmcnt 0x0
505; GFX12-NEXT:    s_maximum_f16 s0, s0, s1
506; GFX12-NEXT:    s_wait_alu 0xfffe
507; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
508; GFX12-NEXT:    s_and_b32 s0, 0xffff, s0
509; GFX12-NEXT:    ;;#ASMSTART
510; GFX12-NEXT:    ; use s0
511; GFX12-NEXT:    ;;#ASMEND
512; GFX12-NEXT:    s_wait_alu 0xfffe
513; GFX12-NEXT:    s_setpc_b64 s[30:31]
514  %op = call half @llvm.maximum.f16(half %src0, half %src1)
515  %cast = bitcast half %op to i16
516  %zext = zext i16 %cast to i32
517  call void asm sideeffect "; use $0", "s"(i32 %zext)
518  ret void
519}
520
; <2 x half> llvm.maximum with no fast-math flags on the call.
; Expected shapes per target, as recorded in the checks below:
;   - GFX7 checks: each element handled in f32 with max + ordered compare +
;     select of 0x7fc00000.
;   - GFX8 checks: the two halves are split, processed separately and
;     recombined with an SDWA or.
;   - GFX900, GFX10 and GFX11 checks: one v_pk_max_f16, then a compare/select
;     of 0x7e00 per half and a v_perm_b32 to repack.
;   - GFX950 checks: a single v_pk_maximum3_f16 with the second source
;     repeated as the third operand.
;   - GFX12 checks: a single v_pk_maximum_f16.
521define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
522; GFX7-LABEL: v_maximum_v2f16:
523; GFX7:       ; %bb.0:
524; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
526; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
527; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
528; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
529; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
530; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
531; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
532; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
533; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
534; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
535; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
536; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
537; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
538; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
539; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
540; GFX7-NEXT:    s_setpc_b64 s[30:31]
541;
542; GFX8-LABEL: v_maximum_v2f16:
543; GFX8:       ; %bb.0:
544; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
546; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
547; GFX8-NEXT:    v_max_f16_e32 v4, v3, v2
548; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
549; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
550; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc
551; GFX8-NEXT:    v_max_f16_e32 v3, v0, v1
552; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
553; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
554; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
555; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
556; GFX8-NEXT:    s_setpc_b64 s[30:31]
557;
558; GFX900-LABEL: v_maximum_v2f16:
559; GFX900:       ; %bb.0:
560; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561; GFX900-NEXT:    v_pk_max_f16 v2, v0, v1
562; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
563; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
564; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
565; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
566; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
567; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
568; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
569; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
570; GFX900-NEXT:    s_setpc_b64 s[30:31]
571;
572; GFX950-LABEL: v_maximum_v2f16:
573; GFX950:       ; %bb.0:
574; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
576; GFX950-NEXT:    s_setpc_b64 s[30:31]
577;
578; GFX10-LABEL: v_maximum_v2f16:
579; GFX10:       ; %bb.0:
580; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; GFX10-NEXT:    v_pk_max_f16 v2, v0, v1
582; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
583; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
584; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
585; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
586; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
587; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
588; GFX10-NEXT:    s_setpc_b64 s[30:31]
589;
590; GFX11-LABEL: v_maximum_v2f16:
591; GFX11:       ; %bb.0:
592; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593; GFX11-NEXT:    v_pk_max_f16 v2, v0, v1
594; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
595; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
596; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
597; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
598; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
599; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
600; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
601; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
602; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
603; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
604; GFX11-NEXT:    s_setpc_b64 s[30:31]
605;
606; GFX12-LABEL: v_maximum_v2f16:
607; GFX12:       ; %bb.0:
608; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
609; GFX12-NEXT:    s_wait_expcnt 0x0
610; GFX12-NEXT:    s_wait_samplecnt 0x0
611; GFX12-NEXT:    s_wait_bvhcnt 0x0
612; GFX12-NEXT:    s_wait_kmcnt 0x0
613; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
614; GFX12-NEXT:    s_setpc_b64 s[30:31]
615  %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
616  ret <2 x half> %op
617}
618
; <2 x half> llvm.maximum with the nnan flag on the call.
; With nnan, the checks contain no compare/select sequence: the GFX7 checks
; have two v_max_f32, the GFX8 checks an SDWA max plus a plain max joined by
; an or, and the GFX900, GFX10 and GFX11 checks a single v_pk_max_f16.
; The GFX950 checks still expect v_pk_maximum3_f16 and the GFX12 checks
; v_pk_maximum_f16, as in the flag-free test.
619define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
620; GFX7-LABEL: v_maximum_v2f16__nnan:
621; GFX7:       ; %bb.0:
622; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
624; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
625; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
626; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
627; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
628; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
629; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
630; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
631; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
632; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
633; GFX7-NEXT:    s_setpc_b64 s[30:31]
634;
635; GFX8-LABEL: v_maximum_v2f16__nnan:
636; GFX8:       ; %bb.0:
637; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; GFX8-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
639; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
640; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
641; GFX8-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX900-LABEL: v_maximum_v2f16__nnan:
644; GFX900:       ; %bb.0:
645; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
647; GFX900-NEXT:    s_setpc_b64 s[30:31]
648;
649; GFX950-LABEL: v_maximum_v2f16__nnan:
650; GFX950:       ; %bb.0:
651; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
653; GFX950-NEXT:    s_setpc_b64 s[30:31]
654;
655; GFX10-LABEL: v_maximum_v2f16__nnan:
656; GFX10:       ; %bb.0:
657; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
659; GFX10-NEXT:    s_setpc_b64 s[30:31]
660;
661; GFX11-LABEL: v_maximum_v2f16__nnan:
662; GFX11:       ; %bb.0:
663; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
665; GFX11-NEXT:    s_setpc_b64 s[30:31]
666;
667; GFX12-LABEL: v_maximum_v2f16__nnan:
668; GFX12:       ; %bb.0:
669; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
670; GFX12-NEXT:    s_wait_expcnt 0x0
671; GFX12-NEXT:    s_wait_samplecnt 0x0
672; GFX12-NEXT:    s_wait_bvhcnt 0x0
673; GFX12-NEXT:    s_wait_kmcnt 0x0
674; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
675; GFX12-NEXT:    s_setpc_b64 s[30:31]
676  %op = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
677  ret <2 x half> %op
678}
679
; <2 x half> llvm.maximum with only the nsz flag on the call.
; The expected instruction sequences are the same as for the flag-free
; v_maximum_v2f16 test: the compare/select of a NaN constant is still present
; in the GFX7, GFX8, GFX900, GFX10 and GFX11 checks, while the GFX950 checks
; expect v_pk_maximum3_f16 and the GFX12 checks v_pk_maximum_f16.
680define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
681; GFX7-LABEL: v_maximum_v2f16__nsz:
682; GFX7:       ; %bb.0:
683; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
685; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
686; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
687; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
688; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
689; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
690; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
691; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
692; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
693; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
694; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
695; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
696; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
697; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
698; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
699; GFX7-NEXT:    s_setpc_b64 s[30:31]
700;
701; GFX8-LABEL: v_maximum_v2f16__nsz:
702; GFX8:       ; %bb.0:
703; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
705; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
706; GFX8-NEXT:    v_max_f16_e32 v4, v3, v2
707; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
708; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
709; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc
710; GFX8-NEXT:    v_max_f16_e32 v3, v0, v1
711; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
712; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
713; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
714; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
715; GFX8-NEXT:    s_setpc_b64 s[30:31]
716;
717; GFX900-LABEL: v_maximum_v2f16__nsz:
718; GFX900:       ; %bb.0:
719; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720; GFX900-NEXT:    v_pk_max_f16 v2, v0, v1
721; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
722; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
723; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
724; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
725; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
726; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
727; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
728; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
729; GFX900-NEXT:    s_setpc_b64 s[30:31]
730;
731; GFX950-LABEL: v_maximum_v2f16__nsz:
732; GFX950:       ; %bb.0:
733; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
734; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
735; GFX950-NEXT:    s_setpc_b64 s[30:31]
736;
737; GFX10-LABEL: v_maximum_v2f16__nsz:
738; GFX10:       ; %bb.0:
739; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740; GFX10-NEXT:    v_pk_max_f16 v2, v0, v1
741; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
742; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
743; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
744; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
745; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
746; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
747; GFX10-NEXT:    s_setpc_b64 s[30:31]
748;
749; GFX11-LABEL: v_maximum_v2f16__nsz:
750; GFX11:       ; %bb.0:
751; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752; GFX11-NEXT:    v_pk_max_f16 v2, v0, v1
753; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
754; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
755; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
756; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
757; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
758; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
759; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
760; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
761; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
762; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
763; GFX11-NEXT:    s_setpc_b64 s[30:31]
764;
765; GFX12-LABEL: v_maximum_v2f16__nsz:
766; GFX12:       ; %bb.0:
767; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
768; GFX12-NEXT:    s_wait_expcnt 0x0
769; GFX12-NEXT:    s_wait_samplecnt 0x0
770; GFX12-NEXT:    s_wait_bvhcnt 0x0
771; GFX12-NEXT:    s_wait_kmcnt 0x0
772; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
773; GFX12-NEXT:    s_setpc_b64 s[30:31]
774  %op = call nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
775  ret <2 x half> %op
776}
777
; llvm.maximum on <2 x half> VGPR operands with both nnan and nsz set on the
; call. Every expected body below is a plain max with no v_cmp_o / v_cndmask
; sequence: gfx703 round-trips each element through f16 and uses v_max_f32,
; gfx803 uses v_max_f16 (SDWA form for the high half) plus v_or_b32,
; gfx900 / gfx1030 / gfx1100 use a single v_pk_max_f16, gfx950 uses
; v_pk_maximum3_f16 with the second source repeated, and gfx1200 uses
; v_pk_maximum_f16.
778define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) {
779; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
780; GFX7:       ; %bb.0:
781; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
782; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
783; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
784; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
785; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
786; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
787; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
788; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
789; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
790; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
791; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
792; GFX7-NEXT:    s_setpc_b64 s[30:31]
793;
794; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
795; GFX8:       ; %bb.0:
796; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797; GFX8-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
798; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
799; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
800; GFX8-NEXT:    s_setpc_b64 s[30:31]
801;
802; GFX900-LABEL: v_maximum_v2f16__nnan_nsz:
803; GFX900:       ; %bb.0:
804; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
806; GFX900-NEXT:    s_setpc_b64 s[30:31]
807;
808; GFX950-LABEL: v_maximum_v2f16__nnan_nsz:
809; GFX950:       ; %bb.0:
810; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
812; GFX950-NEXT:    s_setpc_b64 s[30:31]
813;
814; GFX10-LABEL: v_maximum_v2f16__nnan_nsz:
815; GFX10:       ; %bb.0:
816; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
818; GFX10-NEXT:    s_setpc_b64 s[30:31]
819;
820; GFX11-LABEL: v_maximum_v2f16__nnan_nsz:
821; GFX11:       ; %bb.0:
822; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
824; GFX11-NEXT:    s_setpc_b64 s[30:31]
825;
826; GFX12-LABEL: v_maximum_v2f16__nnan_nsz:
827; GFX12:       ; %bb.0:
828; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
829; GFX12-NEXT:    s_wait_expcnt 0x0
830; GFX12-NEXT:    s_wait_samplecnt 0x0
831; GFX12-NEXT:    s_wait_bvhcnt 0x0
832; GFX12-NEXT:    s_wait_kmcnt 0x0
833; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
834; GFX12-NEXT:    s_setpc_b64 s[30:31]
835  %op = call nnan nsz <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
836  ret <2 x half> %op
837}
838
; llvm.maximum on <2 x half> operands passed `inreg`, with no fast-math flags.
; The result is bitcast to i32 and consumed by a side-effecting inline asm so
; that it has a use without being returned.
; In the expected output the gfx703 / gfx803 / gfx900 / gfx1030 bodies read the
; arguments from s16..s19 (s16/s17 when packed), while the gfx950 / gfx1100 /
; gfx1200 bodies read them from s0/s1.
; Targets other than gfx950 and gfx1200 expand to max + v_cmp_o + v_cndmask
; against the constant 0x7e00 (0x7fc00000 on the gfx703 f32 path), which looks
; like the quiet-NaN encoding; gfx950 and gfx1200 use one packed maximum
; instruction.
; NOTE(review): the asm operand uses the "s" constraint, yet every expected
; body prints `; use v0` with no copy back to an SGPR. The assertions are
; autogenerated, so this records llc output at this revision; confirm whether
; that is the intended behaviour.
839define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
840; GFX7-LABEL: s_maximum_v2f16:
841; GFX7:       ; %bb.0:
842; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
843; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, s19
844; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, s17
845; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, s18
846; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, s16
847; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
848; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
849; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
850; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
851; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
852; GFX7-NEXT:    v_max_f32_e32 v4, v1, v0
853; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v0
854; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
855; GFX7-NEXT:    v_max_f32_e32 v1, v3, v2
856; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v2
857; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
858; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
859; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
860; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
861; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
862; GFX7-NEXT:    ;;#ASMSTART
863; GFX7-NEXT:    ; use v0
864; GFX7-NEXT:    ;;#ASMEND
865; GFX7-NEXT:    s_setpc_b64 s[30:31]
866;
867; GFX8-LABEL: s_maximum_v2f16:
868; GFX8:       ; %bb.0:
869; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870; GFX8-NEXT:    s_lshr_b32 s4, s17, 16
871; GFX8-NEXT:    s_lshr_b32 s5, s16, 16
872; GFX8-NEXT:    v_mov_b32_e32 v0, s4
873; GFX8-NEXT:    v_max_f16_e32 v1, s5, v0
874; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
875; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s5, v0
876; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
877; GFX8-NEXT:    v_mov_b32_e32 v1, s17
878; GFX8-NEXT:    v_max_f16_e32 v3, s16, v1
879; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v1
880; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
881; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
882; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
883; GFX8-NEXT:    ;;#ASMSTART
884; GFX8-NEXT:    ; use v0
885; GFX8-NEXT:    ;;#ASMEND
886; GFX8-NEXT:    s_setpc_b64 s[30:31]
887;
888; GFX900-LABEL: s_maximum_v2f16:
889; GFX900:       ; %bb.0:
890; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891; GFX900-NEXT:    v_mov_b32_e32 v0, s17
892; GFX900-NEXT:    v_mov_b32_e32 v1, s17
893; GFX900-NEXT:    s_lshr_b32 s4, s17, 16
894; GFX900-NEXT:    v_pk_max_f16 v1, s16, v1
895; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
896; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
897; GFX900-NEXT:    s_lshr_b32 s5, s16, 16
898; GFX900-NEXT:    v_mov_b32_e32 v3, s4
899; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
900; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
901; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s5, v3
902; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
903; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
904; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
905; GFX900-NEXT:    ;;#ASMSTART
906; GFX900-NEXT:    ; use v0
907; GFX900-NEXT:    ;;#ASMEND
908; GFX900-NEXT:    s_setpc_b64 s[30:31]
909;
910; GFX950-LABEL: s_maximum_v2f16:
911; GFX950:       ; %bb.0:
912; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913; GFX950-NEXT:    v_mov_b32_e32 v0, s0
914; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, s1, s1
915; GFX950-NEXT:    s_nop 0
916; GFX950-NEXT:    ;;#ASMSTART
917; GFX950-NEXT:    ; use v0
918; GFX950-NEXT:    ;;#ASMEND
919; GFX950-NEXT:    s_setpc_b64 s[30:31]
920;
921; GFX10-LABEL: s_maximum_v2f16:
922; GFX10:       ; %bb.0:
923; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924; GFX10-NEXT:    v_pk_max_f16 v0, s16, s17
925; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
926; GFX10-NEXT:    s_lshr_b32 s4, s17, 16
927; GFX10-NEXT:    s_lshr_b32 s5, s16, 16
928; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
929; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
930; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s5, s4
931; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
932; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
933; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
934; GFX10-NEXT:    ;;#ASMSTART
935; GFX10-NEXT:    ; use v0
936; GFX10-NEXT:    ;;#ASMEND
937; GFX10-NEXT:    s_setpc_b64 s[30:31]
938;
939; GFX11-LABEL: s_maximum_v2f16:
940; GFX11:       ; %bb.0:
941; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942; GFX11-NEXT:    v_pk_max_f16 v0, s0, s1
943; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
944; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
945; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
946; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
947; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
948; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
949; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
950; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
951; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
952; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
953; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
954; GFX11-NEXT:    ;;#ASMSTART
955; GFX11-NEXT:    ; use v0
956; GFX11-NEXT:    ;;#ASMEND
957; GFX11-NEXT:    s_setpc_b64 s[30:31]
958;
959; GFX12-LABEL: s_maximum_v2f16:
960; GFX12:       ; %bb.0:
961; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
962; GFX12-NEXT:    s_wait_expcnt 0x0
963; GFX12-NEXT:    s_wait_samplecnt 0x0
964; GFX12-NEXT:    s_wait_bvhcnt 0x0
965; GFX12-NEXT:    s_wait_kmcnt 0x0
966; GFX12-NEXT:    v_pk_maximum_f16 v0, s0, s1
967; GFX12-NEXT:    ;;#ASMSTART
968; GFX12-NEXT:    ; use v0
969; GFX12-NEXT:    ;;#ASMEND
970; GFX12-NEXT:    s_setpc_b64 s[30:31]
971  %op = call <2 x half> @llvm.maximum.v2f16(<2 x half> %src0, <2 x half> %src1)
972  %cast = bitcast <2 x half> %op to i32
973  call void asm sideeffect "; use $0", "s"(i32 %cast)
974  ret void
975}
976
; llvm.maximum on <3 x half> VGPR operands with no fast-math flags.
; In the expected output gfx703 handles the three elements as separate f32
; values (v0..v2 against v3..v5); the other targets hold the first two
; elements packed in v0/v2 and the third in v1/v3.
; gfx703 / gfx803 / gfx900 / gfx1030 / gfx1100 emit a max followed by v_cmp_o
; and v_cndmask against the constant 0x7e00 (0x7fc00000 on the gfx703 f32
; path), which looks like the quiet-NaN encoding. gfx950 emits two
; v_pk_maximum3_f16 and gfx1200 two v_pk_maximum_f16, with no compare/select.
977define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
978; GFX7-LABEL: v_maximum_v3f16:
979; GFX7:       ; %bb.0:
980; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
981; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
982; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
983; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
984; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
985; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
986; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
987; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
988; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
989; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
990; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
991; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
992; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
993; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
994; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
995; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
996; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
997; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
998; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
999; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1000; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
1001; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1002; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1003; GFX7-NEXT:    s_setpc_b64 s[30:31]
1004;
1005; GFX8-LABEL: v_maximum_v3f16:
1006; GFX8:       ; %bb.0:
1007; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
1009; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1010; GFX8-NEXT:    v_max_f16_e32 v6, v5, v4
1011; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
1012; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
1013; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
1014; GFX8-NEXT:    v_max_f16_e32 v5, v1, v3
1015; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1016; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
1017; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
1018; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1019; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1020; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
1021; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1022; GFX8-NEXT:    s_setpc_b64 s[30:31]
1023;
1024; GFX900-LABEL: v_maximum_v3f16:
1025; GFX900:       ; %bb.0:
1026; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
1028; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
1029; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1030; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
1031; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
1032; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1033; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
1034; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1035; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1036; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
1037; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1038; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
1039; GFX900-NEXT:    s_setpc_b64 s[30:31]
1040;
1041; GFX950-LABEL: v_maximum_v3f16:
1042; GFX950:       ; %bb.0:
1043; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1045; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1046; GFX950-NEXT:    s_setpc_b64 s[30:31]
1047;
1048; GFX10-LABEL: v_maximum_v3f16:
1049; GFX10:       ; %bb.0:
1050; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051; GFX10-NEXT:    v_pk_max_f16 v4, v0, v2
1052; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1053; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
1054; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
1055; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1056; GFX10-NEXT:    v_pk_max_f16 v2, v1, v3
1057; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
1058; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1059; GFX10-NEXT:    v_perm_b32 v0, v0, v4, 0x5040100
1060; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
1061; GFX10-NEXT:    s_setpc_b64 s[30:31]
1062;
1063; GFX11-LABEL: v_maximum_v3f16:
1064; GFX11:       ; %bb.0:
1065; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066; GFX11-NEXT:    v_pk_max_f16 v4, v0, v2
1067; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1068; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1069; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1070; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1071; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
1072; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1073; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1074; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
1075; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1076; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1077; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1078; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1079; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1080; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1081; GFX11-NEXT:    s_setpc_b64 s[30:31]
1082;
1083; GFX12-LABEL: v_maximum_v3f16:
1084; GFX12:       ; %bb.0:
1085; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1086; GFX12-NEXT:    s_wait_expcnt 0x0
1087; GFX12-NEXT:    s_wait_samplecnt 0x0
1088; GFX12-NEXT:    s_wait_bvhcnt 0x0
1089; GFX12-NEXT:    s_wait_kmcnt 0x0
1090; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1091; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1092; GFX12-NEXT:    s_setpc_b64 s[30:31]
1093  %op = call <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
1094  ret <3 x half> %op
1095}
1096
; llvm.maximum on <3 x half> VGPR operands with only the nnan flag on the call.
; None of the expected bodies below contains a v_cmp_o / v_cndmask sequence:
; gfx703 uses three v_max_f32 after round-tripping through f16, gfx803 uses
; v_max_f16 (SDWA form for the high half of v0) plus v_or_b32, gfx900 /
; gfx1030 / gfx1100 use two v_pk_max_f16, gfx950 uses two v_pk_maximum3_f16
; and gfx1200 uses two v_pk_maximum_f16.
1097define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
1098; GFX7-LABEL: v_maximum_v3f16__nnan:
1099; GFX7:       ; %bb.0:
1100; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1102; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1103; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1104; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1105; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1106; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1107; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1108; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1109; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1110; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1111; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1112; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1113; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
1114; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
1115; GFX7-NEXT:    v_max_f32_e32 v2, v2, v5
1116; GFX7-NEXT:    s_setpc_b64 s[30:31]
1117;
1118; GFX8-LABEL: v_maximum_v3f16__nnan:
1119; GFX8:       ; %bb.0:
1120; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121; GFX8-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1122; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1123; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1124; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
1125; GFX8-NEXT:    s_setpc_b64 s[30:31]
1126;
1127; GFX900-LABEL: v_maximum_v3f16__nnan:
1128; GFX900:       ; %bb.0:
1129; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1130; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
1131; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
1132; GFX900-NEXT:    s_setpc_b64 s[30:31]
1133;
1134; GFX950-LABEL: v_maximum_v3f16__nnan:
1135; GFX950:       ; %bb.0:
1136; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1138; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1139; GFX950-NEXT:    s_setpc_b64 s[30:31]
1140;
1141; GFX10-LABEL: v_maximum_v3f16__nnan:
1142; GFX10:       ; %bb.0:
1143; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1145; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1146; GFX10-NEXT:    s_setpc_b64 s[30:31]
1147;
1148; GFX11-LABEL: v_maximum_v3f16__nnan:
1149; GFX11:       ; %bb.0:
1150; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1152; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1153; GFX11-NEXT:    s_setpc_b64 s[30:31]
1154;
1155; GFX12-LABEL: v_maximum_v3f16__nnan:
1156; GFX12:       ; %bb.0:
1157; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1158; GFX12-NEXT:    s_wait_expcnt 0x0
1159; GFX12-NEXT:    s_wait_samplecnt 0x0
1160; GFX12-NEXT:    s_wait_bvhcnt 0x0
1161; GFX12-NEXT:    s_wait_kmcnt 0x0
1162; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1163; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1164; GFX12-NEXT:    s_setpc_b64 s[30:31]
1165  %op = call nnan <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
1166  ret <3 x half> %op
1167}
1168
; llvm.maximum on <3 x half> VGPR operands with only the nsz flag on the call.
; The expected bodies for gfx703 / gfx803 / gfx900 / gfx1030 / gfx1100 still
; contain the max + v_cmp_o + v_cndmask sequence against the constant 0x7e00
; (0x7fc00000 on the gfx703 f32 path), so nsz alone does not remove it in this
; output. gfx950 emits two v_pk_maximum3_f16 and gfx1200 two v_pk_maximum_f16.
1169define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
1170; GFX7-LABEL: v_maximum_v3f16__nsz:
1171; GFX7:       ; %bb.0:
1172; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1173; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1174; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1175; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1176; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1177; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1178; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1179; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1180; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1181; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1182; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1183; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1184; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1185; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
1186; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
1187; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
1188; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
1189; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
1190; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
1191; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1192; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
1193; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1194; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1195; GFX7-NEXT:    s_setpc_b64 s[30:31]
1196;
1197; GFX8-LABEL: v_maximum_v3f16__nsz:
1198; GFX8:       ; %bb.0:
1199; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
1201; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1202; GFX8-NEXT:    v_max_f16_e32 v6, v5, v4
1203; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
1204; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
1205; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
1206; GFX8-NEXT:    v_max_f16_e32 v5, v1, v3
1207; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1208; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
1209; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
1210; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1211; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1212; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
1213; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1214; GFX8-NEXT:    s_setpc_b64 s[30:31]
1215;
1216; GFX900-LABEL: v_maximum_v3f16__nsz:
1217; GFX900:       ; %bb.0:
1218; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
1220; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
1221; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1222; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
1223; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
1224; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1225; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
1226; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1227; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1228; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
1229; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1230; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
1231; GFX900-NEXT:    s_setpc_b64 s[30:31]
1232;
1233; GFX950-LABEL: v_maximum_v3f16__nsz:
1234; GFX950:       ; %bb.0:
1235; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1237; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1238; GFX950-NEXT:    s_setpc_b64 s[30:31]
1239;
1240; GFX10-LABEL: v_maximum_v3f16__nsz:
1241; GFX10:       ; %bb.0:
1242; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1243; GFX10-NEXT:    v_pk_max_f16 v4, v0, v2
1244; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1245; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
1246; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
1247; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1248; GFX10-NEXT:    v_pk_max_f16 v2, v1, v3
1249; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
1250; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1251; GFX10-NEXT:    v_perm_b32 v0, v0, v4, 0x5040100
1252; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
1253; GFX10-NEXT:    s_setpc_b64 s[30:31]
1254;
1255; GFX11-LABEL: v_maximum_v3f16__nsz:
1256; GFX11:       ; %bb.0:
1257; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258; GFX11-NEXT:    v_pk_max_f16 v4, v0, v2
1259; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1260; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1261; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1262; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1263; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
1264; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1265; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1266; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
1267; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1268; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1269; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1270; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1271; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1272; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1273; GFX11-NEXT:    s_setpc_b64 s[30:31]
1274;
1275; GFX12-LABEL: v_maximum_v3f16__nsz:
1276; GFX12:       ; %bb.0:
1277; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1278; GFX12-NEXT:    s_wait_expcnt 0x0
1279; GFX12-NEXT:    s_wait_samplecnt 0x0
1280; GFX12-NEXT:    s_wait_bvhcnt 0x0
1281; GFX12-NEXT:    s_wait_kmcnt 0x0
1282; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1283; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1284; GFX12-NEXT:    s_setpc_b64 s[30:31]
1285  %op = call nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
1286  ret <3 x half> %op
1287}
1288
; llvm.maximum on <3 x half> VGPR operands with both nnan and nsz on the call.
; None of the expected bodies below contains a v_cmp_o / v_cndmask sequence:
; gfx703 uses three v_max_f32 after round-tripping through f16, gfx803 uses
; v_max_f16 (SDWA form for the high half of v0) plus v_or_b32, gfx900 /
; gfx1030 / gfx1100 use two v_pk_max_f16, gfx950 uses two v_pk_maximum3_f16
; and gfx1200 uses two v_pk_maximum_f16.
1289define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) {
1290; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
1291; GFX7:       ; %bb.0:
1292; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1294; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1295; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1296; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1297; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1298; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1299; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1300; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1301; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1302; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1303; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1304; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1305; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
1306; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
1307; GFX7-NEXT:    v_max_f32_e32 v2, v2, v5
1308; GFX7-NEXT:    s_setpc_b64 s[30:31]
1309;
1310; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
1311; GFX8:       ; %bb.0:
1312; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313; GFX8-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1314; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1315; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1316; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
1317; GFX8-NEXT:    s_setpc_b64 s[30:31]
1318;
1319; GFX900-LABEL: v_maximum_v3f16__nnan_nsz:
1320; GFX900:       ; %bb.0:
1321; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
1323; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
1324; GFX900-NEXT:    s_setpc_b64 s[30:31]
1325;
1326; GFX950-LABEL: v_maximum_v3f16__nnan_nsz:
1327; GFX950:       ; %bb.0:
1328; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1330; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1331; GFX950-NEXT:    s_setpc_b64 s[30:31]
1332;
1333; GFX10-LABEL: v_maximum_v3f16__nnan_nsz:
1334; GFX10:       ; %bb.0:
1335; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1337; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1338; GFX10-NEXT:    s_setpc_b64 s[30:31]
1339;
1340; GFX11-LABEL: v_maximum_v3f16__nnan_nsz:
1341; GFX11:       ; %bb.0:
1342; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1344; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1345; GFX11-NEXT:    s_setpc_b64 s[30:31]
1346;
1347; GFX12-LABEL: v_maximum_v3f16__nnan_nsz:
1348; GFX12:       ; %bb.0:
1349; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1350; GFX12-NEXT:    s_wait_expcnt 0x0
1351; GFX12-NEXT:    s_wait_samplecnt 0x0
1352; GFX12-NEXT:    s_wait_bvhcnt 0x0
1353; GFX12-NEXT:    s_wait_kmcnt 0x0
1354; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1355; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1356; GFX12-NEXT:    s_setpc_b64 s[30:31]
1357  %op = call nnan nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
1358  ret <3 x half> %op
1359}
1360
; llvm.maximum on <4 x half> VGPR operands with no fast-math flags.
; In the expected output gfx703 handles the four elements as separate f32
; values (v0..v3 against v4..v7); the other targets hold them as two packed
; pairs (v0/v1 against v2/v3).
; gfx703 / gfx803 / gfx900 / gfx1030 / gfx1100 emit, per element, a max
; followed by v_cmp_o and v_cndmask against the constant 0x7e00 (0x7fc00000 on
; the gfx703 f32 path), which looks like the quiet-NaN encoding. gfx803
; repacks with shift + v_or_b32 (SDWA); gfx900 / gfx1030 / gfx1100 repack with
; v_perm_b32 using selector 0x5040100.
; gfx950 emits two v_pk_maximum3_f16 and gfx1200 two v_pk_maximum_f16, with no
; compare/select.
1361define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
1362; GFX7-LABEL: v_maximum_v4f16:
1363; GFX7:       ; %bb.0:
1364; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1366; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1367; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1368; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1369; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
1370; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1371; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
1372; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1373; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1374; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1375; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1376; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1377; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
1378; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1379; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
1380; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1381; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
1382; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1383; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1384; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1385; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
1386; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1387; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1388; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
1389; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1390; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1391; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
1392; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1393; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1394; GFX7-NEXT:    s_setpc_b64 s[30:31]
1395;
1396; GFX8-LABEL: v_maximum_v4f16:
1397; GFX8:       ; %bb.0:
1398; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
1400; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
1401; GFX8-NEXT:    v_max_f16_e32 v6, v5, v4
1402; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
1403; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
1404; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
1405; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1406; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1407; GFX8-NEXT:    v_max_f16_e32 v8, v6, v5
1408; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
1409; GFX8-NEXT:    v_cndmask_b32_e32 v5, v7, v8, vcc
1410; GFX8-NEXT:    v_max_f16_e32 v6, v1, v3
1411; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1412; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
1413; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
1414; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1415; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1416; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
1417; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1418; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
1419; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1420; GFX8-NEXT:    s_setpc_b64 s[30:31]
1421;
1422; GFX900-LABEL: v_maximum_v4f16:
1423; GFX900:       ; %bb.0:
1424; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
1426; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
1427; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1428; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
1429; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1430; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1431; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
1432; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
1433; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1434; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
1435; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1436; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1437; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
1438; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1439; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
1440; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
1441; GFX900-NEXT:    s_setpc_b64 s[30:31]
1442;
1443; GFX950-LABEL: v_maximum_v4f16:
1444; GFX950:       ; %bb.0:
1445; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1447; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1448; GFX950-NEXT:    s_setpc_b64 s[30:31]
1449;
1450; GFX10-LABEL: v_maximum_v4f16:
1451; GFX10:       ; %bb.0:
1452; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453; GFX10-NEXT:    v_pk_max_f16 v4, v1, v3
1454; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1455; GFX10-NEXT:    v_pk_max_f16 v5, v0, v2
1456; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1457; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1458; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v5
1459; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1460; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1461; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1462; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1463; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1464; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
1465; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1466; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
1467; GFX10-NEXT:    s_setpc_b64 s[30:31]
1468;
1469; GFX11-LABEL: v_maximum_v4f16:
1470; GFX11:       ; %bb.0:
1471; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
1473; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1474; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
1475; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
1476; GFX11-NEXT:    v_pk_max_f16 v7, v0, v2
1477; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1478; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1479; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1480; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1481; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
1482; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1483; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1484; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
1485; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1486; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1487; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1488; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1489; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1490; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1491; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
1492; GFX11-NEXT:    s_setpc_b64 s[30:31]
1493;
1494; GFX12-LABEL: v_maximum_v4f16:
1495; GFX12:       ; %bb.0:
1496; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1497; GFX12-NEXT:    s_wait_expcnt 0x0
1498; GFX12-NEXT:    s_wait_samplecnt 0x0
1499; GFX12-NEXT:    s_wait_bvhcnt 0x0
1500; GFX12-NEXT:    s_wait_kmcnt 0x0
1501; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1502; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1503; GFX12-NEXT:    s_setpc_b64 s[30:31]
1504  %op = call <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
1505  ret <4 x half> %op
1506}
1507
; llvm.maximum on <4 x half> with the nnan fast-math flag on the call.
; For the gfx7, gfx8, gfx900, gfx10 and gfx11 runs the expected code is a bare
; max (v_max_f32, v_max_f16 or v_pk_max_f16) with no v_cmp_o / v_cndmask NaN
; select. The gfx950 and gfx12 runs expect their packed maximum instructions
; (v_pk_maximum3_f16 and v_pk_maximum_f16 respectively).
; The check lines below are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1508define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
1509; GFX7-LABEL: v_maximum_v4f16__nnan:
1510; GFX7:       ; %bb.0:
1511; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
1513; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
1514; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1515; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1516; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1517; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1518; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1519; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1520; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
1521; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
1522; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1523; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1524; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1525; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1526; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1527; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1528; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
1529; GFX7-NEXT:    v_max_f32_e32 v1, v1, v5
1530; GFX7-NEXT:    v_max_f32_e32 v2, v2, v6
1531; GFX7-NEXT:    v_max_f32_e32 v3, v3, v7
1532; GFX7-NEXT:    s_setpc_b64 s[30:31]
1533;
1534; GFX8-LABEL: v_maximum_v4f16__nnan:
1535; GFX8:       ; %bb.0:
1536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537; GFX8-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1538; GFX8-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1539; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1540; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1541; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
1542; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1543; GFX8-NEXT:    s_setpc_b64 s[30:31]
1544;
1545; GFX900-LABEL: v_maximum_v4f16__nnan:
1546; GFX900:       ; %bb.0:
1547; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1548; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
1549; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
1550; GFX900-NEXT:    s_setpc_b64 s[30:31]
1551;
1552; GFX950-LABEL: v_maximum_v4f16__nnan:
1553; GFX950:       ; %bb.0:
1554; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1556; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1557; GFX950-NEXT:    s_setpc_b64 s[30:31]
1558;
1559; GFX10-LABEL: v_maximum_v4f16__nnan:
1560; GFX10:       ; %bb.0:
1561; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1562; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1563; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1564; GFX10-NEXT:    s_setpc_b64 s[30:31]
1565;
1566; GFX11-LABEL: v_maximum_v4f16__nnan:
1567; GFX11:       ; %bb.0:
1568; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1569; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1570; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1571; GFX11-NEXT:    s_setpc_b64 s[30:31]
1572;
1573; GFX12-LABEL: v_maximum_v4f16__nnan:
1574; GFX12:       ; %bb.0:
1575; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1576; GFX12-NEXT:    s_wait_expcnt 0x0
1577; GFX12-NEXT:    s_wait_samplecnt 0x0
1578; GFX12-NEXT:    s_wait_bvhcnt 0x0
1579; GFX12-NEXT:    s_wait_kmcnt 0x0
1580; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1581; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1582; GFX12-NEXT:    s_setpc_b64 s[30:31]
1583  %op = call nnan <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
1584  ret <4 x half> %op
1585}
1586
; llvm.maximum on <4 x half> with only the nsz fast-math flag on the call.
; For the gfx7, gfx8, gfx900, gfx10 and gfx11 runs the expected code still
; handles NaN inputs explicitly, as a max followed by an ordered compare
; (v_cmp_o) and a v_cndmask select of a quiet-NaN bit pattern (0x7e00 for f16,
; 0x7fc00000 in the f32 code checked for gfx7). The gfx950 and gfx12 runs
; expect their packed maximum instructions.
; The check lines below are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1587define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
1588; GFX7-LABEL: v_maximum_v4f16__nsz:
1589; GFX7:       ; %bb.0:
1590; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1591; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1592; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1593; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1594; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1595; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
1596; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1597; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
1598; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1599; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1600; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1601; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1602; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1603; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
1604; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1605; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
1606; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1607; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
1608; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1609; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1610; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1611; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
1612; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1613; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1614; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
1615; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1616; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1617; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
1618; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1619; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1620; GFX7-NEXT:    s_setpc_b64 s[30:31]
1621;
1622; GFX8-LABEL: v_maximum_v4f16__nsz:
1623; GFX8:       ; %bb.0:
1624; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
1626; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
1627; GFX8-NEXT:    v_max_f16_e32 v6, v5, v4
1628; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
1629; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
1630; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
1631; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1632; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1633; GFX8-NEXT:    v_max_f16_e32 v8, v6, v5
1634; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
1635; GFX8-NEXT:    v_cndmask_b32_e32 v5, v7, v8, vcc
1636; GFX8-NEXT:    v_max_f16_e32 v6, v1, v3
1637; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1638; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
1639; GFX8-NEXT:    v_max_f16_e32 v3, v0, v2
1640; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1641; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1642; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
1643; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1644; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
1645; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1646; GFX8-NEXT:    s_setpc_b64 s[30:31]
1647;
1648; GFX900-LABEL: v_maximum_v4f16__nsz:
1649; GFX900:       ; %bb.0:
1650; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651; GFX900-NEXT:    v_pk_max_f16 v4, v1, v3
1652; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
1653; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1654; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
1655; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1656; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1657; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
1658; GFX900-NEXT:    v_pk_max_f16 v3, v0, v2
1659; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1660; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
1661; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1662; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1663; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
1664; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1665; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
1666; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
1667; GFX900-NEXT:    s_setpc_b64 s[30:31]
1668;
1669; GFX950-LABEL: v_maximum_v4f16__nsz:
1670; GFX950:       ; %bb.0:
1671; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1672; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1673; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1674; GFX950-NEXT:    s_setpc_b64 s[30:31]
1675;
1676; GFX10-LABEL: v_maximum_v4f16__nsz:
1677; GFX10:       ; %bb.0:
1678; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679; GFX10-NEXT:    v_pk_max_f16 v4, v1, v3
1680; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1681; GFX10-NEXT:    v_pk_max_f16 v5, v0, v2
1682; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1683; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1684; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v5
1685; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1686; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1687; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1688; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1689; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1690; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
1691; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1692; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
1693; GFX10-NEXT:    s_setpc_b64 s[30:31]
1694;
1695; GFX11-LABEL: v_maximum_v4f16__nsz:
1696; GFX11:       ; %bb.0:
1697; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1698; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
1699; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1700; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
1701; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
1702; GFX11-NEXT:    v_pk_max_f16 v7, v0, v2
1703; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1704; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1705; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1706; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1707; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
1708; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1709; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1710; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
1711; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1712; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1713; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1714; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1715; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1716; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1717; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
1718; GFX11-NEXT:    s_setpc_b64 s[30:31]
1719;
1720; GFX12-LABEL: v_maximum_v4f16__nsz:
1721; GFX12:       ; %bb.0:
1722; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1723; GFX12-NEXT:    s_wait_expcnt 0x0
1724; GFX12-NEXT:    s_wait_samplecnt 0x0
1725; GFX12-NEXT:    s_wait_bvhcnt 0x0
1726; GFX12-NEXT:    s_wait_kmcnt 0x0
1727; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1728; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1729; GFX12-NEXT:    s_setpc_b64 s[30:31]
1730  %op = call nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
1731  ret <4 x half> %op
1732}
1733
; llvm.maximum on <4 x half> with both the nnan and nsz fast-math flags on the
; call. For the gfx7, gfx8, gfx900, gfx10 and gfx11 runs the expected code is a
; bare max with no v_cmp_o / v_cndmask NaN select. The gfx950 and gfx12 runs
; expect their packed maximum instructions. The instruction sequences are the
; same as those checked for v_maximum_v4f16__nnan.
; The check lines below are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1734define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) {
1735; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
1736; GFX7:       ; %bb.0:
1737; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
1739; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
1740; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1741; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1742; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1743; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1744; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1745; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1746; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
1747; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
1748; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1749; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1750; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1751; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1752; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1753; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1754; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
1755; GFX7-NEXT:    v_max_f32_e32 v1, v1, v5
1756; GFX7-NEXT:    v_max_f32_e32 v2, v2, v6
1757; GFX7-NEXT:    v_max_f32_e32 v3, v3, v7
1758; GFX7-NEXT:    s_setpc_b64 s[30:31]
1759;
1760; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:
1761; GFX8:       ; %bb.0:
1762; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1763; GFX8-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1764; GFX8-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1765; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1766; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1767; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
1768; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1769; GFX8-NEXT:    s_setpc_b64 s[30:31]
1770;
1771; GFX900-LABEL: v_maximum_v4f16__nnan_nsz:
1772; GFX900:       ; %bb.0:
1773; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
1775; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
1776; GFX900-NEXT:    s_setpc_b64 s[30:31]
1777;
1778; GFX950-LABEL: v_maximum_v4f16__nnan_nsz:
1779; GFX950:       ; %bb.0:
1780; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
1782; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
1783; GFX950-NEXT:    s_setpc_b64 s[30:31]
1784;
1785; GFX10-LABEL: v_maximum_v4f16__nnan_nsz:
1786; GFX10:       ; %bb.0:
1787; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1789; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1790; GFX10-NEXT:    s_setpc_b64 s[30:31]
1791;
1792; GFX11-LABEL: v_maximum_v4f16__nnan_nsz:
1793; GFX11:       ; %bb.0:
1794; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1796; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1797; GFX11-NEXT:    s_setpc_b64 s[30:31]
1798;
1799; GFX12-LABEL: v_maximum_v4f16__nnan_nsz:
1800; GFX12:       ; %bb.0:
1801; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1802; GFX12-NEXT:    s_wait_expcnt 0x0
1803; GFX12-NEXT:    s_wait_samplecnt 0x0
1804; GFX12-NEXT:    s_wait_bvhcnt 0x0
1805; GFX12-NEXT:    s_wait_kmcnt 0x0
1806; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
1807; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
1808; GFX12-NEXT:    s_setpc_b64 s[30:31]
1809  %op = call nnan nsz <4 x half> @llvm.maximum.v4f16(<4 x half> %src0, <4 x half> %src1)
1810  ret <4 x half> %op
1811}
1812
; llvm.maximum on <8 x half> with no fast-math flags on the call.
; The gfx950 and gfx12 runs expect four packed maximum instructions, pairing
; v0..v3 with v4..v7. The gfx8, gfx900, gfx10 and gfx11 runs expect, per
; element, a max plus an ordered compare (v_cmp_o) and a v_cndmask select of
; the 0x7e00 quiet-NaN pattern, with the halves repacked into v0..v3 by
; v_or_b32_sdwa (gfx8) or v_perm_b32 (gfx900, gfx10, gfx11). The gfx7 checks do
; the same max/compare/select on f32 values, one element per register, using
; 0x7fc00000 as the NaN constant.
; The check lines below are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1813define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
1814; GFX7-LABEL: v_maximum_v8f16:
1815; GFX7:       ; %bb.0:
1816; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1817; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
1818; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
1819; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
1820; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
1821; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
1822; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
1823; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
1824; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
1825; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
1826; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
1827; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
1828; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
1829; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
1830; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
1831; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
1832; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
1833; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
1834; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
1835; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
1836; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
1837; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v15
1838; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
1839; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
1840; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
1841; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v12
1842; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
1843; GFX7-NEXT:    v_max_f32_e32 v16, v0, v8
1844; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
1845; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
1846; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v13
1847; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
1848; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
1849; GFX7-NEXT:    v_max_f32_e32 v8, v1, v9
1850; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
1851; GFX7-NEXT:    v_cvt_f32_f16_e32 v14, v14
1852; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
1853; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
1854; GFX7-NEXT:    v_max_f32_e32 v8, v2, v10
1855; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
1856; GFX7-NEXT:    v_cvt_f32_f16_e32 v15, v15
1857; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
1858; GFX7-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
1859; GFX7-NEXT:    v_max_f32_e32 v8, v3, v11
1860; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
1861; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
1862; GFX7-NEXT:    v_max_f32_e32 v8, v4, v12
1863; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
1864; GFX7-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
1865; GFX7-NEXT:    v_max_f32_e32 v8, v5, v13
1866; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
1867; GFX7-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
1868; GFX7-NEXT:    v_max_f32_e32 v8, v6, v14
1869; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
1870; GFX7-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
1871; GFX7-NEXT:    v_max_f32_e32 v8, v7, v15
1872; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
1873; GFX7-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
1874; GFX7-NEXT:    s_setpc_b64 s[30:31]
1875;
1876; GFX8-LABEL: v_maximum_v8f16:
1877; GFX8:       ; %bb.0:
1878; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
1880; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
1881; GFX8-NEXT:    v_max_f16_e32 v10, v9, v8
1882; GFX8-NEXT:    v_mov_b32_e32 v11, 0x7e00
1883; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v9, v8
1884; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc
1885; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
1886; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1887; GFX8-NEXT:    v_max_f16_e32 v12, v10, v9
1888; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v10, v9
1889; GFX8-NEXT:    v_cndmask_b32_e32 v9, v11, v12, vcc
1890; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
1891; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v1
1892; GFX8-NEXT:    v_max_f16_e32 v13, v12, v10
1893; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v12, v10
1894; GFX8-NEXT:    v_cndmask_b32_e32 v10, v11, v13, vcc
1895; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
1896; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
1897; GFX8-NEXT:    v_max_f16_e32 v14, v13, v12
1898; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v13, v12
1899; GFX8-NEXT:    v_cndmask_b32_e32 v12, v11, v14, vcc
1900; GFX8-NEXT:    v_max_f16_e32 v13, v3, v7
1901; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
1902; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v13, vcc
1903; GFX8-NEXT:    v_max_f16_e32 v7, v2, v6
1904; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
1905; GFX8-NEXT:    v_cndmask_b32_e32 v2, v11, v7, vcc
1906; GFX8-NEXT:    v_max_f16_e32 v6, v1, v5
1907; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
1908; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v6, vcc
1909; GFX8-NEXT:    v_max_f16_e32 v5, v0, v4
1910; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
1911; GFX8-NEXT:    v_cndmask_b32_e32 v0, v11, v5, vcc
1912; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
1913; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1914; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
1915; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1916; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v9
1917; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1918; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
1919; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1920; GFX8-NEXT:    s_setpc_b64 s[30:31]
1921;
1922; GFX900-LABEL: v_maximum_v8f16:
1923; GFX900:       ; %bb.0:
1924; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1925; GFX900-NEXT:    v_pk_max_f16 v8, v3, v7
1926; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7e00
1927; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
1928; GFX900-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
1929; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
1930; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
1931; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
1932; GFX900-NEXT:    v_pk_max_f16 v7, v2, v6
1933; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
1934; GFX900-NEXT:    v_cndmask_b32_e32 v8, v9, v7, vcc
1935; GFX900-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
1936; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
1937; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
1938; GFX900-NEXT:    v_pk_max_f16 v6, v1, v5
1939; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
1940; GFX900-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
1941; GFX900-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
1942; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
1943; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
1944; GFX900-NEXT:    v_pk_max_f16 v5, v0, v4
1945; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
1946; GFX900-NEXT:    v_cndmask_b32_e32 v6, v9, v5, vcc
1947; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
1948; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
1949; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
1950; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1951; GFX900-NEXT:    v_perm_b32 v0, v0, v6, s4
1952; GFX900-NEXT:    v_perm_b32 v1, v1, v7, s4
1953; GFX900-NEXT:    v_perm_b32 v2, v2, v8, s4
1954; GFX900-NEXT:    v_perm_b32 v3, v3, v10, s4
1955; GFX900-NEXT:    s_setpc_b64 s[30:31]
1956;
1957; GFX950-LABEL: v_maximum_v8f16:
1958; GFX950:       ; %bb.0:
1959; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1960; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
1961; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
1962; GFX950-NEXT:    v_pk_maximum3_f16 v2, v2, v6, v6
1963; GFX950-NEXT:    v_pk_maximum3_f16 v3, v3, v7, v7
1964; GFX950-NEXT:    s_setpc_b64 s[30:31]
1965;
1966; GFX10-LABEL: v_maximum_v8f16:
1967; GFX10:       ; %bb.0:
1968; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969; GFX10-NEXT:    v_pk_max_f16 v8, v3, v7
1970; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
1971; GFX10-NEXT:    v_pk_max_f16 v9, v2, v6
1972; GFX10-NEXT:    v_pk_max_f16 v12, v1, v5
1973; GFX10-NEXT:    v_pk_max_f16 v13, v0, v4
1974; GFX10-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v8, vcc_lo
1975; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
1976; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
1977; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
1978; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo
1979; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
1980; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo
1981; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
1982; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v13
1983; GFX10-NEXT:    v_perm_b32 v2, v2, v9, 0x5040100
1984; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v12, vcc_lo
1985; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
1986; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
1987; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v13, vcc_lo
1988; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
1989; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
1990; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
1991; GFX10-NEXT:    v_perm_b32 v0, v0, v13, 0x5040100
1992; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo
1993; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
1994; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
1995; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
1996; GFX10-NEXT:    v_perm_b32 v3, v3, v10, 0x5040100
1997; GFX10-NEXT:    s_setpc_b64 s[30:31]
1998;
1999; GFX11-LABEL: v_maximum_v8f16:
2000; GFX11:       ; %bb.0:
2001; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002; GFX11-NEXT:    v_pk_max_f16 v8, v3, v7
2003; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
2004; GFX11-NEXT:    v_pk_max_f16 v10, v2, v6
2005; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
2006; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
2007; GFX11-NEXT:    v_pk_max_f16 v14, v1, v5
2008; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
2009; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
2010; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
2011; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
2012; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
2013; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
2014; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
2015; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
2016; GFX11-NEXT:    v_pk_max_f16 v11, v0, v4
2017; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
2018; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
2019; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
2020; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
2021; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
2022; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
2023; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
2024; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
2025; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
2026; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
2027; GFX11-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
2028; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
2029; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
2030; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
2031; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
2032; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2033; GFX11-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
2034; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
2035; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
2036; GFX11-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
2037; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
2038; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2039; GFX11-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
2040; GFX11-NEXT:    s_setpc_b64 s[30:31]
2041;
2042; GFX12-LABEL: v_maximum_v8f16:
2043; GFX12:       ; %bb.0:
2044; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2045; GFX12-NEXT:    s_wait_expcnt 0x0
2046; GFX12-NEXT:    s_wait_samplecnt 0x0
2047; GFX12-NEXT:    s_wait_bvhcnt 0x0
2048; GFX12-NEXT:    s_wait_kmcnt 0x0
2049; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v4
2050; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
2051; GFX12-NEXT:    v_pk_maximum_f16 v2, v2, v6
2052; GFX12-NEXT:    v_pk_maximum_f16 v3, v3, v7
2053; GFX12-NEXT:    s_setpc_b64 s[30:31]
2054  %op = call <8 x half> @llvm.maximum.v8f16(<8 x half> %src0, <8 x half> %src1)
2055  ret <8 x half> %op
2056}
2057
2058define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
2059; GFX7-LABEL: v_maximum_v16f16:
2060; GFX7:       ; %bb.0:
2061; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2062; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v17
2063; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
2064; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
2065; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
2066; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2067; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
2068; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
2069; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
2070; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
2071; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
2072; GFX7-NEXT:    v_max_f32_e32 v1, v1, v17
2073; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v18
2074; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v4
2075; GFX7-NEXT:    v_cvt_f16_f32_e32 v5, v5
2076; GFX7-NEXT:    v_cvt_f16_f32_e32 v6, v6
2077; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2078; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v7
2079; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v5
2080; GFX7-NEXT:    v_cvt_f32_f16_e32 v6, v6
2081; GFX7-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v17
2082; GFX7-NEXT:    v_max_f32_e32 v2, v2, v17
2083; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v19
2084; GFX7-NEXT:    v_cvt_f32_f16_e32 v7, v7
2085; GFX7-NEXT:    v_cvt_f16_f32_e32 v8, v8
2086; GFX7-NEXT:    v_cvt_f16_f32_e32 v9, v9
2087; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2088; GFX7-NEXT:    v_cvt_f16_f32_e32 v10, v10
2089; GFX7-NEXT:    v_cvt_f32_f16_e32 v8, v8
2090; GFX7-NEXT:    v_cvt_f32_f16_e32 v9, v9
2091; GFX7-NEXT:    v_cmp_o_f32_e64 s[6:7], v3, v17
2092; GFX7-NEXT:    v_max_f32_e32 v3, v3, v17
2093; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v20
2094; GFX7-NEXT:    v_cvt_f32_f16_e32 v10, v10
2095; GFX7-NEXT:    v_cvt_f16_f32_e32 v11, v11
2096; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v28
2097; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2098; GFX7-NEXT:    v_cvt_f16_f32_e32 v12, v12
2099; GFX7-NEXT:    v_cvt_f32_f16_e32 v11, v11
2100; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v18
2101; GFX7-NEXT:    v_cmp_o_f32_e64 s[8:9], v4, v17
2102; GFX7-NEXT:    v_max_f32_e32 v4, v4, v17
2103; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v21
2104; GFX7-NEXT:    v_cvt_f32_f16_e32 v12, v12
2105; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v13
2106; GFX7-NEXT:    v_cvt_f16_f32_e32 v19, v16
2107; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2108; GFX7-NEXT:    v_cmp_o_f32_e64 s[24:25], v12, v18
2109; GFX7-NEXT:    v_max_f32_e32 v12, v12, v18
2110; GFX7-NEXT:    v_cvt_f16_f32_e32 v18, v29
2111; GFX7-NEXT:    v_cmp_o_f32_e64 s[10:11], v5, v17
2112; GFX7-NEXT:    v_max_f32_e32 v5, v5, v17
2113; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v22
2114; GFX7-NEXT:    v_cvt_f16_f32_e32 v20, v0
2115; GFX7-NEXT:    v_cvt_f32_f16_e32 v16, v18
2116; GFX7-NEXT:    v_cvt_f32_f16_e32 v18, v13
2117; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2118; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v19
2119; GFX7-NEXT:    v_cvt_f32_f16_e32 v13, v20
2120; GFX7-NEXT:    v_cmp_o_f32_e64 s[26:27], v18, v16
2121; GFX7-NEXT:    v_cmp_o_f32_e64 s[12:13], v6, v17
2122; GFX7-NEXT:    v_max_f32_e32 v6, v6, v17
2123; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v23
2124; GFX7-NEXT:    v_max_f32_e32 v16, v18, v16
2125; GFX7-NEXT:    v_max_f32_e32 v18, v13, v0
2126; GFX7-NEXT:    v_cmp_o_f32_e64 s[28:29], v13, v0
2127; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2128; GFX7-NEXT:    v_cvt_f16_f32_e32 v13, v15
2129; GFX7-NEXT:    v_cvt_f16_f32_e32 v15, v30
2130; GFX7-NEXT:    v_cvt_f16_f32_e32 v14, v14
2131; GFX7-NEXT:    v_cmp_o_f32_e64 s[14:15], v7, v17
2132; GFX7-NEXT:    v_max_f32_e32 v7, v7, v17
2133; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v24
2134; GFX7-NEXT:    v_cvt_f32_f16_e32 v15, v15
2135; GFX7-NEXT:    v_cvt_f32_f16_e32 v14, v14
2136; GFX7-NEXT:    v_cvt_f32_f16_e32 v20, v13
2137; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2138; GFX7-NEXT:    v_mov_b32_e32 v19, 0x7fc00000
2139; GFX7-NEXT:    v_cndmask_b32_e32 v1, v19, v1, vcc
2140; GFX7-NEXT:    v_cndmask_b32_e64 v13, v19, v16, s[26:27]
2141; GFX7-NEXT:    v_cmp_o_f32_e64 s[16:17], v8, v17
2142; GFX7-NEXT:    v_max_f32_e32 v8, v8, v17
2143; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v25
2144; GFX7-NEXT:    v_max_f32_e32 v16, v14, v15
2145; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v14, v15
2146; GFX7-NEXT:    v_cndmask_b32_e32 v14, v19, v16, vcc
2147; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2148; GFX7-NEXT:    v_cndmask_b32_e64 v2, v19, v2, s[4:5]
2149; GFX7-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[6:7]
2150; GFX7-NEXT:    v_cndmask_b32_e64 v4, v19, v4, s[8:9]
2151; GFX7-NEXT:    v_cmp_o_f32_e64 s[18:19], v9, v17
2152; GFX7-NEXT:    v_max_f32_e32 v9, v9, v17
2153; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v26
2154; GFX7-NEXT:    v_cndmask_b32_e64 v5, v19, v5, s[10:11]
2155; GFX7-NEXT:    v_cndmask_b32_e64 v6, v19, v6, s[12:13]
2156; GFX7-NEXT:    v_cndmask_b32_e64 v7, v19, v7, s[14:15]
2157; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2158; GFX7-NEXT:    v_cndmask_b32_e64 v8, v19, v8, s[16:17]
2159; GFX7-NEXT:    v_cndmask_b32_e64 v9, v19, v9, s[18:19]
2160; GFX7-NEXT:    v_cndmask_b32_e64 v12, v19, v12, s[24:25]
2161; GFX7-NEXT:    v_cmp_o_f32_e64 s[20:21], v10, v17
2162; GFX7-NEXT:    v_max_f32_e32 v10, v10, v17
2163; GFX7-NEXT:    v_cvt_f16_f32_e32 v17, v27
2164; GFX7-NEXT:    v_cndmask_b32_e64 v10, v19, v10, s[20:21]
2165; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v17
2166; GFX7-NEXT:    v_cmp_o_f32_e64 s[22:23], v11, v17
2167; GFX7-NEXT:    v_max_f32_e32 v11, v11, v17
2168; GFX7-NEXT:    buffer_load_dword v17, off, s[0:3], s32
2169; GFX7-NEXT:    v_cndmask_b32_e64 v11, v19, v11, s[22:23]
2170; GFX7-NEXT:    s_waitcnt vmcnt(0)
2171; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v17
2172; GFX7-NEXT:    v_cvt_f32_f16_e32 v17, v0
2173; GFX7-NEXT:    v_cndmask_b32_e64 v0, v19, v18, s[28:29]
2174; GFX7-NEXT:    v_max_f32_e32 v15, v20, v17
2175; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v20, v17
2176; GFX7-NEXT:    v_cndmask_b32_e32 v15, v19, v15, vcc
2177; GFX7-NEXT:    s_setpc_b64 s[30:31]
2178;
2179; GFX8-LABEL: v_maximum_v16f16:
2180; GFX8:       ; %bb.0:
2181; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
2183; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
2184; GFX8-NEXT:    v_max_f16_e32 v16, v18, v17
2185; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v18, v17
2186; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
2187; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
2188; GFX8-NEXT:    v_max_f16_e32 v20, v18, v17
2189; GFX8-NEXT:    v_cmp_o_f16_e64 s[4:5], v18, v17
2190; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v12
2191; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v4
2192; GFX8-NEXT:    v_max_f16_e32 v21, v18, v17
2193; GFX8-NEXT:    v_cmp_o_f16_e64 s[6:7], v18, v17
2194; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v11
2195; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
2196; GFX8-NEXT:    v_max_f16_e32 v22, v18, v17
2197; GFX8-NEXT:    v_cmp_o_f16_e64 s[8:9], v18, v17
2198; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
2199; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v2
2200; GFX8-NEXT:    v_max_f16_e32 v23, v18, v17
2201; GFX8-NEXT:    v_cmp_o_f16_e64 s[10:11], v18, v17
2202; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v9
2203; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
2204; GFX8-NEXT:    v_max_f16_e32 v24, v18, v17
2205; GFX8-NEXT:    v_cmp_o_f16_e64 s[12:13], v18, v17
2206; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
2207; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
2208; GFX8-NEXT:    v_max_f16_e32 v25, v18, v17
2209; GFX8-NEXT:    v_cmp_o_f16_e64 s[14:15], v18, v17
2210; GFX8-NEXT:    v_max_f16_e32 v17, v6, v14
2211; GFX8-NEXT:    v_cmp_o_f16_e64 s[16:17], v6, v14
2212; GFX8-NEXT:    v_max_f16_e32 v6, v5, v13
2213; GFX8-NEXT:    v_cmp_o_f16_e64 s[18:19], v5, v13
2214; GFX8-NEXT:    v_max_f16_e32 v5, v4, v12
2215; GFX8-NEXT:    v_cmp_o_f16_e64 s[20:21], v4, v12
2216; GFX8-NEXT:    v_max_f16_e32 v4, v3, v11
2217; GFX8-NEXT:    v_cmp_o_f16_e64 s[22:23], v3, v11
2218; GFX8-NEXT:    v_max_f16_e32 v11, v7, v15
2219; GFX8-NEXT:    v_cmp_o_f16_e64 s[24:25], v7, v15
2220; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v15
2221; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
2222; GFX8-NEXT:    v_mov_b32_e32 v19, 0x7e00
2223; GFX8-NEXT:    v_max_f16_e32 v13, v7, v12
2224; GFX8-NEXT:    v_cmp_o_f16_e64 s[26:27], v7, v12
2225; GFX8-NEXT:    v_max_f16_e32 v3, v2, v10
2226; GFX8-NEXT:    v_cndmask_b32_e64 v12, v19, v13, s[26:27]
2227; GFX8-NEXT:    v_cndmask_b32_e32 v13, v19, v16, vcc
2228; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
2229; GFX8-NEXT:    v_max_f16_e32 v14, v1, v9
2230; GFX8-NEXT:    v_cndmask_b32_e32 v2, v19, v3, vcc
2231; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
2232; GFX8-NEXT:    v_max_f16_e32 v7, v0, v8
2233; GFX8-NEXT:    v_cndmask_b32_e64 v18, v19, v22, s[8:9]
2234; GFX8-NEXT:    v_cndmask_b32_e64 v22, v19, v25, s[14:15]
2235; GFX8-NEXT:    v_cndmask_b32_e32 v1, v19, v14, vcc
2236; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
2237; GFX8-NEXT:    v_cndmask_b32_e64 v16, v19, v21, s[6:7]
2238; GFX8-NEXT:    v_cndmask_b32_e64 v21, v19, v24, s[12:13]
2239; GFX8-NEXT:    v_cndmask_b32_e32 v0, v19, v7, vcc
2240; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v22
2241; GFX8-NEXT:    v_cndmask_b32_e64 v15, v19, v20, s[4:5]
2242; GFX8-NEXT:    v_cndmask_b32_e64 v20, v19, v23, s[10:11]
2243; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2244; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v21
2245; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2246; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v20
2247; GFX8-NEXT:    v_cndmask_b32_e64 v4, v19, v4, s[22:23]
2248; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2249; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v18
2250; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v5, s[20:21]
2251; GFX8-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2252; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v16
2253; GFX8-NEXT:    v_cndmask_b32_e64 v6, v19, v6, s[18:19]
2254; GFX8-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2255; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 16, v15
2256; GFX8-NEXT:    v_cndmask_b32_e64 v11, v19, v11, s[24:25]
2257; GFX8-NEXT:    v_cndmask_b32_e64 v17, v19, v17, s[16:17]
2258; GFX8-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2259; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 16, v13
2260; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 16, v12
2261; GFX8-NEXT:    v_or_b32_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2262; GFX8-NEXT:    v_or_b32_sdwa v7, v11, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
2263; GFX8-NEXT:    s_setpc_b64 s[30:31]
2264;
2265; GFX900-LABEL: v_maximum_v16f16:
2266; GFX900:       ; %bb.0:
2267; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2268; GFX900-NEXT:    v_pk_max_f16 v16, v7, v15
2269; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7e00
2270; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v7, v15
2271; GFX900-NEXT:    v_cndmask_b32_e32 v18, v17, v16, vcc
2272; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
2273; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2274; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v16, vcc
2275; GFX900-NEXT:    v_pk_max_f16 v15, v6, v14
2276; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v6, v14
2277; GFX900-NEXT:    v_cndmask_b32_e32 v16, v17, v15, vcc
2278; GFX900-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
2279; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2280; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v15, vcc
2281; GFX900-NEXT:    v_pk_max_f16 v14, v5, v13
2282; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v5, v13
2283; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v14, vcc
2284; GFX900-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
2285; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2286; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v14, vcc
2287; GFX900-NEXT:    v_pk_max_f16 v13, v4, v12
2288; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v4, v12
2289; GFX900-NEXT:    v_cndmask_b32_e32 v14, v17, v13, vcc
2290; GFX900-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
2291; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2292; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v13, vcc
2293; GFX900-NEXT:    v_pk_max_f16 v12, v3, v11
2294; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v11
2295; GFX900-NEXT:    v_cndmask_b32_e32 v13, v17, v12, vcc
2296; GFX900-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
2297; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2298; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v12, vcc
2299; GFX900-NEXT:    v_pk_max_f16 v11, v2, v10
2300; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
2301; GFX900-NEXT:    v_cndmask_b32_e32 v12, v17, v11, vcc
2302; GFX900-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
2303; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2304; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v11, vcc
2305; GFX900-NEXT:    v_pk_max_f16 v10, v1, v9
2306; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
2307; GFX900-NEXT:    v_cndmask_b32_e32 v11, v17, v10, vcc
2308; GFX900-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
2309; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2310; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v10, vcc
2311; GFX900-NEXT:    v_pk_max_f16 v9, v0, v8
2312; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
2313; GFX900-NEXT:    v_cndmask_b32_e32 v10, v17, v9, vcc
2314; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
2315; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2316; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v9, vcc
2317; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2318; GFX900-NEXT:    v_perm_b32 v0, v0, v10, s4
2319; GFX900-NEXT:    v_perm_b32 v1, v1, v11, s4
2320; GFX900-NEXT:    v_perm_b32 v2, v2, v12, s4
2321; GFX900-NEXT:    v_perm_b32 v3, v3, v13, s4
2322; GFX900-NEXT:    v_perm_b32 v4, v4, v14, s4
2323; GFX900-NEXT:    v_perm_b32 v5, v5, v15, s4
2324; GFX900-NEXT:    v_perm_b32 v6, v6, v16, s4
2325; GFX900-NEXT:    v_perm_b32 v7, v7, v18, s4
2326; GFX900-NEXT:    s_setpc_b64 s[30:31]
2327;
2328; GFX950-LABEL: v_maximum_v16f16:
2329; GFX950:       ; %bb.0:
2330; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2331; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v8, v8
2332; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v9, v9
2333; GFX950-NEXT:    v_pk_maximum3_f16 v2, v2, v10, v10
2334; GFX950-NEXT:    v_pk_maximum3_f16 v3, v3, v11, v11
2335; GFX950-NEXT:    v_pk_maximum3_f16 v4, v4, v12, v12
2336; GFX950-NEXT:    v_pk_maximum3_f16 v5, v5, v13, v13
2337; GFX950-NEXT:    v_pk_maximum3_f16 v6, v6, v14, v14
2338; GFX950-NEXT:    v_pk_maximum3_f16 v7, v7, v15, v15
2339; GFX950-NEXT:    s_setpc_b64 s[30:31]
2340;
2341; GFX10-LABEL: v_maximum_v16f16:
2342; GFX10:       ; %bb.0:
2343; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2344; GFX10-NEXT:    v_pk_max_f16 v16, v7, v15
2345; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
2346; GFX10-NEXT:    v_pk_max_f16 v18, v6, v14
2347; GFX10-NEXT:    v_pk_max_f16 v19, v3, v11
2348; GFX10-NEXT:    v_pk_max_f16 v20, v2, v10
2349; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v16
2350; GFX10-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo
2351; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
2352; GFX10-NEXT:    v_lshrrev_b32_e32 v15, 16, v18
2353; GFX10-NEXT:    v_pk_max_f16 v21, v0, v8
2354; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo
2355; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
2356; GFX10-NEXT:    v_pk_max_f16 v17, v5, v13
2357; GFX10-NEXT:    v_lshrrev_b32_e32 v23, 16, v21
2358; GFX10-NEXT:    v_perm_b32 v7, v7, v16, 0x5040100
2359; GFX10-NEXT:    v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo
2360; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
2361; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
2362; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
2363; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
2364; GFX10-NEXT:    v_perm_b32 v6, v6, v18, 0x5040100
2365; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo
2366; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
2367; GFX10-NEXT:    v_pk_max_f16 v17, v4, v12
2368; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
2369; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
2370; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
2371; GFX10-NEXT:    v_perm_b32 v5, v5, v15, 0x5040100
2372; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo
2373; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
2374; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v19
2375; GFX10-NEXT:    v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo
2376; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
2377; GFX10-NEXT:    v_pk_max_f16 v11, v1, v9
2378; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
2379; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
2380; GFX10-NEXT:    v_lshrrev_b32_e32 v22, 16, v11
2381; GFX10-NEXT:    v_perm_b32 v3, v3, v19, 0x5040100
2382; GFX10-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo
2383; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
2384; GFX10-NEXT:    v_lshrrev_b32_e32 v20, 16, v20
2385; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo
2386; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
2387; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo
2388; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
2389; GFX10-NEXT:    v_perm_b32 v1, v1, v11, 0x5040100
2390; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo
2391; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
2392; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo
2393; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
2394; GFX10-NEXT:    v_perm_b32 v0, v0, v9, 0x5040100
2395; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
2396; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
2397; GFX10-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
2398; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo
2399; GFX10-NEXT:    v_perm_b32 v4, v4, v13, 0x5040100
2400; GFX10-NEXT:    s_setpc_b64 s[30:31]
2401;
2402; GFX11-LABEL: v_maximum_v16f16:
2403; GFX11:       ; %bb.0:
2404; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405; GFX11-NEXT:    v_pk_max_f16 v16, v7, v15
2406; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
2407; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
2408; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
2409; GFX11-NEXT:    v_pk_max_f16 v15, v6, v14
2410; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
2411; GFX11-NEXT:    v_pk_max_f16 v20, v4, v12
2412; GFX11-NEXT:    v_pk_max_f16 v22, v2, v10
2413; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
2414; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
2415; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
2416; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
2417; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
2418; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
2419; GFX11-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
2420; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
2421; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
2422; GFX11-NEXT:    v_pk_max_f16 v14, v5, v13
2423; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2424; GFX11-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
2425; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
2426; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
2427; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
2428; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
2429; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
2430; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
2431; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
2432; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
2433; GFX11-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
2434; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
2435; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
2436; GFX11-NEXT:    v_pk_max_f16 v17, v3, v11
2437; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
2438; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
2439; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
2440; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
2441; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
2442; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
2443; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
2444; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
2445; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
2446; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
2447; GFX11-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
2448; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
2449; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2450; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
2451; GFX11-NEXT:    v_pk_max_f16 v19, v1, v9
2452; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
2453; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
2454; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
2455; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
2456; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
2457; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2458; GFX11-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
2459; GFX11-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
2460; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
2461; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
2462; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
2463; GFX11-NEXT:    v_pk_max_f16 v22, v0, v8
2464; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
2465; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
2466; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2467; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
2468; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
2469; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2470; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
2471; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
2472; GFX11-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
2473; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
2474; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
2475; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
2476; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
2477; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2478; GFX11-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
2479; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
2480; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
2481; GFX11-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
2482; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
2483; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2484; GFX11-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
2485; GFX11-NEXT:    s_setpc_b64 s[30:31]
2486;
2487; GFX12-LABEL: v_maximum_v16f16:
2488; GFX12:       ; %bb.0:
2489; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2490; GFX12-NEXT:    s_wait_expcnt 0x0
2491; GFX12-NEXT:    s_wait_samplecnt 0x0
2492; GFX12-NEXT:    s_wait_bvhcnt 0x0
2493; GFX12-NEXT:    s_wait_kmcnt 0x0
2494; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v8
2495; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v9
2496; GFX12-NEXT:    v_pk_maximum_f16 v2, v2, v10
2497; GFX12-NEXT:    v_pk_maximum_f16 v3, v3, v11
2498; GFX12-NEXT:    v_pk_maximum_f16 v4, v4, v12
2499; GFX12-NEXT:    v_pk_maximum_f16 v5, v5, v13
2500; GFX12-NEXT:    v_pk_maximum_f16 v6, v6, v14
2501; GFX12-NEXT:    v_pk_maximum_f16 v7, v7, v15
2502; GFX12-NEXT:    s_setpc_b64 s[30:31]
2503  %op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1)
2504  ret <16 x half> %op
2505}
2506;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2507; GCN: {{.*}}
2508