xref: /llvm-project/llvm/test/CodeGen/AMDGPU/maximumnum.ll (revision 1547382033ca156c13fc16c3b2baed7350b6de8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
7
; Calls llvm.maximumnum.f16 on two half arguments with no fast-math flags.
; For every checked target the expected code applies a self-max to each input
; (v_max x, x, x) and then takes the max of the two results. The gfx1200 checks
; use the v_max_num_f16 mnemonic where the other targets use v_max_f16.
; NOTE(review): the self-max is presumably there to quiet signaling NaNs before
; the real max - confirm against the backend lowering.
8define half @v_maximumnum_f16(half %x, half %y) {
9; GFX8-LABEL: v_maximumnum_f16:
10; GFX8:       ; %bb.0:
11; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
13; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
14; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
15; GFX8-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX9-LABEL: v_maximumnum_f16:
18; GFX9:       ; %bb.0:
19; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX9-NEXT:    v_max_f16_e32 v1, v1, v1
21; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
22; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
23; GFX9-NEXT:    s_setpc_b64 s[30:31]
24;
25; GFX10-LABEL: v_maximumnum_f16:
26; GFX10:       ; %bb.0:
27; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX10-NEXT:    v_max_f16_e32 v1, v1, v1
29; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
30; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
31; GFX10-NEXT:    s_setpc_b64 s[30:31]
32;
33; GFX11-LABEL: v_maximumnum_f16:
34; GFX11:       ; %bb.0:
35; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
37; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
38; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
39; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
40; GFX11-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX12-LABEL: v_maximumnum_f16:
43; GFX12:       ; %bb.0:
44; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
45; GFX12-NEXT:    s_wait_expcnt 0x0
46; GFX12-NEXT:    s_wait_samplecnt 0x0
47; GFX12-NEXT:    s_wait_bvhcnt 0x0
48; GFX12-NEXT:    s_wait_kmcnt 0x0
49; GFX12-NEXT:    v_max_num_f16_e32 v1, v1, v1
50; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
51; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
52; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
53; GFX12-NEXT:    s_setpc_b64 s[30:31]
54  %result = call half @llvm.maximumnum.f16(half %x, half %y)
55  ret half %result
56}
57
; Same call as v_maximumnum_f16 but with the nnan fast-math flag on the call.
; The expected code for every checked target is a single max of the two inputs,
; with no per-input self-max beforehand.
58define half @v_maximumnum_f16_nnan(half %x, half %y) {
59; GFX8-LABEL: v_maximumnum_f16_nnan:
60; GFX8:       ; %bb.0:
61; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
63; GFX8-NEXT:    s_setpc_b64 s[30:31]
64;
65; GFX9-LABEL: v_maximumnum_f16_nnan:
66; GFX9:       ; %bb.0:
67; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
69; GFX9-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX10-LABEL: v_maximumnum_f16_nnan:
72; GFX10:       ; %bb.0:
73; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
75; GFX10-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX11-LABEL: v_maximumnum_f16_nnan:
78; GFX11:       ; %bb.0:
79; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
81; GFX11-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX12-LABEL: v_maximumnum_f16_nnan:
84; GFX12:       ; %bb.0:
85; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
86; GFX12-NEXT:    s_wait_expcnt 0x0
87; GFX12-NEXT:    s_wait_samplecnt 0x0
88; GFX12-NEXT:    s_wait_bvhcnt 0x0
89; GFX12-NEXT:    s_wait_kmcnt 0x0
90; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
91; GFX12-NEXT:    s_setpc_b64 s[30:31]
92  %result = call nnan half @llvm.maximumnum.f16(half %x, half %y)
93  ret half %result
94}
95
; Calls llvm.maximumnum.f16 with a variable first operand and the constant 1.0
; as the second operand. The expected code applies a self-max to %x only and
; then takes the max against the inline constant 1.0; no extra instruction is
; expected for the constant operand.
96define half @v_maximumnum_f16_1.0(half %x) {
97; GFX8-LABEL: v_maximumnum_f16_1.0:
98; GFX8:       ; %bb.0:
99; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
101; GFX8-NEXT:    v_max_f16_e32 v0, 1.0, v0
102; GFX8-NEXT:    s_setpc_b64 s[30:31]
103;
104; GFX9-LABEL: v_maximumnum_f16_1.0:
105; GFX9:       ; %bb.0:
106; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
108; GFX9-NEXT:    v_max_f16_e32 v0, 1.0, v0
109; GFX9-NEXT:    s_setpc_b64 s[30:31]
110;
111; GFX10-LABEL: v_maximumnum_f16_1.0:
112; GFX10:       ; %bb.0:
113; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
115; GFX10-NEXT:    v_max_f16_e32 v0, 1.0, v0
116; GFX10-NEXT:    s_setpc_b64 s[30:31]
117;
118; GFX11-LABEL: v_maximumnum_f16_1.0:
119; GFX11:       ; %bb.0:
120; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
122; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
123; GFX11-NEXT:    v_max_f16_e32 v0, 1.0, v0
124; GFX11-NEXT:    s_setpc_b64 s[30:31]
125;
126; GFX12-LABEL: v_maximumnum_f16_1.0:
127; GFX12:       ; %bb.0:
128; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
129; GFX12-NEXT:    s_wait_expcnt 0x0
130; GFX12-NEXT:    s_wait_samplecnt 0x0
131; GFX12-NEXT:    s_wait_bvhcnt 0x0
132; GFX12-NEXT:    s_wait_kmcnt 0x0
133; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
134; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
135; GFX12-NEXT:    v_max_num_f16_e32 v0, 1.0, v0
136; GFX12-NEXT:    s_setpc_b64 s[30:31]
137  %result = call half @llvm.maximumnum.f16(half %x, half 1.0)
138  ret half %result
139}
140
; Calls llvm.maximumnum.bf16 on two bfloat arguments with no fast-math flags.
; None of the checked targets is expected to use a bf16 max instruction.
; Instead the expected code:
;   - shifts each operand left by 16 and runs f32 compares on the shifted value
;     (v_cmp_u_f32 per input, then v_cmp_gt_f32), selecting with v_cndmask;
;   - passes the selected value through a v_bfe_u32 / add 0x7fff /
;     or 0x400000 sequence and shifts the result right by 16;
;   - finishes with compares against 0 (v_cmp_eq_u16 on each input and
;     v_cmp_eq_f32 on the result) feeding further v_cndmask selects.
; NOTE(review): presumably the shift promotes bf16 to f32, the bfe/0x7fff
; sequence is the round-back to bf16, and the zero compares handle signed-zero
; ordering - confirm against the generic expansion before relying on this.
141define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
142; GFX8-LABEL: v_maximumnum_bf16:
143; GFX8:       ; %bb.0:
144; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
146; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
147; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
148; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
149; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
150; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
151; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
152; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
153; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
154; GFX8-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
155; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
156; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
157; GFX8-NEXT:    v_bfe_u32 v3, v2, 16, 1
158; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v2
159; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
160; GFX8-NEXT:    v_add_u32_e32 v3, vcc, s4, v3
161; GFX8-NEXT:    v_or_b32_e32 v4, 0x400000, v2
162; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
163; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
164; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
165; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
166; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
167; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
168; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
169; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
170; GFX8-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
171; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
172; GFX8-NEXT:    s_setpc_b64 s[30:31]
173;
174; GFX9-LABEL: v_maximumnum_bf16:
175; GFX9:       ; %bb.0:
176; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
178; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
179; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
180; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
181; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
182; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
183; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
184; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
185; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v3
186; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
187; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
188; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
189; GFX9-NEXT:    v_bfe_u32 v3, v2, 16, 1
190; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
191; GFX9-NEXT:    v_add3_u32 v3, v3, v2, s4
192; GFX9-NEXT:    v_or_b32_e32 v4, 0x400000, v2
193; GFX9-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
194; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
195; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
196; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
197; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
198; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
199; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
200; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
201; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
202; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
203; GFX9-NEXT:    s_setpc_b64 s[30:31]
204;
205; GFX10-LABEL: v_maximumnum_bf16:
206; GFX10:       ; %bb.0:
207; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
209; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
210; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
211; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
212; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
213; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
214; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
215; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
216; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v2, v3
217; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
218; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
219; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
220; GFX10-NEXT:    v_bfe_u32 v3, v2, 16, 1
221; GFX10-NEXT:    v_or_b32_e32 v4, 0x400000, v2
222; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
223; GFX10-NEXT:    v_add3_u32 v3, v3, v2, 0x7fff
224; GFX10-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc_lo
225; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
226; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
227; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
228; GFX10-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
229; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
230; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
231; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v2
232; GFX10-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
233; GFX10-NEXT:    s_setpc_b64 s[30:31]
234;
235; GFX11-LABEL: v_maximumnum_bf16:
236; GFX11:       ; %bb.0:
237; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
239; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
240; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
241; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
242; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
243; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
244; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
245; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
246; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
247; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
248; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
249; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v2, v3
250; GFX11-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
251; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
252; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
253; GFX11-NEXT:    v_max_f32_e32 v2, v2, v2
254; GFX11-NEXT:    v_bfe_u32 v3, v2, 16, 1
255; GFX11-NEXT:    v_or_b32_e32 v4, 0x400000, v2
256; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
257; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
258; GFX11-NEXT:    v_add3_u32 v3, v3, v2, 0x7fff
259; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc_lo
260; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
261; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
262; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
263; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
264; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
265; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
266; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
267; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
268; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v2
269; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
270; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
271; GFX11-NEXT:    s_setpc_b64 s[30:31]
272;
273; GFX12-LABEL: v_maximumnum_bf16:
274; GFX12:       ; %bb.0:
275; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
276; GFX12-NEXT:    s_wait_expcnt 0x0
277; GFX12-NEXT:    s_wait_samplecnt 0x0
278; GFX12-NEXT:    s_wait_bvhcnt 0x0
279; GFX12-NEXT:    s_wait_kmcnt 0x0
280; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
281; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
282; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
283; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
284; GFX12-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
285; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
286; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
287; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v3, v3
288; GFX12-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc_lo
289; GFX12-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
290; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
291; GFX12-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v2, v3
292; GFX12-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
293; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
294; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
295; GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v2
296; GFX12-NEXT:    v_bfe_u32 v3, v2, 16, 1
297; GFX12-NEXT:    v_or_b32_e32 v4, 0x400000, v2
298; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v2, v2
299; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
300; GFX12-NEXT:    v_add3_u32 v3, v3, v2, 0x7fff
301; GFX12-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc_lo
302; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
303; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
304; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
305; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
306; GFX12-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
307; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
308; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
309; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
310; GFX12-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v2
311; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
312; GFX12-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
313; GFX12-NEXT:    s_setpc_b64 s[30:31]
314  %result = call bfloat @llvm.maximumnum.bf16(bfloat %x, bfloat %y)
315  ret bfloat %result
316}
317
; Same call as v_maximumnum_bf16 but with the nnan fast-math flag on the call.
; Compared with the unflagged bf16 test, the expected code has no v_cmp_u_f32
; compares and no v_bfe_u32 / 0x7fff / 0x400000 sequence. What remains is the
; shift-left-by-16 of both inputs, one v_cmp_gt_f32 select, and the compares
; against 0 feeding v_cndmask selects. The gfx1100 and gfx1200 checks pair one
; select with a shift in a single v_dual instruction.
318define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
319; GFX8-LABEL: v_maximumnum_bf16_nnan:
320; GFX8:       ; %bb.0:
321; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
323; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
324; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
325; GFX8-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
326; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
327; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
328; GFX8-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
329; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
330; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
331; GFX8-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
332; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
333; GFX8-NEXT:    s_setpc_b64 s[30:31]
334;
335; GFX9-LABEL: v_maximumnum_bf16_nnan:
336; GFX9:       ; %bb.0:
337; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
339; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
340; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
341; GFX9-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
342; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
343; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
344; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v1
345; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
346; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
347; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v1
348; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
349; GFX9-NEXT:    s_setpc_b64 s[30:31]
350;
351; GFX10-LABEL: v_maximumnum_bf16_nnan:
352; GFX10:       ; %bb.0:
353; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
355; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
356; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v2
357; GFX10-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
358; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
359; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
360; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
361; GFX10-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
362; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
363; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v3
364; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
365; GFX10-NEXT:    s_setpc_b64 s[30:31]
366;
367; GFX11-LABEL: v_maximumnum_bf16_nnan:
368; GFX11:       ; %bb.0:
369; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
371; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
372; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
373; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v2
374; GFX11-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
375; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
376; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v0 :: v_dual_lshlrev_b32 v3, 16, v2
377; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
378; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
379; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
380; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v3
381; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
382; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
383; GFX11-NEXT:    s_setpc_b64 s[30:31]
384;
385; GFX12-LABEL: v_maximumnum_bf16_nnan:
386; GFX12:       ; %bb.0:
387; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
388; GFX12-NEXT:    s_wait_expcnt 0x0
389; GFX12-NEXT:    s_wait_samplecnt 0x0
390; GFX12-NEXT:    s_wait_bvhcnt 0x0
391; GFX12-NEXT:    s_wait_kmcnt 0x0
392; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
393; GFX12-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
394; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
395; GFX12-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v2
396; GFX12-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc_lo
397; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
398; GFX12-NEXT:    v_dual_cndmask_b32 v0, v2, v0 :: v_dual_lshlrev_b32 v3, 16, v2
399; GFX12-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v1
400; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
401; GFX12-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
402; GFX12-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v3
403; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
404; GFX12-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
405; GFX12-NEXT:    s_setpc_b64 s[30:31]
406  %result = call nnan bfloat @llvm.maximumnum.bf16(bfloat %x, bfloat %y)
407  ret bfloat %result
408}
409
; Calls llvm.maximumnum.f32 on two float arguments with no fast-math flags.
; Each input gets one preparatory instruction before the final v_max:
;   - gfx803 expects a multiply by 1.0 (v_mul_f32 1.0, x);
;   - gfx900 and gfx1030 expect a self-max (v_max_f32 x, x, x);
;   - gfx1100 and gfx1200 expect both self-max operations paired in a single
;     v_dual instruction.
; NOTE(review): the per-input instruction is presumably a canonicalize that
; quiets signaling NaNs - confirm why gfx803 uses a multiply for f32.
410define float @v_maximumnum_f32(float %x, float %y) {
411; GFX8-LABEL: v_maximumnum_f32:
412; GFX8:       ; %bb.0:
413; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
415; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
416; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
417; GFX8-NEXT:    s_setpc_b64 s[30:31]
418;
419; GFX9-LABEL: v_maximumnum_f32:
420; GFX9:       ; %bb.0:
421; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
423; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
424; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
425; GFX9-NEXT:    s_setpc_b64 s[30:31]
426;
427; GFX10-LABEL: v_maximumnum_f32:
428; GFX10:       ; %bb.0:
429; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
431; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
432; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
433; GFX10-NEXT:    s_setpc_b64 s[30:31]
434;
435; GFX11-LABEL: v_maximumnum_f32:
436; GFX11:       ; %bb.0:
437; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
439; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
440; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
441; GFX11-NEXT:    s_setpc_b64 s[30:31]
442;
443; GFX12-LABEL: v_maximumnum_f32:
444; GFX12:       ; %bb.0:
445; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
446; GFX12-NEXT:    s_wait_expcnt 0x0
447; GFX12-NEXT:    s_wait_samplecnt 0x0
448; GFX12-NEXT:    s_wait_bvhcnt 0x0
449; GFX12-NEXT:    s_wait_kmcnt 0x0
450; GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
451; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
452; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
453; GFX12-NEXT:    s_setpc_b64 s[30:31]
454  %result = call float @llvm.maximumnum.f32(float %x, float %y)
455  ret float %result
456}
457
; Same call as v_maximumnum_f32 but with the nnan fast-math flag on the call.
; The expected code for every checked target is a single max of the two inputs,
; with no per-input multiply or self-max beforehand.
458define float @v_maximumnum_f32_nnan(float %x, float %y) {
459; GFX8-LABEL: v_maximumnum_f32_nnan:
460; GFX8:       ; %bb.0:
461; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
463; GFX8-NEXT:    s_setpc_b64 s[30:31]
464;
465; GFX9-LABEL: v_maximumnum_f32_nnan:
466; GFX9:       ; %bb.0:
467; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
469; GFX9-NEXT:    s_setpc_b64 s[30:31]
470;
471; GFX10-LABEL: v_maximumnum_f32_nnan:
472; GFX10:       ; %bb.0:
473; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
475; GFX10-NEXT:    s_setpc_b64 s[30:31]
476;
477; GFX11-LABEL: v_maximumnum_f32_nnan:
478; GFX11:       ; %bb.0:
479; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
481; GFX11-NEXT:    s_setpc_b64 s[30:31]
482;
483; GFX12-LABEL: v_maximumnum_f32_nnan:
484; GFX12:       ; %bb.0:
485; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
486; GFX12-NEXT:    s_wait_expcnt 0x0
487; GFX12-NEXT:    s_wait_samplecnt 0x0
488; GFX12-NEXT:    s_wait_bvhcnt 0x0
489; GFX12-NEXT:    s_wait_kmcnt 0x0
490; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
491; GFX12-NEXT:    s_setpc_b64 s[30:31]
492  %result = call nnan float @llvm.maximumnum.f32(float %x, float %y)
493  ret float %result
494}
495
; Calls llvm.maximumnum.f64 on two double arguments with no fast-math flags.
; For every checked target the expected code applies a self-max to each 64-bit
; register pair and then takes the max of the two results. The gfx1200 checks
; use the v_max_num_f64 mnemonic where the other targets use v_max_f64.
496define double @v_maximumnum_f64(double %x, double %y) {
497; GFX8-LABEL: v_maximumnum_f64:
498; GFX8:       ; %bb.0:
499; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
501; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
502; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
503; GFX8-NEXT:    s_setpc_b64 s[30:31]
504;
505; GFX9-LABEL: v_maximumnum_f64:
506; GFX9:       ; %bb.0:
507; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
509; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
510; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
511; GFX9-NEXT:    s_setpc_b64 s[30:31]
512;
513; GFX10-LABEL: v_maximumnum_f64:
514; GFX10:       ; %bb.0:
515; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
517; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
518; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
519; GFX10-NEXT:    s_setpc_b64 s[30:31]
520;
521; GFX11-LABEL: v_maximumnum_f64:
522; GFX11:       ; %bb.0:
523; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
525; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
526; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
527; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
528; GFX11-NEXT:    s_setpc_b64 s[30:31]
529;
530; GFX12-LABEL: v_maximumnum_f64:
531; GFX12:       ; %bb.0:
532; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
533; GFX12-NEXT:    s_wait_expcnt 0x0
534; GFX12-NEXT:    s_wait_samplecnt 0x0
535; GFX12-NEXT:    s_wait_bvhcnt 0x0
536; GFX12-NEXT:    s_wait_kmcnt 0x0
537; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
538; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
539; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
540; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
541; GFX12-NEXT:    s_setpc_b64 s[30:31]
542  %result = call double @llvm.maximumnum.f64(double %x, double %y)
543  ret double %result
544}
545
; Same call as v_maximumnum_f64 but with the nnan fast-math flag on the call.
; The expected code for every checked target is a single 64-bit max of the two
; inputs, with no per-input self-max beforehand.
546define double @v_maximumnum_f64_nnan(double %x, double %y) {
547; GFX8-LABEL: v_maximumnum_f64_nnan:
548; GFX8:       ; %bb.0:
549; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
551; GFX8-NEXT:    s_setpc_b64 s[30:31]
552;
553; GFX9-LABEL: v_maximumnum_f64_nnan:
554; GFX9:       ; %bb.0:
555; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
557; GFX9-NEXT:    s_setpc_b64 s[30:31]
558;
559; GFX10-LABEL: v_maximumnum_f64_nnan:
560; GFX10:       ; %bb.0:
561; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
563; GFX10-NEXT:    s_setpc_b64 s[30:31]
564;
565; GFX11-LABEL: v_maximumnum_f64_nnan:
566; GFX11:       ; %bb.0:
567; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
569; GFX11-NEXT:    s_setpc_b64 s[30:31]
570;
571; GFX12-LABEL: v_maximumnum_f64_nnan:
572; GFX12:       ; %bb.0:
573; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
574; GFX12-NEXT:    s_wait_expcnt 0x0
575; GFX12-NEXT:    s_wait_samplecnt 0x0
576; GFX12-NEXT:    s_wait_bvhcnt 0x0
577; GFX12-NEXT:    s_wait_kmcnt 0x0
578; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
579; GFX12-NEXT:    s_setpc_b64 s[30:31]
580  %result = call nnan double @llvm.maximumnum.f64(double %x, double %y)
581  ret double %result
582}
583
; Calls llvm.maximumnum.f32 with a variable first operand and the constant 1.0
; as the second operand. Only %x gets a preparatory instruction (a multiply by
; 1.0 on gfx803, a self-max on the other checked targets); the final max then
; uses the inline constant 1.0 directly.
584define float @v_maximumnum_f32_1.0(float %x) {
585; GFX8-LABEL: v_maximumnum_f32_1.0:
586; GFX8:       ; %bb.0:
587; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
589; GFX8-NEXT:    v_max_f32_e32 v0, 1.0, v0
590; GFX8-NEXT:    s_setpc_b64 s[30:31]
591;
592; GFX9-LABEL: v_maximumnum_f32_1.0:
593; GFX9:       ; %bb.0:
594; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
596; GFX9-NEXT:    v_max_f32_e32 v0, 1.0, v0
597; GFX9-NEXT:    s_setpc_b64 s[30:31]
598;
599; GFX10-LABEL: v_maximumnum_f32_1.0:
600; GFX10:       ; %bb.0:
601; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
603; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
604; GFX10-NEXT:    s_setpc_b64 s[30:31]
605;
606; GFX11-LABEL: v_maximumnum_f32_1.0:
607; GFX11:       ; %bb.0:
608; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609; GFX11-NEXT:    v_max_f32_e32 v0, v0, v0
610; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
611; GFX11-NEXT:    v_max_f32_e32 v0, 1.0, v0
612; GFX11-NEXT:    s_setpc_b64 s[30:31]
613;
614; GFX12-LABEL: v_maximumnum_f32_1.0:
615; GFX12:       ; %bb.0:
616; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
617; GFX12-NEXT:    s_wait_expcnt 0x0
618; GFX12-NEXT:    s_wait_samplecnt 0x0
619; GFX12-NEXT:    s_wait_bvhcnt 0x0
620; GFX12-NEXT:    s_wait_kmcnt 0x0
621; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
622; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
623; GFX12-NEXT:    v_max_num_f32_e32 v0, 1.0, v0
624; GFX12-NEXT:    s_setpc_b64 s[30:31]
625  %result = call float @llvm.maximumnum.f32(float %x, float 1.0)
626  ret float %result
627}
628
; Passes %y through llvm.canonicalize.f32 and uses the result as the second
; operand of llvm.maximumnum.f32; %x is passed unmodified. The expected code
; has exactly one preparatory instruction per input followed by the final max,
; which is the same instruction sequence checked for v_maximumnum_f32.
; NOTE(review): going by the function name, the point is presumably that the
; explicit canonicalize makes a second quieting of %y unnecessary - confirm.
629define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
630; GFX8-LABEL: v_maximumnum_f32_rhs_not_snan:
631; GFX8:       ; %bb.0:
632; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
634; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
635; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
636; GFX8-NEXT:    s_setpc_b64 s[30:31]
637;
638; GFX9-LABEL: v_maximumnum_f32_rhs_not_snan:
639; GFX9:       ; %bb.0:
640; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
642; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
643; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
644; GFX9-NEXT:    s_setpc_b64 s[30:31]
645;
646; GFX10-LABEL: v_maximumnum_f32_rhs_not_snan:
647; GFX10:       ; %bb.0:
648; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
650; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
651; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
652; GFX10-NEXT:    s_setpc_b64 s[30:31]
653;
654; GFX11-LABEL: v_maximumnum_f32_rhs_not_snan:
655; GFX11:       ; %bb.0:
656; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
658; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
659; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
660; GFX11-NEXT:    s_setpc_b64 s[30:31]
661;
662; GFX12-LABEL: v_maximumnum_f32_rhs_not_snan:
663; GFX12:       ; %bb.0:
664; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
665; GFX12-NEXT:    s_wait_expcnt 0x0
666; GFX12-NEXT:    s_wait_samplecnt 0x0
667; GFX12-NEXT:    s_wait_bvhcnt 0x0
668; GFX12-NEXT:    s_wait_kmcnt 0x0
669; GFX12-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
670; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
671; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
672; GFX12-NEXT:    s_setpc_b64 s[30:31]
673  %canon.y = call float @llvm.canonicalize.f32(float %y)
674  %result = call float @llvm.maximumnum.f32(float %x, float %canon.y)
675  ret float %result
676}
677
; Passes %x through llvm.canonicalize.f32 and uses the result as the first
; operand of llvm.maximumnum.f32; %y is passed unmodified. The expected code
; has exactly one preparatory instruction per input followed by the final max.
; Relative to v_maximumnum_f32 the two preparatory instructions appear in the
; opposite order (v0 first, then v1).
678define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
679; GFX8-LABEL: v_maximumnum_f32_lhs_not_snan:
680; GFX8:       ; %bb.0:
681; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
683; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
684; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
685; GFX8-NEXT:    s_setpc_b64 s[30:31]
686;
687; GFX9-LABEL: v_maximumnum_f32_lhs_not_snan:
688; GFX9:       ; %bb.0:
689; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
691; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
692; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
693; GFX9-NEXT:    s_setpc_b64 s[30:31]
694;
695; GFX10-LABEL: v_maximumnum_f32_lhs_not_snan:
696; GFX10:       ; %bb.0:
697; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
699; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
700; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
701; GFX10-NEXT:    s_setpc_b64 s[30:31]
702;
703; GFX11-LABEL: v_maximumnum_f32_lhs_not_snan:
704; GFX11:       ; %bb.0:
705; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
707; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
708; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
709; GFX11-NEXT:    s_setpc_b64 s[30:31]
710;
711; GFX12-LABEL: v_maximumnum_f32_lhs_not_snan:
712; GFX12:       ; %bb.0:
713; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
714; GFX12-NEXT:    s_wait_expcnt 0x0
715; GFX12-NEXT:    s_wait_samplecnt 0x0
716; GFX12-NEXT:    s_wait_bvhcnt 0x0
717; GFX12-NEXT:    s_wait_kmcnt 0x0
718; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
719; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
720; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
721; GFX12-NEXT:    s_setpc_b64 s[30:31]
722  %canon.x = call float @llvm.canonicalize.f32(float %x)
723  %result = call float @llvm.maximumnum.f32(float %canon.x, float %y)
724  ret float %result
725}
726
; Passes both %x and %y through llvm.canonicalize.f32 before feeding them to
; llvm.maximumnum.f32. The expected code still has exactly one preparatory
; instruction per input (v0 first, then v1) followed by the final max, i.e. no
; additional per-input instruction is expected on top of the two explicit
; canonicalize calls.
727define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
728; GFX8-LABEL: v_maximumnum_f32_both_operands_not_snan:
729; GFX8:       ; %bb.0:
730; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
732; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
733; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
734; GFX8-NEXT:    s_setpc_b64 s[30:31]
735;
736; GFX9-LABEL: v_maximumnum_f32_both_operands_not_snan:
737; GFX9:       ; %bb.0:
738; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
739; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
740; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
741; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
742; GFX9-NEXT:    s_setpc_b64 s[30:31]
743;
744; GFX10-LABEL: v_maximumnum_f32_both_operands_not_snan:
745; GFX10:       ; %bb.0:
746; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
747; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
748; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
749; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
750; GFX10-NEXT:    s_setpc_b64 s[30:31]
751;
752; GFX11-LABEL: v_maximumnum_f32_both_operands_not_snan:
753; GFX11:       ; %bb.0:
754; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
756; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
757; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
758; GFX11-NEXT:    s_setpc_b64 s[30:31]
759;
760; GFX12-LABEL: v_maximumnum_f32_both_operands_not_snan:
761; GFX12:       ; %bb.0:
762; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
763; GFX12-NEXT:    s_wait_expcnt 0x0
764; GFX12-NEXT:    s_wait_samplecnt 0x0
765; GFX12-NEXT:    s_wait_bvhcnt 0x0
766; GFX12-NEXT:    s_wait_kmcnt 0x0
767; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
768; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
769; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
770; GFX12-NEXT:    s_setpc_b64 s[30:31]
771  %canon.x = call float @llvm.canonicalize.f32(float %x)
772  %canon.y = call float @llvm.canonicalize.f32(float %y)
773  %result = call float @llvm.maximumnum.f32(float %canon.x, float %canon.y)
774  ret float %result
775}
776
; llvm.maximumnum.f64 of %x against the constant 1.0.
; The checks expect a max of v[0:1] with itself, followed by a max that takes
; 1.0 directly as an operand; in the gfx1200 checks the constant is the first
; source operand, in the others it is the last.
777define double @v_maximumnum_f64_1.0(double %x) {
778; GFX8-LABEL: v_maximumnum_f64_1.0:
779; GFX8:       ; %bb.0:
780; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
781; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
782; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
783; GFX8-NEXT:    s_setpc_b64 s[30:31]
784;
785; GFX9-LABEL: v_maximumnum_f64_1.0:
786; GFX9:       ; %bb.0:
787; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
788; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
789; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
790; GFX9-NEXT:    s_setpc_b64 s[30:31]
791;
792; GFX10-LABEL: v_maximumnum_f64_1.0:
793; GFX10:       ; %bb.0:
794; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
795; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
796; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
797; GFX10-NEXT:    s_setpc_b64 s[30:31]
798;
799; GFX11-LABEL: v_maximumnum_f64_1.0:
800; GFX11:       ; %bb.0:
801; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
803; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
804; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], 1.0
805; GFX11-NEXT:    s_setpc_b64 s[30:31]
806;
807; GFX12-LABEL: v_maximumnum_f64_1.0:
808; GFX12:       ; %bb.0:
809; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
810; GFX12-NEXT:    s_wait_expcnt 0x0
811; GFX12-NEXT:    s_wait_samplecnt 0x0
812; GFX12-NEXT:    s_wait_bvhcnt 0x0
813; GFX12-NEXT:    s_wait_kmcnt 0x0
814; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
815; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
816; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], 1.0, v[0:1]
817; GFX12-NEXT:    s_setpc_b64 s[30:31]
818  %result = call double @llvm.maximumnum.f64(double %x, double 1.0)
819  ret double %result
820}
821
; llvm.maximumnum.f16 with %x passed inreg and %y passed normally.
; The checks expect the inreg operand to be read from an SGPR (s16 on
; gfx803/gfx900/gfx1030, s0 on gfx1100/gfx1200) by an e64-encoded self-max that
; writes v1; v1 is then the first source of the final max.
822define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
823; GFX8-LABEL: v_maximumnum_f16_s_v:
824; GFX8:       ; %bb.0:
825; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
827; GFX8-NEXT:    v_max_f16_e64 v1, s16, s16
828; GFX8-NEXT:    v_max_f16_e32 v0, v1, v0
829; GFX8-NEXT:    s_setpc_b64 s[30:31]
830;
831; GFX9-LABEL: v_maximumnum_f16_s_v:
832; GFX9:       ; %bb.0:
833; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
835; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
836; GFX9-NEXT:    v_max_f16_e32 v0, v1, v0
837; GFX9-NEXT:    s_setpc_b64 s[30:31]
838;
839; GFX10-LABEL: v_maximumnum_f16_s_v:
840; GFX10:       ; %bb.0:
841; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
843; GFX10-NEXT:    v_max_f16_e64 v1, s16, s16
844; GFX10-NEXT:    v_max_f16_e32 v0, v1, v0
845; GFX10-NEXT:    s_setpc_b64 s[30:31]
846;
847; GFX11-LABEL: v_maximumnum_f16_s_v:
848; GFX11:       ; %bb.0:
849; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
851; GFX11-NEXT:    v_max_f16_e64 v1, s0, s0
852; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
853; GFX11-NEXT:    v_max_f16_e32 v0, v1, v0
854; GFX11-NEXT:    s_setpc_b64 s[30:31]
855;
856; GFX12-LABEL: v_maximumnum_f16_s_v:
857; GFX12:       ; %bb.0:
858; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
859; GFX12-NEXT:    s_wait_expcnt 0x0
860; GFX12-NEXT:    s_wait_samplecnt 0x0
861; GFX12-NEXT:    s_wait_bvhcnt 0x0
862; GFX12-NEXT:    s_wait_kmcnt 0x0
863; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
864; GFX12-NEXT:    v_max_num_f16_e64 v1, s0, s0
865; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
866; GFX12-NEXT:    v_max_num_f16_e32 v0, v1, v0
867; GFX12-NEXT:    s_setpc_b64 s[30:31]
868  %result = call half @llvm.maximumnum.f16(half %x, half %y)
869  ret half %result
870}
871
; llvm.maximumnum.f16 with %x passed normally and %y passed inreg.
; The checks expect the inreg operand to be read from an SGPR (s16 on
; gfx803/gfx900/gfx1030, s0 on gfx1100/gfx1200) by an e64-encoded self-max that
; writes v1; v1 is then the second source of the final max.
872define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
873; GFX8-LABEL: v_maximumnum_f16_v_s:
874; GFX8:       ; %bb.0:
875; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876; GFX8-NEXT:    v_max_f16_e64 v1, s16, s16
877; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
878; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
879; GFX8-NEXT:    s_setpc_b64 s[30:31]
880;
881; GFX9-LABEL: v_maximumnum_f16_v_s:
882; GFX9:       ; %bb.0:
883; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
885; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
886; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
887; GFX9-NEXT:    s_setpc_b64 s[30:31]
888;
889; GFX10-LABEL: v_maximumnum_f16_v_s:
890; GFX10:       ; %bb.0:
891; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
892; GFX10-NEXT:    v_max_f16_e64 v1, s16, s16
893; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
894; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
895; GFX10-NEXT:    s_setpc_b64 s[30:31]
896;
897; GFX11-LABEL: v_maximumnum_f16_v_s:
898; GFX11:       ; %bb.0:
899; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900; GFX11-NEXT:    v_max_f16_e64 v1, s0, s0
901; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
902; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
903; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
904; GFX11-NEXT:    s_setpc_b64 s[30:31]
905;
906; GFX12-LABEL: v_maximumnum_f16_v_s:
907; GFX12:       ; %bb.0:
908; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
909; GFX12-NEXT:    s_wait_expcnt 0x0
910; GFX12-NEXT:    s_wait_samplecnt 0x0
911; GFX12-NEXT:    s_wait_bvhcnt 0x0
912; GFX12-NEXT:    s_wait_kmcnt 0x0
913; GFX12-NEXT:    v_max_num_f16_e64 v1, s0, s0
914; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
915; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
916; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
917; GFX12-NEXT:    s_setpc_b64 s[30:31]
918  %result = call half @llvm.maximumnum.f16(half %x, half %y)
919  ret half %result
920}
921
; llvm.maximumnum.f16 with both operands passed inreg.
; The checks expect two e64-encoded self-max instructions that each read one
; SGPR (s17 into v0 and s16 into v1 on gfx803/gfx900/gfx1030; s1 and s0 on
; gfx1100/gfx1200), followed by a final max of v1 and v0.
922define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
923; GFX8-LABEL: v_maximumnum_f16_s_s:
924; GFX8:       ; %bb.0:
925; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926; GFX8-NEXT:    v_max_f16_e64 v0, s17, s17
927; GFX8-NEXT:    v_max_f16_e64 v1, s16, s16
928; GFX8-NEXT:    v_max_f16_e32 v0, v1, v0
929; GFX8-NEXT:    s_setpc_b64 s[30:31]
930;
931; GFX9-LABEL: v_maximumnum_f16_s_s:
932; GFX9:       ; %bb.0:
933; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
934; GFX9-NEXT:    v_max_f16_e64 v0, s17, s17
935; GFX9-NEXT:    v_max_f16_e64 v1, s16, s16
936; GFX9-NEXT:    v_max_f16_e32 v0, v1, v0
937; GFX9-NEXT:    s_setpc_b64 s[30:31]
938;
939; GFX10-LABEL: v_maximumnum_f16_s_s:
940; GFX10:       ; %bb.0:
941; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942; GFX10-NEXT:    v_max_f16_e64 v0, s17, s17
943; GFX10-NEXT:    v_max_f16_e64 v1, s16, s16
944; GFX10-NEXT:    v_max_f16_e32 v0, v1, v0
945; GFX10-NEXT:    s_setpc_b64 s[30:31]
946;
947; GFX11-LABEL: v_maximumnum_f16_s_s:
948; GFX11:       ; %bb.0:
949; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950; GFX11-NEXT:    v_max_f16_e64 v0, s1, s1
951; GFX11-NEXT:    v_max_f16_e64 v1, s0, s0
952; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
953; GFX11-NEXT:    v_max_f16_e32 v0, v1, v0
954; GFX11-NEXT:    s_setpc_b64 s[30:31]
955;
956; GFX12-LABEL: v_maximumnum_f16_s_s:
957; GFX12:       ; %bb.0:
958; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
959; GFX12-NEXT:    s_wait_expcnt 0x0
960; GFX12-NEXT:    s_wait_samplecnt 0x0
961; GFX12-NEXT:    s_wait_bvhcnt 0x0
962; GFX12-NEXT:    s_wait_kmcnt 0x0
963; GFX12-NEXT:    v_max_num_f16_e64 v0, s1, s1
964; GFX12-NEXT:    v_max_num_f16_e64 v1, s0, s0
965; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
966; GFX12-NEXT:    v_max_num_f16_e32 v0, v1, v0
967; GFX12-NEXT:    s_setpc_b64 s[30:31]
968  %result = call half @llvm.maximumnum.f16(half %x, half %y)
969  ret half %result
970}
971
; llvm.maximumnum.f32 with %x passed inreg and %y passed normally.
; The checks expect the inreg operand to be read from an SGPR (s16 on
; gfx803/gfx900/gfx1030, s0 on gfx1100/gfx1200) by an e64-encoded instruction
; that writes v1: a v_mul_f32 by 1.0 on gfx803 and a self-max on the other
; targets. v1 is then the first source of the final max.
972define float @v_maximumnum_f32_s_v(float inreg %x, float %y) {
973; GFX8-LABEL: v_maximumnum_f32_s_v:
974; GFX8:       ; %bb.0:
975; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
977; GFX8-NEXT:    v_mul_f32_e64 v1, 1.0, s16
978; GFX8-NEXT:    v_max_f32_e32 v0, v1, v0
979; GFX8-NEXT:    s_setpc_b64 s[30:31]
980;
981; GFX9-LABEL: v_maximumnum_f32_s_v:
982; GFX9:       ; %bb.0:
983; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
985; GFX9-NEXT:    v_max_f32_e64 v1, s16, s16
986; GFX9-NEXT:    v_max_f32_e32 v0, v1, v0
987; GFX9-NEXT:    s_setpc_b64 s[30:31]
988;
989; GFX10-LABEL: v_maximumnum_f32_s_v:
990; GFX10:       ; %bb.0:
991; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
992; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
993; GFX10-NEXT:    v_max_f32_e64 v1, s16, s16
994; GFX10-NEXT:    v_max_f32_e32 v0, v1, v0
995; GFX10-NEXT:    s_setpc_b64 s[30:31]
996;
997; GFX11-LABEL: v_maximumnum_f32_s_v:
998; GFX11:       ; %bb.0:
999; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000; GFX11-NEXT:    v_max_f32_e32 v0, v0, v0
1001; GFX11-NEXT:    v_max_f32_e64 v1, s0, s0
1002; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1003; GFX11-NEXT:    v_max_f32_e32 v0, v1, v0
1004; GFX11-NEXT:    s_setpc_b64 s[30:31]
1005;
1006; GFX12-LABEL: v_maximumnum_f32_s_v:
1007; GFX12:       ; %bb.0:
1008; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1009; GFX12-NEXT:    s_wait_expcnt 0x0
1010; GFX12-NEXT:    s_wait_samplecnt 0x0
1011; GFX12-NEXT:    s_wait_bvhcnt 0x0
1012; GFX12-NEXT:    s_wait_kmcnt 0x0
1013; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
1014; GFX12-NEXT:    v_max_num_f32_e64 v1, s0, s0
1015; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1016; GFX12-NEXT:    v_max_num_f32_e32 v0, v1, v0
1017; GFX12-NEXT:    s_setpc_b64 s[30:31]
1018  %result = call float @llvm.maximumnum.f32(float %x, float %y)
1019  ret float %result
1020}
1021
; llvm.maximumnum.f32 with %x passed normally and %y passed inreg.
; The checks expect the inreg operand to be read from an SGPR (s16 on
; gfx803/gfx900/gfx1030, s0 on gfx1100/gfx1200) by an e64-encoded instruction
; that writes v1: a v_mul_f32 by 1.0 on gfx803 and a self-max on the other
; targets. v1 is then the second source of the final max.
1022define float @v_maximumnum_f32_v_s(float %x, float inreg %y) {
1023; GFX8-LABEL: v_maximumnum_f32_v_s:
1024; GFX8:       ; %bb.0:
1025; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1026; GFX8-NEXT:    v_mul_f32_e64 v1, 1.0, s16
1027; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1028; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
1029; GFX8-NEXT:    s_setpc_b64 s[30:31]
1030;
1031; GFX9-LABEL: v_maximumnum_f32_v_s:
1032; GFX9:       ; %bb.0:
1033; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; GFX9-NEXT:    v_max_f32_e64 v1, s16, s16
1035; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
1036; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
1037; GFX9-NEXT:    s_setpc_b64 s[30:31]
1038;
1039; GFX10-LABEL: v_maximumnum_f32_v_s:
1040; GFX10:       ; %bb.0:
1041; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1042; GFX10-NEXT:    v_max_f32_e64 v1, s16, s16
1043; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
1044; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
1045; GFX10-NEXT:    s_setpc_b64 s[30:31]
1046;
1047; GFX11-LABEL: v_maximumnum_f32_v_s:
1048; GFX11:       ; %bb.0:
1049; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1050; GFX11-NEXT:    v_max_f32_e64 v1, s0, s0
1051; GFX11-NEXT:    v_max_f32_e32 v0, v0, v0
1052; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1053; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
1054; GFX11-NEXT:    s_setpc_b64 s[30:31]
1055;
1056; GFX12-LABEL: v_maximumnum_f32_v_s:
1057; GFX12:       ; %bb.0:
1058; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1059; GFX12-NEXT:    s_wait_expcnt 0x0
1060; GFX12-NEXT:    s_wait_samplecnt 0x0
1061; GFX12-NEXT:    s_wait_bvhcnt 0x0
1062; GFX12-NEXT:    s_wait_kmcnt 0x0
1063; GFX12-NEXT:    v_max_num_f32_e64 v1, s0, s0
1064; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
1065; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1066; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
1067; GFX12-NEXT:    s_setpc_b64 s[30:31]
1068  %result = call float @llvm.maximumnum.f32(float %x, float %y)
1069  ret float %result
1070}
1071
; llvm.maximumnum.f32 with both operands passed inreg.
; The checks expect two e64-encoded instructions that each read one SGPR
; (s17 into v0 and s16 into v1 on gfx803/gfx900/gfx1030; s1 and s0 on
; gfx1100/gfx1200): v_mul_f32 by 1.0 on gfx803 and self-max elsewhere. The
; final max then takes v1 and v0.
1072define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) {
1073; GFX8-LABEL: v_maximumnum_f32_s_s:
1074; GFX8:       ; %bb.0:
1075; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076; GFX8-NEXT:    v_mul_f32_e64 v0, 1.0, s17
1077; GFX8-NEXT:    v_mul_f32_e64 v1, 1.0, s16
1078; GFX8-NEXT:    v_max_f32_e32 v0, v1, v0
1079; GFX8-NEXT:    s_setpc_b64 s[30:31]
1080;
1081; GFX9-LABEL: v_maximumnum_f32_s_s:
1082; GFX9:       ; %bb.0:
1083; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084; GFX9-NEXT:    v_max_f32_e64 v0, s17, s17
1085; GFX9-NEXT:    v_max_f32_e64 v1, s16, s16
1086; GFX9-NEXT:    v_max_f32_e32 v0, v1, v0
1087; GFX9-NEXT:    s_setpc_b64 s[30:31]
1088;
1089; GFX10-LABEL: v_maximumnum_f32_s_s:
1090; GFX10:       ; %bb.0:
1091; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092; GFX10-NEXT:    v_max_f32_e64 v0, s17, s17
1093; GFX10-NEXT:    v_max_f32_e64 v1, s16, s16
1094; GFX10-NEXT:    v_max_f32_e32 v0, v1, v0
1095; GFX10-NEXT:    s_setpc_b64 s[30:31]
1096;
1097; GFX11-LABEL: v_maximumnum_f32_s_s:
1098; GFX11:       ; %bb.0:
1099; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100; GFX11-NEXT:    v_max_f32_e64 v0, s1, s1
1101; GFX11-NEXT:    v_max_f32_e64 v1, s0, s0
1102; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1103; GFX11-NEXT:    v_max_f32_e32 v0, v1, v0
1104; GFX11-NEXT:    s_setpc_b64 s[30:31]
1105;
1106; GFX12-LABEL: v_maximumnum_f32_s_s:
1107; GFX12:       ; %bb.0:
1108; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1109; GFX12-NEXT:    s_wait_expcnt 0x0
1110; GFX12-NEXT:    s_wait_samplecnt 0x0
1111; GFX12-NEXT:    s_wait_bvhcnt 0x0
1112; GFX12-NEXT:    s_wait_kmcnt 0x0
1113; GFX12-NEXT:    v_max_num_f32_e64 v0, s1, s1
1114; GFX12-NEXT:    v_max_num_f32_e64 v1, s0, s0
1115; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1116; GFX12-NEXT:    v_max_num_f32_e32 v0, v1, v0
1117; GFX12-NEXT:    s_setpc_b64 s[30:31]
1118  %result = call float @llvm.maximumnum.f32(float %x, float %y)
1119  ret float %result
1120}
1121
; llvm.maximumnum.f64 with %x passed inreg and %y passed normally.
; The checks expect the inreg operand to be read from an SGPR pair (s[16:17] on
; gfx803/gfx900/gfx1030, s[0:1] on gfx1100/gfx1200) by a self-max that writes
; v[2:3], which is then the first source of the final max. The two self-max
; instructions appear in the opposite order on gfx803/gfx900 compared with the
; newer targets.
1122define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
1123; GFX8-LABEL: v_maximumnum_f64_s_v:
1124; GFX8:       ; %bb.0:
1125; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1126; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1127; GFX8-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1128; GFX8-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1129; GFX8-NEXT:    s_setpc_b64 s[30:31]
1130;
1131; GFX9-LABEL: v_maximumnum_f64_s_v:
1132; GFX9:       ; %bb.0:
1133; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1134; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1135; GFX9-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1136; GFX9-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1137; GFX9-NEXT:    s_setpc_b64 s[30:31]
1138;
1139; GFX10-LABEL: v_maximumnum_f64_s_v:
1140; GFX10:       ; %bb.0:
1141; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142; GFX10-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1143; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1144; GFX10-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1145; GFX10-NEXT:    s_setpc_b64 s[30:31]
1146;
1147; GFX11-LABEL: v_maximumnum_f64_s_v:
1148; GFX11:       ; %bb.0:
1149; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150; GFX11-NEXT:    v_max_f64 v[2:3], s[0:1], s[0:1]
1151; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1152; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1153; GFX11-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1154; GFX11-NEXT:    s_setpc_b64 s[30:31]
1155;
1156; GFX12-LABEL: v_maximumnum_f64_s_v:
1157; GFX12:       ; %bb.0:
1158; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1159; GFX12-NEXT:    s_wait_expcnt 0x0
1160; GFX12-NEXT:    s_wait_samplecnt 0x0
1161; GFX12-NEXT:    s_wait_bvhcnt 0x0
1162; GFX12-NEXT:    s_wait_kmcnt 0x0
1163; GFX12-NEXT:    v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
1164; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
1165; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1166; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[2:3], v[0:1]
1167; GFX12-NEXT:    s_setpc_b64 s[30:31]
1168  %result = call double @llvm.maximumnum.f64(double %x, double %y)
1169  ret double %result
1170}
1171
; llvm.maximumnum.f64 with %x passed normally and %y passed inreg.
; The checks expect the inreg operand to be read from an SGPR pair (s[16:17] on
; gfx803/gfx900/gfx1030, s[0:1] on gfx1100/gfx1200) by a self-max that writes
; v[2:3], which is then the second source of the final max.
1172define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
1173; GFX8-LABEL: v_maximumnum_f64_v_s:
1174; GFX8:       ; %bb.0:
1175; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176; GFX8-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1177; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1178; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1179; GFX8-NEXT:    s_setpc_b64 s[30:31]
1180;
1181; GFX9-LABEL: v_maximumnum_f64_v_s:
1182; GFX9:       ; %bb.0:
1183; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184; GFX9-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1185; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1186; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1187; GFX9-NEXT:    s_setpc_b64 s[30:31]
1188;
1189; GFX10-LABEL: v_maximumnum_f64_v_s:
1190; GFX10:       ; %bb.0:
1191; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1192; GFX10-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1193; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1194; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1195; GFX10-NEXT:    s_setpc_b64 s[30:31]
1196;
1197; GFX11-LABEL: v_maximumnum_f64_v_s:
1198; GFX11:       ; %bb.0:
1199; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200; GFX11-NEXT:    v_max_f64 v[2:3], s[0:1], s[0:1]
1201; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1202; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1203; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1204; GFX11-NEXT:    s_setpc_b64 s[30:31]
1205;
1206; GFX12-LABEL: v_maximumnum_f64_v_s:
1207; GFX12:       ; %bb.0:
1208; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1209; GFX12-NEXT:    s_wait_expcnt 0x0
1210; GFX12-NEXT:    s_wait_samplecnt 0x0
1211; GFX12-NEXT:    s_wait_bvhcnt 0x0
1212; GFX12-NEXT:    s_wait_kmcnt 0x0
1213; GFX12-NEXT:    v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
1214; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
1215; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1216; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
1217; GFX12-NEXT:    s_setpc_b64 s[30:31]
1218  %result = call double @llvm.maximumnum.f64(double %x, double %y)
1219  ret double %result
1220}
1221
; llvm.maximumnum.f64 with both operands passed inreg.
; The checks expect two self-max instructions that each read one SGPR pair
; (s[18:19] into v[0:1] and s[16:17] into v[2:3] on gfx803/gfx900/gfx1030;
; s[2:3] and s[0:1] on gfx1100/gfx1200), followed by a final max of v[2:3]
; and v[0:1].
1222define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) {
1223; GFX8-LABEL: v_maximumnum_f64_s_s:
1224; GFX8:       ; %bb.0:
1225; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226; GFX8-NEXT:    v_max_f64 v[0:1], s[18:19], s[18:19]
1227; GFX8-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1228; GFX8-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1229; GFX8-NEXT:    s_setpc_b64 s[30:31]
1230;
1231; GFX9-LABEL: v_maximumnum_f64_s_s:
1232; GFX9:       ; %bb.0:
1233; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1234; GFX9-NEXT:    v_max_f64 v[0:1], s[18:19], s[18:19]
1235; GFX9-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1236; GFX9-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1237; GFX9-NEXT:    s_setpc_b64 s[30:31]
1238;
1239; GFX10-LABEL: v_maximumnum_f64_s_s:
1240; GFX10:       ; %bb.0:
1241; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242; GFX10-NEXT:    v_max_f64 v[0:1], s[18:19], s[18:19]
1243; GFX10-NEXT:    v_max_f64 v[2:3], s[16:17], s[16:17]
1244; GFX10-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1245; GFX10-NEXT:    s_setpc_b64 s[30:31]
1246;
1247; GFX11-LABEL: v_maximumnum_f64_s_s:
1248; GFX11:       ; %bb.0:
1249; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1250; GFX11-NEXT:    v_max_f64 v[0:1], s[2:3], s[2:3]
1251; GFX11-NEXT:    v_max_f64 v[2:3], s[0:1], s[0:1]
1252; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1253; GFX11-NEXT:    v_max_f64 v[0:1], v[2:3], v[0:1]
1254; GFX11-NEXT:    s_setpc_b64 s[30:31]
1255;
1256; GFX12-LABEL: v_maximumnum_f64_s_s:
1257; GFX12:       ; %bb.0:
1258; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1259; GFX12-NEXT:    s_wait_expcnt 0x0
1260; GFX12-NEXT:    s_wait_samplecnt 0x0
1261; GFX12-NEXT:    s_wait_bvhcnt 0x0
1262; GFX12-NEXT:    s_wait_kmcnt 0x0
1263; GFX12-NEXT:    v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
1264; GFX12-NEXT:    v_max_num_f64_e64 v[2:3], s[0:1], s[0:1]
1265; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1266; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[2:3], v[0:1]
1267; GFX12-NEXT:    s_setpc_b64 s[30:31]
1268  %result = call double @llvm.maximumnum.f64(double %x, double %y)
1269  ret double %result
1270}
1271
; llvm.maximumnum.f32 with llvm.fabs.f32 applied to the right-hand operand only.
; The checks expect the fabs to appear as a |v1| operand modifier on the first
; instruction that touches v1 (a v_mul_f32 by 1.0 on gfx803, an e64 self-max on
; the other targets) rather than as a separate instruction.
1272define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
1273; GFX8-LABEL: v_maximumnum_f32_fabs_rhs:
1274; GFX8:       ; %bb.0:
1275; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276; GFX8-NEXT:    v_mul_f32_e64 v1, 1.0, |v1|
1277; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1278; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
1279; GFX8-NEXT:    s_setpc_b64 s[30:31]
1280;
1281; GFX9-LABEL: v_maximumnum_f32_fabs_rhs:
1282; GFX9:       ; %bb.0:
1283; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1284; GFX9-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
1285; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
1286; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
1287; GFX9-NEXT:    s_setpc_b64 s[30:31]
1288;
1289; GFX10-LABEL: v_maximumnum_f32_fabs_rhs:
1290; GFX10:       ; %bb.0:
1291; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1292; GFX10-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
1293; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
1294; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
1295; GFX10-NEXT:    s_setpc_b64 s[30:31]
1296;
1297; GFX11-LABEL: v_maximumnum_f32_fabs_rhs:
1298; GFX11:       ; %bb.0:
1299; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1300; GFX11-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
1301; GFX11-NEXT:    v_max_f32_e32 v0, v0, v0
1302; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1303; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
1304; GFX11-NEXT:    s_setpc_b64 s[30:31]
1305;
1306; GFX12-LABEL: v_maximumnum_f32_fabs_rhs:
1307; GFX12:       ; %bb.0:
1308; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1309; GFX12-NEXT:    s_wait_expcnt 0x0
1310; GFX12-NEXT:    s_wait_samplecnt 0x0
1311; GFX12-NEXT:    s_wait_bvhcnt 0x0
1312; GFX12-NEXT:    s_wait_kmcnt 0x0
1313; GFX12-NEXT:    v_max_num_f32_e64 v1, |v1|, |v1|
1314; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
1315; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1316; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
1317; GFX12-NEXT:    s_setpc_b64 s[30:31]
1318  %fabs.y = call float @llvm.fabs.f32(float %y)
1319  %result = call float @llvm.maximumnum.f32(float %x, float %fabs.y)
1320  ret float %result
1321}
1322
; llvm.maximumnum.f32 whose right-hand operand is fneg(fabs(%y)).
; The checks expect both operations to be absorbed into the first instruction
; that touches v1: on gfx803 a v_mul_f32 of |v1| by -1.0, and on the other
; targets an e64 self-max whose sources are both written -|v1|.
1323define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
1324; GFX8-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
1325; GFX8:       ; %bb.0:
1326; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1327; GFX8-NEXT:    v_mul_f32_e64 v1, -1.0, |v1|
1328; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1329; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
1330; GFX8-NEXT:    s_setpc_b64 s[30:31]
1331;
1332; GFX9-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
1333; GFX9:       ; %bb.0:
1334; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1335; GFX9-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
1336; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
1337; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
1338; GFX9-NEXT:    s_setpc_b64 s[30:31]
1339;
1340; GFX10-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
1341; GFX10:       ; %bb.0:
1342; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX10-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
1344; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
1345; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
1346; GFX10-NEXT:    s_setpc_b64 s[30:31]
1347;
1348; GFX11-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
1349; GFX11:       ; %bb.0:
1350; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1351; GFX11-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
1352; GFX11-NEXT:    v_max_f32_e32 v0, v0, v0
1353; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1354; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
1355; GFX11-NEXT:    s_setpc_b64 s[30:31]
1356;
1357; GFX12-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
1358; GFX12:       ; %bb.0:
1359; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1360; GFX12-NEXT:    s_wait_expcnt 0x0
1361; GFX12-NEXT:    s_wait_samplecnt 0x0
1362; GFX12-NEXT:    s_wait_bvhcnt 0x0
1363; GFX12-NEXT:    s_wait_kmcnt 0x0
1364; GFX12-NEXT:    v_max_num_f32_e64 v1, -|v1|, -|v1|
1365; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
1366; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1367; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
1368; GFX12-NEXT:    s_setpc_b64 s[30:31]
1369  %fabs.y = call float @llvm.fabs.f32(float %y)
1370  %fneg.fabs.y = fneg float %fabs.y
1371  %result = call float @llvm.maximumnum.f32(float %x, float %fneg.fabs.y)
1372  ret float %result
1373}
1374
; llvm.maximumnum.f32 with llvm.fabs.f32 applied to both operands.
; The checks expect each fabs to appear as a |vN| operand modifier on the
; per-operand instruction (a v_mul_f32 by 1.0 on gfx803, an e64 self-max on the
; other targets), so only three arithmetic instructions are expected per target.
1375define float @v_maximumnum_f32_fabs(float %x, float %y) {
1376; GFX8-LABEL: v_maximumnum_f32_fabs:
1377; GFX8:       ; %bb.0:
1378; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379; GFX8-NEXT:    v_mul_f32_e64 v1, 1.0, |v1|
1380; GFX8-NEXT:    v_mul_f32_e64 v0, 1.0, |v0|
1381; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
1382; GFX8-NEXT:    s_setpc_b64 s[30:31]
1383;
1384; GFX9-LABEL: v_maximumnum_f32_fabs:
1385; GFX9:       ; %bb.0:
1386; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1387; GFX9-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
1388; GFX9-NEXT:    v_max_f32_e64 v0, |v0|, |v0|
1389; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
1390; GFX9-NEXT:    s_setpc_b64 s[30:31]
1391;
1392; GFX10-LABEL: v_maximumnum_f32_fabs:
1393; GFX10:       ; %bb.0:
1394; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395; GFX10-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
1396; GFX10-NEXT:    v_max_f32_e64 v0, |v0|, |v0|
1397; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
1398; GFX10-NEXT:    s_setpc_b64 s[30:31]
1399;
1400; GFX11-LABEL: v_maximumnum_f32_fabs:
1401; GFX11:       ; %bb.0:
1402; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403; GFX11-NEXT:    v_max_f32_e64 v1, |v1|, |v1|
1404; GFX11-NEXT:    v_max_f32_e64 v0, |v0|, |v0|
1405; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1406; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
1407; GFX11-NEXT:    s_setpc_b64 s[30:31]
1408;
1409; GFX12-LABEL: v_maximumnum_f32_fabs:
1410; GFX12:       ; %bb.0:
1411; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1412; GFX12-NEXT:    s_wait_expcnt 0x0
1413; GFX12-NEXT:    s_wait_samplecnt 0x0
1414; GFX12-NEXT:    s_wait_bvhcnt 0x0
1415; GFX12-NEXT:    s_wait_kmcnt 0x0
1416; GFX12-NEXT:    v_max_num_f32_e64 v1, |v1|, |v1|
1417; GFX12-NEXT:    v_max_num_f32_e64 v0, |v0|, |v0|
1418; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1419; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
1420; GFX12-NEXT:    s_setpc_b64 s[30:31]
1421  %fabs.x = call float @llvm.fabs.f32(float %x)
1422  %fabs.y = call float @llvm.fabs.f32(float %y)
1423  %result = call float @llvm.maximumnum.f32(float %fabs.x, float %fabs.y)
1424  ret float %result
1425}
1426
; llvm.maximumnum.f32 with both operands negated by fneg.
; The checks expect each negation to be absorbed into the per-operand
; instruction: on gfx803 a v_mul_f32 by -1.0, and on the other targets an e64
; self-max whose sources are both written with a leading minus sign.
1427define float @v_maximumnum_f32_fneg(float %x, float %y) {
1428; GFX8-LABEL: v_maximumnum_f32_fneg:
1429; GFX8:       ; %bb.0:
1430; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431; GFX8-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1432; GFX8-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1433; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
1434; GFX8-NEXT:    s_setpc_b64 s[30:31]
1435;
1436; GFX9-LABEL: v_maximumnum_f32_fneg:
1437; GFX9:       ; %bb.0:
1438; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439; GFX9-NEXT:    v_max_f32_e64 v1, -v1, -v1
1440; GFX9-NEXT:    v_max_f32_e64 v0, -v0, -v0
1441; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
1442; GFX9-NEXT:    s_setpc_b64 s[30:31]
1443;
1444; GFX10-LABEL: v_maximumnum_f32_fneg:
1445; GFX10:       ; %bb.0:
1446; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1447; GFX10-NEXT:    v_max_f32_e64 v1, -v1, -v1
1448; GFX10-NEXT:    v_max_f32_e64 v0, -v0, -v0
1449; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
1450; GFX10-NEXT:    s_setpc_b64 s[30:31]
1451;
1452; GFX11-LABEL: v_maximumnum_f32_fneg:
1453; GFX11:       ; %bb.0:
1454; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1455; GFX11-NEXT:    v_max_f32_e64 v1, -v1, -v1
1456; GFX11-NEXT:    v_max_f32_e64 v0, -v0, -v0
1457; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1458; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
1459; GFX11-NEXT:    s_setpc_b64 s[30:31]
1460;
1461; GFX12-LABEL: v_maximumnum_f32_fneg:
1462; GFX12:       ; %bb.0:
1463; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1464; GFX12-NEXT:    s_wait_expcnt 0x0
1465; GFX12-NEXT:    s_wait_samplecnt 0x0
1466; GFX12-NEXT:    s_wait_bvhcnt 0x0
1467; GFX12-NEXT:    s_wait_kmcnt 0x0
1468; GFX12-NEXT:    v_max_num_f32_e64 v1, -v1, -v1
1469; GFX12-NEXT:    v_max_num_f32_e64 v0, -v0, -v0
1470; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1471; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v1
1472; GFX12-NEXT:    s_setpc_b64 s[30:31]
1473  %fneg.x = fneg float %x
1474  %fneg.y = fneg float %y
1475  %result = call float @llvm.maximumnum.f32(float %fneg.x, float %fneg.y)
1476  ret float %result
1477}
1478
; llvm.maximumnum.f16 with llvm.fabs.f16 applied to the right-hand operand only.
; The checks expect the fabs to appear as a |v1| operand modifier on an e64
; self-max of v1 on every target, including gfx803 (where the f32 variants of
; these tests expect a v_mul_f32 instead).
1479define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
1480; GFX8-LABEL: v_maximumnum_f16_fabs_rhs:
1481; GFX8:       ; %bb.0:
1482; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483; GFX8-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1484; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
1485; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
1486; GFX8-NEXT:    s_setpc_b64 s[30:31]
1487;
1488; GFX9-LABEL: v_maximumnum_f16_fabs_rhs:
1489; GFX9:       ; %bb.0:
1490; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; GFX9-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1492; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
1493; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
1494; GFX9-NEXT:    s_setpc_b64 s[30:31]
1495;
1496; GFX10-LABEL: v_maximumnum_f16_fabs_rhs:
1497; GFX10:       ; %bb.0:
1498; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499; GFX10-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1500; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
1501; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
1502; GFX10-NEXT:    s_setpc_b64 s[30:31]
1503;
1504; GFX11-LABEL: v_maximumnum_f16_fabs_rhs:
1505; GFX11:       ; %bb.0:
1506; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507; GFX11-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1508; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
1509; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1510; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
1511; GFX11-NEXT:    s_setpc_b64 s[30:31]
1512;
1513; GFX12-LABEL: v_maximumnum_f16_fabs_rhs:
1514; GFX12:       ; %bb.0:
1515; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1516; GFX12-NEXT:    s_wait_expcnt 0x0
1517; GFX12-NEXT:    s_wait_samplecnt 0x0
1518; GFX12-NEXT:    s_wait_bvhcnt 0x0
1519; GFX12-NEXT:    s_wait_kmcnt 0x0
1520; GFX12-NEXT:    v_max_num_f16_e64 v1, |v1|, |v1|
1521; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
1522; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1523; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
1524; GFX12-NEXT:    s_setpc_b64 s[30:31]
1525  %fabs.y = call half @llvm.fabs.f16(half %y)
1526  %result = call half @llvm.maximumnum.f16(half %x, half %fabs.y)
1527  ret half %result
1528}
1529
; maximumnum(x, -fabs(y)) on half: the right-hand operand is fabs followed by fneg.
; Every run expects both operations to be combined into -|v1| source modifiers
; on the self-max of v1, with no standalone fabs or fneg instruction.
; The GFX12 run expects the v_max_num_f16 mnemonic instead of v_max_f16.
; Assertions are autogenerated (see the header note at the top of the file).
1530define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
1531; GFX8-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
1532; GFX8:       ; %bb.0:
1533; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534; GFX8-NEXT:    v_max_f16_e64 v1, -|v1|, -|v1|
1535; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
1536; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
1537; GFX8-NEXT:    s_setpc_b64 s[30:31]
1538;
1539; GFX9-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
1540; GFX9:       ; %bb.0:
1541; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1542; GFX9-NEXT:    v_max_f16_e64 v1, -|v1|, -|v1|
1543; GFX9-NEXT:    v_max_f16_e32 v0, v0, v0
1544; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
1545; GFX9-NEXT:    s_setpc_b64 s[30:31]
1546;
1547; GFX10-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
1548; GFX10:       ; %bb.0:
1549; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550; GFX10-NEXT:    v_max_f16_e64 v1, -|v1|, -|v1|
1551; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
1552; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
1553; GFX10-NEXT:    s_setpc_b64 s[30:31]
1554;
1555; GFX11-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
1556; GFX11:       ; %bb.0:
1557; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1558; GFX11-NEXT:    v_max_f16_e64 v1, -|v1|, -|v1|
1559; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
1560; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1561; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
1562; GFX11-NEXT:    s_setpc_b64 s[30:31]
1563;
1564; GFX12-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
1565; GFX12:       ; %bb.0:
1566; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1567; GFX12-NEXT:    s_wait_expcnt 0x0
1568; GFX12-NEXT:    s_wait_samplecnt 0x0
1569; GFX12-NEXT:    s_wait_bvhcnt 0x0
1570; GFX12-NEXT:    s_wait_kmcnt 0x0
1571; GFX12-NEXT:    v_max_num_f16_e64 v1, -|v1|, -|v1|
1572; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
1573; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1574; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
1575; GFX12-NEXT:    s_setpc_b64 s[30:31]
1576  %fabs.y = call half @llvm.fabs.f16(half %y)
1577  %fneg.fabs.y = fneg half %fabs.y
1578  %result = call half @llvm.maximumnum.f16(half %x, half %fneg.fabs.y)
1579  ret half %result
1580}
1581
; maximumnum(fabs(x), fabs(y)) on half: both operands are wrapped in fabs.
; Every run expects both self-max instructions to use the e64 encoding with
; |v| source modifiers (|v1| and |v0|), followed by a plain max of the results.
; The GFX12 run expects the v_max_num_f16 mnemonic instead of v_max_f16.
; Assertions are autogenerated (see the header note at the top of the file).
1582define half @v_maximumnum_f16_fabs(half %x, half %y) {
1583; GFX8-LABEL: v_maximumnum_f16_fabs:
1584; GFX8:       ; %bb.0:
1585; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1586; GFX8-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1587; GFX8-NEXT:    v_max_f16_e64 v0, |v0|, |v0|
1588; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
1589; GFX8-NEXT:    s_setpc_b64 s[30:31]
1590;
1591; GFX9-LABEL: v_maximumnum_f16_fabs:
1592; GFX9:       ; %bb.0:
1593; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1594; GFX9-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1595; GFX9-NEXT:    v_max_f16_e64 v0, |v0|, |v0|
1596; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
1597; GFX9-NEXT:    s_setpc_b64 s[30:31]
1598;
1599; GFX10-LABEL: v_maximumnum_f16_fabs:
1600; GFX10:       ; %bb.0:
1601; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1602; GFX10-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1603; GFX10-NEXT:    v_max_f16_e64 v0, |v0|, |v0|
1604; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
1605; GFX10-NEXT:    s_setpc_b64 s[30:31]
1606;
1607; GFX11-LABEL: v_maximumnum_f16_fabs:
1608; GFX11:       ; %bb.0:
1609; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1610; GFX11-NEXT:    v_max_f16_e64 v1, |v1|, |v1|
1611; GFX11-NEXT:    v_max_f16_e64 v0, |v0|, |v0|
1612; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1613; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
1614; GFX11-NEXT:    s_setpc_b64 s[30:31]
1615;
1616; GFX12-LABEL: v_maximumnum_f16_fabs:
1617; GFX12:       ; %bb.0:
1618; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1619; GFX12-NEXT:    s_wait_expcnt 0x0
1620; GFX12-NEXT:    s_wait_samplecnt 0x0
1621; GFX12-NEXT:    s_wait_bvhcnt 0x0
1622; GFX12-NEXT:    s_wait_kmcnt 0x0
1623; GFX12-NEXT:    v_max_num_f16_e64 v1, |v1|, |v1|
1624; GFX12-NEXT:    v_max_num_f16_e64 v0, |v0|, |v0|
1625; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1626; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
1627; GFX12-NEXT:    s_setpc_b64 s[30:31]
1628  %fabs.x = call half @llvm.fabs.f16(half %x)
1629  %fabs.y = call half @llvm.fabs.f16(half %y)
1630  %result = call half @llvm.maximumnum.f16(half %fabs.x, half %fabs.y)
1631  ret half %result
1632}
1633
; maximumnum(-x, -y) on half: both operands are negated with fneg.
; Every run expects the negations to appear as -v source modifiers on the two
; self-max instructions (e64 encoding), followed by a plain max of the results;
; no standalone negate instruction is expected.
; The GFX12 run expects the v_max_num_f16 mnemonic instead of v_max_f16.
; Assertions are autogenerated (see the header note at the top of the file).
1634define half @v_maximumnum_f16_fneg(half %x, half %y) {
1635; GFX8-LABEL: v_maximumnum_f16_fneg:
1636; GFX8:       ; %bb.0:
1637; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638; GFX8-NEXT:    v_max_f16_e64 v1, -v1, -v1
1639; GFX8-NEXT:    v_max_f16_e64 v0, -v0, -v0
1640; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
1641; GFX8-NEXT:    s_setpc_b64 s[30:31]
1642;
1643; GFX9-LABEL: v_maximumnum_f16_fneg:
1644; GFX9:       ; %bb.0:
1645; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646; GFX9-NEXT:    v_max_f16_e64 v1, -v1, -v1
1647; GFX9-NEXT:    v_max_f16_e64 v0, -v0, -v0
1648; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
1649; GFX9-NEXT:    s_setpc_b64 s[30:31]
1650;
1651; GFX10-LABEL: v_maximumnum_f16_fneg:
1652; GFX10:       ; %bb.0:
1653; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1654; GFX10-NEXT:    v_max_f16_e64 v1, -v1, -v1
1655; GFX10-NEXT:    v_max_f16_e64 v0, -v0, -v0
1656; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
1657; GFX10-NEXT:    s_setpc_b64 s[30:31]
1658;
1659; GFX11-LABEL: v_maximumnum_f16_fneg:
1660; GFX11:       ; %bb.0:
1661; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662; GFX11-NEXT:    v_max_f16_e64 v1, -v1, -v1
1663; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
1664; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1665; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
1666; GFX11-NEXT:    s_setpc_b64 s[30:31]
1667;
1668; GFX12-LABEL: v_maximumnum_f16_fneg:
1669; GFX12:       ; %bb.0:
1670; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1671; GFX12-NEXT:    s_wait_expcnt 0x0
1672; GFX12-NEXT:    s_wait_samplecnt 0x0
1673; GFX12-NEXT:    s_wait_bvhcnt 0x0
1674; GFX12-NEXT:    s_wait_kmcnt 0x0
1675; GFX12-NEXT:    v_max_num_f16_e64 v1, -v1, -v1
1676; GFX12-NEXT:    v_max_num_f16_e64 v0, -v0, -v0
1677; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1678; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v1
1679; GFX12-NEXT:    s_setpc_b64 s[30:31]
1680  %fneg.x = fneg half %x
1681  %fneg.y = fneg half %y
1682  %result = call half @llvm.maximumnum.f16(half %fneg.x, half %fneg.y)
1683  ret half %result
1684}
1685
; maximumnum(-x, -y) on double: both operands are negated with fneg.
; The checks use 64-bit register pairs (v[0:1] and v[2:3]) and expect the
; negations as -v[...] source modifiers on the two self-max instructions,
; followed by a plain max of the two results.
; The GFX12 run expects v_max_num_f64 (with explicit _e64/_e32 suffixes)
; where the other runs expect v_max_f64.
; Assertions are autogenerated (see the header note at the top of the file).
1686define double @v_maximumnum_f64_fneg(double %x, double %y) {
1687; GFX8-LABEL: v_maximumnum_f64_fneg:
1688; GFX8:       ; %bb.0:
1689; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690; GFX8-NEXT:    v_max_f64 v[2:3], -v[2:3], -v[2:3]
1691; GFX8-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1692; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1693; GFX8-NEXT:    s_setpc_b64 s[30:31]
1694;
1695; GFX9-LABEL: v_maximumnum_f64_fneg:
1696; GFX9:       ; %bb.0:
1697; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1698; GFX9-NEXT:    v_max_f64 v[2:3], -v[2:3], -v[2:3]
1699; GFX9-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1700; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1701; GFX9-NEXT:    s_setpc_b64 s[30:31]
1702;
1703; GFX10-LABEL: v_maximumnum_f64_fneg:
1704; GFX10:       ; %bb.0:
1705; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1706; GFX10-NEXT:    v_max_f64 v[2:3], -v[2:3], -v[2:3]
1707; GFX10-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1708; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1709; GFX10-NEXT:    s_setpc_b64 s[30:31]
1710;
1711; GFX11-LABEL: v_maximumnum_f64_fneg:
1712; GFX11:       ; %bb.0:
1713; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1714; GFX11-NEXT:    v_max_f64 v[2:3], -v[2:3], -v[2:3]
1715; GFX11-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1716; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1717; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
1718; GFX11-NEXT:    s_setpc_b64 s[30:31]
1719;
1720; GFX12-LABEL: v_maximumnum_f64_fneg:
1721; GFX12:       ; %bb.0:
1722; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1723; GFX12-NEXT:    s_wait_expcnt 0x0
1724; GFX12-NEXT:    s_wait_samplecnt 0x0
1725; GFX12-NEXT:    s_wait_bvhcnt 0x0
1726; GFX12-NEXT:    s_wait_kmcnt 0x0
1727; GFX12-NEXT:    v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3]
1728; GFX12-NEXT:    v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
1729; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1730; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
1731; GFX12-NEXT:    s_setpc_b64 s[30:31]
1732  %fneg.x = fneg double %x
1733  %fneg.y = fneg double %y
1734  %result = call double @llvm.maximumnum.f64(double %fneg.x, double %fneg.y)
1735  ret double %result
1736}
1737
; maximumnum on <2 x half> with no fast-math flags.
; The GFX8 run expects the upper 16 bits of each register to be processed
; with SDWA forms of v_max_f16 (WORD_1 selects), the lower halves with plain
; v_max_f16_e32, and the two halves merged with v_or_b32.
; The GFX9/GFX10/GFX11 runs expect three packed v_pk_max_f16 instructions
; (self-max of each input, then the real max); the GFX12 run expects the same
; shape with v_pk_max_num_f16.
; Assertions are autogenerated (see the header note at the top of the file).
1738define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
1739; GFX8-LABEL: v_maximumnum_v2f16:
1740; GFX8:       ; %bb.0:
1741; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1742; GFX8-NEXT:    v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1743; GFX8-NEXT:    v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1744; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
1745; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
1746; GFX8-NEXT:    v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1747; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
1748; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1749; GFX8-NEXT:    s_setpc_b64 s[30:31]
1750;
1751; GFX9-LABEL: v_maximumnum_v2f16:
1752; GFX9:       ; %bb.0:
1753; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1754; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
1755; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
1756; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
1757; GFX9-NEXT:    s_setpc_b64 s[30:31]
1758;
1759; GFX10-LABEL: v_maximumnum_v2f16:
1760; GFX10:       ; %bb.0:
1761; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1762; GFX10-NEXT:    v_pk_max_f16 v1, v1, v1
1763; GFX10-NEXT:    v_pk_max_f16 v0, v0, v0
1764; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
1765; GFX10-NEXT:    s_setpc_b64 s[30:31]
1766;
1767; GFX11-LABEL: v_maximumnum_v2f16:
1768; GFX11:       ; %bb.0:
1769; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1770; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
1771; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
1772; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1773; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
1774; GFX11-NEXT:    s_setpc_b64 s[30:31]
1775;
1776; GFX12-LABEL: v_maximumnum_v2f16:
1777; GFX12:       ; %bb.0:
1778; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1779; GFX12-NEXT:    s_wait_expcnt 0x0
1780; GFX12-NEXT:    s_wait_samplecnt 0x0
1781; GFX12-NEXT:    s_wait_bvhcnt 0x0
1782; GFX12-NEXT:    s_wait_kmcnt 0x0
1783; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v1
1784; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v0
1785; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1786; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v1
1787; GFX12-NEXT:    s_setpc_b64 s[30:31]
1788  %result = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y)
1789  ret <2 x half> %result
1790}
1791
; maximumnum on <2 x half> with the nnan flag on the call.
; Unlike the unflagged v2f16 case, none of the runs expect a self-max of the
; inputs: the packed targets expect a single v_pk_max_f16 (v_pk_max_num_f16 on
; the GFX12 run), and the GFX8 run expects one SDWA max for the upper halves,
; one plain max for the lower halves, and a v_or_b32 to merge them.
; Assertions are autogenerated (see the header note at the top of the file).
1792define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
1793; GFX8-LABEL: v_maximumnum_v2f16_nnan:
1794; GFX8:       ; %bb.0:
1795; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1796; GFX8-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1797; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
1798; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1799; GFX8-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GFX9-LABEL: v_maximumnum_v2f16_nnan:
1802; GFX9:       ; %bb.0:
1803; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
1805; GFX9-NEXT:    s_setpc_b64 s[30:31]
1806;
1807; GFX10-LABEL: v_maximumnum_v2f16_nnan:
1808; GFX10:       ; %bb.0:
1809; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1810; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
1811; GFX10-NEXT:    s_setpc_b64 s[30:31]
1812;
1813; GFX11-LABEL: v_maximumnum_v2f16_nnan:
1814; GFX11:       ; %bb.0:
1815; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1816; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
1817; GFX11-NEXT:    s_setpc_b64 s[30:31]
1818;
1819; GFX12-LABEL: v_maximumnum_v2f16_nnan:
1820; GFX12:       ; %bb.0:
1821; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1822; GFX12-NEXT:    s_wait_expcnt 0x0
1823; GFX12-NEXT:    s_wait_samplecnt 0x0
1824; GFX12-NEXT:    s_wait_bvhcnt 0x0
1825; GFX12-NEXT:    s_wait_kmcnt 0x0
1826; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v1
1827; GFX12-NEXT:    s_setpc_b64 s[30:31]
1828  %result = call nnan <2 x half> @llvm.maximumnum.v2f16(<2 x half> %x, <2 x half> %y)
1829  ret <2 x half> %result
1830}
1831
; maximumnum on <3 x half> with no fast-math flags.
; The checks use two registers per operand (v0/v1 and v2/v3).
; The GFX8 run expects SDWA maxes for the upper half of v0/v2, plain
; v_max_f16_e32 for the lower halves and for v1/v3, and a single v_or_b32 to
; merge the first register's halves.
; The packed targets expect v_pk_max_f16 on both register pairs (self-max of
; each input, then the real max); the GFX12 run expects v_pk_max_num_f16.
; Assertions are autogenerated (see the header note at the top of the file).
1832define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
1833; GFX8-LABEL: v_maximumnum_v3f16:
1834; GFX8:       ; %bb.0:
1835; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836; GFX8-NEXT:    v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1837; GFX8-NEXT:    v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1838; GFX8-NEXT:    v_max_f16_e32 v2, v2, v2
1839; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
1840; GFX8-NEXT:    v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1841; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
1842; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
1843; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1844; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1845; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
1846; GFX8-NEXT:    s_setpc_b64 s[30:31]
1847;
1848; GFX9-LABEL: v_maximumnum_v3f16:
1849; GFX9:       ; %bb.0:
1850; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1851; GFX9-NEXT:    v_pk_max_f16 v2, v2, v2
1852; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
1853; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
1854; GFX9-NEXT:    v_pk_max_f16 v2, v3, v3
1855; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
1856; GFX9-NEXT:    v_pk_max_f16 v1, v1, v2
1857; GFX9-NEXT:    s_setpc_b64 s[30:31]
1858;
1859; GFX10-LABEL: v_maximumnum_v3f16:
1860; GFX10:       ; %bb.0:
1861; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862; GFX10-NEXT:    v_pk_max_f16 v2, v2, v2
1863; GFX10-NEXT:    v_pk_max_f16 v0, v0, v0
1864; GFX10-NEXT:    v_pk_max_f16 v3, v3, v3
1865; GFX10-NEXT:    v_pk_max_f16 v1, v1, v1
1866; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1867; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1868; GFX10-NEXT:    s_setpc_b64 s[30:31]
1869;
1870; GFX11-LABEL: v_maximumnum_v3f16:
1871; GFX11:       ; %bb.0:
1872; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873; GFX11-NEXT:    v_pk_max_f16 v2, v2, v2
1874; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
1875; GFX11-NEXT:    v_pk_max_f16 v3, v3, v3
1876; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
1877; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1878; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1879; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1880; GFX11-NEXT:    s_setpc_b64 s[30:31]
1881;
1882; GFX12-LABEL: v_maximumnum_v3f16:
1883; GFX12:       ; %bb.0:
1884; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1885; GFX12-NEXT:    s_wait_expcnt 0x0
1886; GFX12-NEXT:    s_wait_samplecnt 0x0
1887; GFX12-NEXT:    s_wait_bvhcnt 0x0
1888; GFX12-NEXT:    s_wait_kmcnt 0x0
1889; GFX12-NEXT:    v_pk_max_num_f16 v2, v2, v2
1890; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v0
1891; GFX12-NEXT:    v_pk_max_num_f16 v3, v3, v3
1892; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v1
1893; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1894; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v2
1895; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v3
1896; GFX12-NEXT:    s_setpc_b64 s[30:31]
1897  %result = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> %x, <3 x half> %y)
1898  ret <3 x half> %result
1899}
1900
; maximumnum on <3 x half> with the nnan flag on the call.
; Unlike the unflagged v3f16 case, none of the runs expect a self-max of the
; inputs: the packed targets expect one v_pk_max_f16 per register pair
; (v_pk_max_num_f16 on the GFX12 run), and the GFX8 run expects one SDWA max
; for the upper half of v0/v2, plain maxes for the rest, and one v_or_b32.
; Assertions are autogenerated (see the header note at the top of the file).
1901define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
1902; GFX8-LABEL: v_maximumnum_v3f16_nnan:
1903; GFX8:       ; %bb.0:
1904; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905; GFX8-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1906; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1907; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1908; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
1909; GFX8-NEXT:    s_setpc_b64 s[30:31]
1910;
1911; GFX9-LABEL: v_maximumnum_v3f16_nnan:
1912; GFX9:       ; %bb.0:
1913; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
1915; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
1916; GFX9-NEXT:    s_setpc_b64 s[30:31]
1917;
1918; GFX10-LABEL: v_maximumnum_v3f16_nnan:
1919; GFX10:       ; %bb.0:
1920; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1921; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1922; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1923; GFX10-NEXT:    s_setpc_b64 s[30:31]
1924;
1925; GFX11-LABEL: v_maximumnum_v3f16_nnan:
1926; GFX11:       ; %bb.0:
1927; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1928; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1929; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1930; GFX11-NEXT:    s_setpc_b64 s[30:31]
1931;
1932; GFX12-LABEL: v_maximumnum_v3f16_nnan:
1933; GFX12:       ; %bb.0:
1934; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1935; GFX12-NEXT:    s_wait_expcnt 0x0
1936; GFX12-NEXT:    s_wait_samplecnt 0x0
1937; GFX12-NEXT:    s_wait_bvhcnt 0x0
1938; GFX12-NEXT:    s_wait_kmcnt 0x0
1939; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v2
1940; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v3
1941; GFX12-NEXT:    s_setpc_b64 s[30:31]
1942  %result = call nnan <3 x half> @llvm.maximumnum.v3f16(<3 x half> %x, <3 x half> %y)
1943  ret <3 x half> %result
1944}
1945
; maximumnum on <4 x half> with no fast-math flags.
; The checks use two registers per operand (v0/v1 and v2/v3).
; The GFX8 run expects, for each register, SDWA maxes on the upper halves,
; plain v_max_f16_e32 on the lower halves, and a v_or_b32 to merge them.
; The packed targets expect v_pk_max_f16 on both register pairs (self-max of
; each input, then the real max); the GFX12 run expects v_pk_max_num_f16.
; Assertions are autogenerated (see the header note at the top of the file).
1946define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
1947; GFX8-LABEL: v_maximumnum_v4f16:
1948; GFX8:       ; %bb.0:
1949; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1950; GFX8-NEXT:    v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1951; GFX8-NEXT:    v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1952; GFX8-NEXT:    v_max_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1953; GFX8-NEXT:    v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1954; GFX8-NEXT:    v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1955; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
1956; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
1957; GFX8-NEXT:    v_max_f16_e32 v2, v2, v2
1958; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
1959; GFX8-NEXT:    v_max_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1960; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
1961; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
1962; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
1963; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1964; GFX8-NEXT:    s_setpc_b64 s[30:31]
1965;
1966; GFX9-LABEL: v_maximumnum_v4f16:
1967; GFX9:       ; %bb.0:
1968; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969; GFX9-NEXT:    v_pk_max_f16 v2, v2, v2
1970; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
1971; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
1972; GFX9-NEXT:    v_pk_max_f16 v2, v3, v3
1973; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
1974; GFX9-NEXT:    v_pk_max_f16 v1, v1, v2
1975; GFX9-NEXT:    s_setpc_b64 s[30:31]
1976;
1977; GFX10-LABEL: v_maximumnum_v4f16:
1978; GFX10:       ; %bb.0:
1979; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1980; GFX10-NEXT:    v_pk_max_f16 v2, v2, v2
1981; GFX10-NEXT:    v_pk_max_f16 v0, v0, v0
1982; GFX10-NEXT:    v_pk_max_f16 v3, v3, v3
1983; GFX10-NEXT:    v_pk_max_f16 v1, v1, v1
1984; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
1985; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
1986; GFX10-NEXT:    s_setpc_b64 s[30:31]
1987;
1988; GFX11-LABEL: v_maximumnum_v4f16:
1989; GFX11:       ; %bb.0:
1990; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1991; GFX11-NEXT:    v_pk_max_f16 v2, v2, v2
1992; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
1993; GFX11-NEXT:    v_pk_max_f16 v3, v3, v3
1994; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
1995; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1996; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
1997; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
1998; GFX11-NEXT:    s_setpc_b64 s[30:31]
1999;
2000; GFX12-LABEL: v_maximumnum_v4f16:
2001; GFX12:       ; %bb.0:
2002; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2003; GFX12-NEXT:    s_wait_expcnt 0x0
2004; GFX12-NEXT:    s_wait_samplecnt 0x0
2005; GFX12-NEXT:    s_wait_bvhcnt 0x0
2006; GFX12-NEXT:    s_wait_kmcnt 0x0
2007; GFX12-NEXT:    v_pk_max_num_f16 v2, v2, v2
2008; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v0
2009; GFX12-NEXT:    v_pk_max_num_f16 v3, v3, v3
2010; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v1
2011; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2012; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v2
2013; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v3
2014; GFX12-NEXT:    s_setpc_b64 s[30:31]
2015  %result = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> %y)
2016  ret <4 x half> %result
2017}
2018
; maximumnum on <4 x half> with the nnan flag on the call.
; Unlike the unflagged v4f16 case, none of the runs expect a self-max of the
; inputs: the packed targets expect one v_pk_max_f16 per register pair
; (v_pk_max_num_f16 on the GFX12 run), and the GFX8 run expects one SDWA max
; plus one plain max per register pair, merged with v_or_b32.
; Assertions are autogenerated (see the header note at the top of the file).
2019define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
2020; GFX8-LABEL: v_maximumnum_v4f16_nnan:
2021; GFX8:       ; %bb.0:
2022; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023; GFX8-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2024; GFX8-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2025; GFX8-NEXT:    v_max_f16_e32 v1, v1, v3
2026; GFX8-NEXT:    v_max_f16_e32 v0, v0, v2
2027; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
2028; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
2029; GFX8-NEXT:    s_setpc_b64 s[30:31]
2030;
2031; GFX9-LABEL: v_maximumnum_v4f16_nnan:
2032; GFX9:       ; %bb.0:
2033; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
2035; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
2036; GFX9-NEXT:    s_setpc_b64 s[30:31]
2037;
2038; GFX10-LABEL: v_maximumnum_v4f16_nnan:
2039; GFX10:       ; %bb.0:
2040; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2041; GFX10-NEXT:    v_pk_max_f16 v0, v0, v2
2042; GFX10-NEXT:    v_pk_max_f16 v1, v1, v3
2043; GFX10-NEXT:    s_setpc_b64 s[30:31]
2044;
2045; GFX11-LABEL: v_maximumnum_v4f16_nnan:
2046; GFX11:       ; %bb.0:
2047; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2048; GFX11-NEXT:    v_pk_max_f16 v0, v0, v2
2049; GFX11-NEXT:    v_pk_max_f16 v1, v1, v3
2050; GFX11-NEXT:    s_setpc_b64 s[30:31]
2051;
2052; GFX12-LABEL: v_maximumnum_v4f16_nnan:
2053; GFX12:       ; %bb.0:
2054; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2055; GFX12-NEXT:    s_wait_expcnt 0x0
2056; GFX12-NEXT:    s_wait_samplecnt 0x0
2057; GFX12-NEXT:    s_wait_bvhcnt 0x0
2058; GFX12-NEXT:    s_wait_kmcnt 0x0
2059; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v2
2060; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v3
2061; GFX12-NEXT:    s_setpc_b64 s[30:31]
2062  %result = call nnan <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> %y)
2063  ret <4 x half> %result
2064}
2065
; maximumnum on <6 x half> with no fast-math flags.
; The checks use three registers per operand (v0-v2 and v3-v5).
; The GFX8 run expects, for each register, SDWA maxes on the upper halves,
; plain v_max_f16_e32 on the lower halves, and a v_or_b32 to merge them.
; The packed targets expect three v_pk_max_f16 per register pair (self-max of
; each input, then the real max); the GFX12 run expects v_pk_max_num_f16.
; Assertions are autogenerated (see the header note at the top of the file).
2066define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
2067; GFX8-LABEL: v_maximumnum_v6f16:
2068; GFX8:       ; %bb.0:
2069; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070; GFX8-NEXT:    v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2071; GFX8-NEXT:    v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2072; GFX8-NEXT:    v_max_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2073; GFX8-NEXT:    v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2074; GFX8-NEXT:    v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2075; GFX8-NEXT:    v_max_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2076; GFX8-NEXT:    v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2077; GFX8-NEXT:    v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2078; GFX8-NEXT:    v_max_f16_e32 v5, v5, v5
2079; GFX8-NEXT:    v_max_f16_e32 v2, v2, v2
2080; GFX8-NEXT:    v_max_f16_e32 v4, v4, v4
2081; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
2082; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
2083; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
2084; GFX8-NEXT:    v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2085; GFX8-NEXT:    v_max_f16_e32 v2, v2, v5
2086; GFX8-NEXT:    v_max_f16_e32 v1, v1, v4
2087; GFX8-NEXT:    v_max_f16_e32 v0, v0, v3
2088; GFX8-NEXT:    v_or_b32_e32 v0, v0, v8
2089; GFX8-NEXT:    v_or_b32_e32 v1, v1, v7
2090; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
2091; GFX8-NEXT:    s_setpc_b64 s[30:31]
2092;
2093; GFX9-LABEL: v_maximumnum_v6f16:
2094; GFX9:       ; %bb.0:
2095; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2096; GFX9-NEXT:    v_pk_max_f16 v3, v3, v3
2097; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
2098; GFX9-NEXT:    v_pk_max_f16 v0, v0, v3
2099; GFX9-NEXT:    v_pk_max_f16 v3, v4, v4
2100; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
2101; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
2102; GFX9-NEXT:    v_pk_max_f16 v3, v5, v5
2103; GFX9-NEXT:    v_pk_max_f16 v2, v2, v2
2104; GFX9-NEXT:    v_pk_max_f16 v2, v2, v3
2105; GFX9-NEXT:    s_setpc_b64 s[30:31]
2106;
2107; GFX10-LABEL: v_maximumnum_v6f16:
2108; GFX10:       ; %bb.0:
2109; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2110; GFX10-NEXT:    v_pk_max_f16 v3, v3, v3
2111; GFX10-NEXT:    v_pk_max_f16 v0, v0, v0
2112; GFX10-NEXT:    v_pk_max_f16 v4, v4, v4
2113; GFX10-NEXT:    v_pk_max_f16 v1, v1, v1
2114; GFX10-NEXT:    v_pk_max_f16 v5, v5, v5
2115; GFX10-NEXT:    v_pk_max_f16 v2, v2, v2
2116; GFX10-NEXT:    v_pk_max_f16 v0, v0, v3
2117; GFX10-NEXT:    v_pk_max_f16 v1, v1, v4
2118; GFX10-NEXT:    v_pk_max_f16 v2, v2, v5
2119; GFX10-NEXT:    s_setpc_b64 s[30:31]
2120;
2121; GFX11-LABEL: v_maximumnum_v6f16:
2122; GFX11:       ; %bb.0:
2123; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124; GFX11-NEXT:    v_pk_max_f16 v3, v3, v3
2125; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
2126; GFX11-NEXT:    v_pk_max_f16 v4, v4, v4
2127; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
2128; GFX11-NEXT:    v_pk_max_f16 v5, v5, v5
2129; GFX11-NEXT:    v_pk_max_f16 v2, v2, v2
2130; GFX11-NEXT:    v_pk_max_f16 v0, v0, v3
2131; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2132; GFX11-NEXT:    v_pk_max_f16 v1, v1, v4
2133; GFX11-NEXT:    v_pk_max_f16 v2, v2, v5
2134; GFX11-NEXT:    s_setpc_b64 s[30:31]
2135;
2136; GFX12-LABEL: v_maximumnum_v6f16:
2137; GFX12:       ; %bb.0:
2138; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2139; GFX12-NEXT:    s_wait_expcnt 0x0
2140; GFX12-NEXT:    s_wait_samplecnt 0x0
2141; GFX12-NEXT:    s_wait_bvhcnt 0x0
2142; GFX12-NEXT:    s_wait_kmcnt 0x0
2143; GFX12-NEXT:    v_pk_max_num_f16 v3, v3, v3
2144; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v0
2145; GFX12-NEXT:    v_pk_max_num_f16 v4, v4, v4
2146; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v1
2147; GFX12-NEXT:    v_pk_max_num_f16 v5, v5, v5
2148; GFX12-NEXT:    v_pk_max_num_f16 v2, v2, v2
2149; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v3
2150; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2151; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v4
2152; GFX12-NEXT:    v_pk_max_num_f16 v2, v2, v5
2153; GFX12-NEXT:    s_setpc_b64 s[30:31]
2154  %result = call <6 x half> @llvm.maximumnum.v6f16(<6 x half> %x, <6 x half> %y)
2155  ret <6 x half> %result
2156}
2157
; maximumnum on <8 x half> with no fast-math flags.
; The checks use four registers per operand (v0-v3 and v4-v7).
; The GFX8 run expects, for each register, SDWA maxes on the upper halves,
; plain v_max_f16_e32 on the lower halves, and a v_or_b32 to merge them.
; The packed targets expect three v_pk_max_f16 per register pair (self-max of
; each input, then the real max); the GFX12 run expects v_pk_max_num_f16.
; Assertions are autogenerated (see the header note at the top of the file).
2158define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
2159; GFX8-LABEL: v_maximumnum_v8f16:
2160; GFX8:       ; %bb.0:
2161; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2162; GFX8-NEXT:    v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2163; GFX8-NEXT:    v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2164; GFX8-NEXT:    v_max_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2165; GFX8-NEXT:    v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2166; GFX8-NEXT:    v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2167; GFX8-NEXT:    v_max_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2168; GFX8-NEXT:    v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2169; GFX8-NEXT:    v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2170; GFX8-NEXT:    v_max_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2171; GFX8-NEXT:    v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2172; GFX8-NEXT:    v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2173; GFX8-NEXT:    v_max_f16_e32 v7, v7, v7
2174; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
2175; GFX8-NEXT:    v_max_f16_e32 v6, v6, v6
2176; GFX8-NEXT:    v_max_f16_e32 v2, v2, v2
2177; GFX8-NEXT:    v_max_f16_e32 v5, v5, v5
2178; GFX8-NEXT:    v_max_f16_e32 v1, v1, v1
2179; GFX8-NEXT:    v_max_f16_e32 v4, v4, v4
2180; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
2181; GFX8-NEXT:    v_max_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2182; GFX8-NEXT:    v_max_f16_e32 v3, v3, v7
2183; GFX8-NEXT:    v_max_f16_e32 v2, v2, v6
2184; GFX8-NEXT:    v_max_f16_e32 v1, v1, v5
2185; GFX8-NEXT:    v_max_f16_e32 v0, v0, v4
2186; GFX8-NEXT:    v_or_b32_e32 v0, v0, v11
2187; GFX8-NEXT:    v_or_b32_e32 v1, v1, v10
2188; GFX8-NEXT:    v_or_b32_e32 v2, v2, v9
2189; GFX8-NEXT:    v_or_b32_e32 v3, v3, v8
2190; GFX8-NEXT:    s_setpc_b64 s[30:31]
2191;
2192; GFX9-LABEL: v_maximumnum_v8f16:
2193; GFX9:       ; %bb.0:
2194; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195; GFX9-NEXT:    v_pk_max_f16 v4, v4, v4
2196; GFX9-NEXT:    v_pk_max_f16 v0, v0, v0
2197; GFX9-NEXT:    v_pk_max_f16 v0, v0, v4
2198; GFX9-NEXT:    v_pk_max_f16 v4, v5, v5
2199; GFX9-NEXT:    v_pk_max_f16 v1, v1, v1
2200; GFX9-NEXT:    v_pk_max_f16 v1, v1, v4
2201; GFX9-NEXT:    v_pk_max_f16 v4, v6, v6
2202; GFX9-NEXT:    v_pk_max_f16 v2, v2, v2
2203; GFX9-NEXT:    v_pk_max_f16 v2, v2, v4
2204; GFX9-NEXT:    v_pk_max_f16 v4, v7, v7
2205; GFX9-NEXT:    v_pk_max_f16 v3, v3, v3
2206; GFX9-NEXT:    v_pk_max_f16 v3, v3, v4
2207; GFX9-NEXT:    s_setpc_b64 s[30:31]
2208;
2209; GFX10-LABEL: v_maximumnum_v8f16:
2210; GFX10:       ; %bb.0:
2211; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2212; GFX10-NEXT:    v_pk_max_f16 v4, v4, v4
2213; GFX10-NEXT:    v_pk_max_f16 v0, v0, v0
2214; GFX10-NEXT:    v_pk_max_f16 v5, v5, v5
2215; GFX10-NEXT:    v_pk_max_f16 v1, v1, v1
2216; GFX10-NEXT:    v_pk_max_f16 v6, v6, v6
2217; GFX10-NEXT:    v_pk_max_f16 v2, v2, v2
2218; GFX10-NEXT:    v_pk_max_f16 v7, v7, v7
2219; GFX10-NEXT:    v_pk_max_f16 v3, v3, v3
2220; GFX10-NEXT:    v_pk_max_f16 v0, v0, v4
2221; GFX10-NEXT:    v_pk_max_f16 v1, v1, v5
2222; GFX10-NEXT:    v_pk_max_f16 v2, v2, v6
2223; GFX10-NEXT:    v_pk_max_f16 v3, v3, v7
2224; GFX10-NEXT:    s_setpc_b64 s[30:31]
2225;
2226; GFX11-LABEL: v_maximumnum_v8f16:
2227; GFX11:       ; %bb.0:
2228; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2229; GFX11-NEXT:    v_pk_max_f16 v4, v4, v4
2230; GFX11-NEXT:    v_pk_max_f16 v0, v0, v0
2231; GFX11-NEXT:    v_pk_max_f16 v5, v5, v5
2232; GFX11-NEXT:    v_pk_max_f16 v1, v1, v1
2233; GFX11-NEXT:    v_pk_max_f16 v6, v6, v6
2234; GFX11-NEXT:    v_pk_max_f16 v2, v2, v2
2235; GFX11-NEXT:    v_pk_max_f16 v7, v7, v7
2236; GFX11-NEXT:    v_pk_max_f16 v3, v3, v3
2237; GFX11-NEXT:    v_pk_max_f16 v0, v0, v4
2238; GFX11-NEXT:    v_pk_max_f16 v1, v1, v5
2239; GFX11-NEXT:    v_pk_max_f16 v2, v2, v6
2240; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2241; GFX11-NEXT:    v_pk_max_f16 v3, v3, v7
2242; GFX11-NEXT:    s_setpc_b64 s[30:31]
2243;
2244; GFX12-LABEL: v_maximumnum_v8f16:
2245; GFX12:       ; %bb.0:
2246; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2247; GFX12-NEXT:    s_wait_expcnt 0x0
2248; GFX12-NEXT:    s_wait_samplecnt 0x0
2249; GFX12-NEXT:    s_wait_bvhcnt 0x0
2250; GFX12-NEXT:    s_wait_kmcnt 0x0
2251; GFX12-NEXT:    v_pk_max_num_f16 v4, v4, v4
2252; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v0
2253; GFX12-NEXT:    v_pk_max_num_f16 v5, v5, v5
2254; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v1
2255; GFX12-NEXT:    v_pk_max_num_f16 v6, v6, v6
2256; GFX12-NEXT:    v_pk_max_num_f16 v2, v2, v2
2257; GFX12-NEXT:    v_pk_max_num_f16 v7, v7, v7
2258; GFX12-NEXT:    v_pk_max_num_f16 v3, v3, v3
2259; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, v4
2260; GFX12-NEXT:    v_pk_max_num_f16 v1, v1, v5
2261; GFX12-NEXT:    v_pk_max_num_f16 v2, v2, v6
2262; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2263; GFX12-NEXT:    v_pk_max_num_f16 v3, v3, v7
2264; GFX12-NEXT:    s_setpc_b64 s[30:31]
2265  %result = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %x, <8 x half> %y)
2266  ret <8 x half> %result
2267}
2268
; llvm.maximumnum on <2 x float> with no fast-math flags on the call.
; The expected code pre-processes every input lane before the final max:
; gfx803 multiplies each lane by 1.0, gfx900/gfx1030 take the max of a lane
; with itself, and gfx1100/gfx1200 do the same using the v_dual_* forms.
; NOTE(review): presumably this pre-processing quiets signaling NaNs - confirm
; against the lowering; the nnan variant of this test expects none of it.
2269define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
2270; GFX8-LABEL: v_maximumnum_v2f32:
2271; GFX8:       ; %bb.0:
2272; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2273; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
2274; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
2275; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
2276; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v3
2277; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
2278; GFX8-NEXT:    v_max_f32_e32 v1, v1, v2
2279; GFX8-NEXT:    s_setpc_b64 s[30:31]
2280;
2281; GFX9-LABEL: v_maximumnum_v2f32:
2282; GFX9:       ; %bb.0:
2283; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2284; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
2285; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
2286; GFX9-NEXT:    v_max_f32_e32 v0, v0, v2
2287; GFX9-NEXT:    v_max_f32_e32 v2, v3, v3
2288; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
2289; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
2290; GFX9-NEXT:    s_setpc_b64 s[30:31]
2291;
2292; GFX10-LABEL: v_maximumnum_v2f32:
2293; GFX10:       ; %bb.0:
2294; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
2296; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
2297; GFX10-NEXT:    v_max_f32_e32 v3, v3, v3
2298; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
2299; GFX10-NEXT:    v_max_f32_e32 v0, v0, v2
2300; GFX10-NEXT:    v_max_f32_e32 v1, v1, v3
2301; GFX10-NEXT:    s_setpc_b64 s[30:31]
2302;
2303; GFX11-LABEL: v_maximumnum_v2f32:
2304; GFX11:       ; %bb.0:
2305; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2306; GFX11-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
2307; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2308; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2309; GFX11-NEXT:    v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
2310; GFX11-NEXT:    s_setpc_b64 s[30:31]
2311;
2312; GFX12-LABEL: v_maximumnum_v2f32:
2313; GFX12:       ; %bb.0:
2314; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2315; GFX12-NEXT:    s_wait_expcnt 0x0
2316; GFX12-NEXT:    s_wait_samplecnt 0x0
2317; GFX12-NEXT:    s_wait_bvhcnt 0x0
2318; GFX12-NEXT:    s_wait_kmcnt 0x0
2319; GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
2320; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
2321; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2322; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
2323; GFX12-NEXT:    s_setpc_b64 s[30:31]
2324  %result = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> %x, <2 x float> %y)
2325  ret <2 x float> %result
2326}
2327
; llvm.maximumnum on <2 x float> with the nnan flag on the call.
; The expected code on every target is only the per-lane max (a single
; v_dual_* instruction on gfx1100/gfx1200); no input lane is pre-processed
; with a multiply by 1.0 or a self-max.
2328define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
2329; GFX8-LABEL: v_maximumnum_v2f32_nnan:
2330; GFX8:       ; %bb.0:
2331; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2332; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
2333; GFX8-NEXT:    v_max_f32_e32 v1, v1, v3
2334; GFX8-NEXT:    s_setpc_b64 s[30:31]
2335;
2336; GFX9-LABEL: v_maximumnum_v2f32_nnan:
2337; GFX9:       ; %bb.0:
2338; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339; GFX9-NEXT:    v_max_f32_e32 v0, v0, v2
2340; GFX9-NEXT:    v_max_f32_e32 v1, v1, v3
2341; GFX9-NEXT:    s_setpc_b64 s[30:31]
2342;
2343; GFX10-LABEL: v_maximumnum_v2f32_nnan:
2344; GFX10:       ; %bb.0:
2345; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2346; GFX10-NEXT:    v_max_f32_e32 v0, v0, v2
2347; GFX10-NEXT:    v_max_f32_e32 v1, v1, v3
2348; GFX10-NEXT:    s_setpc_b64 s[30:31]
2349;
2350; GFX11-LABEL: v_maximumnum_v2f32_nnan:
2351; GFX11:       ; %bb.0:
2352; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353; GFX11-NEXT:    v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
2354; GFX11-NEXT:    s_setpc_b64 s[30:31]
2355;
2356; GFX12-LABEL: v_maximumnum_v2f32_nnan:
2357; GFX12:       ; %bb.0:
2358; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2359; GFX12-NEXT:    s_wait_expcnt 0x0
2360; GFX12-NEXT:    s_wait_samplecnt 0x0
2361; GFX12-NEXT:    s_wait_bvhcnt 0x0
2362; GFX12-NEXT:    s_wait_kmcnt 0x0
2363; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
2364; GFX12-NEXT:    s_setpc_b64 s[30:31]
2365  %result = call nnan <2 x float> @llvm.maximumnum.v2f32(<2 x float> %x, <2 x float> %y)
2366  ret <2 x float> %result
2367}
2368
; llvm.maximumnum on <3 x float> with no fast-math flags on the call.
; As in the other non-nnan float tests, every input lane is pre-processed
; before the final max (multiply by 1.0 on gfx803, self-max elsewhere).
; On gfx1100/gfx1200 the first two result lanes share one v_dual_* instruction
; and the odd third lane uses the plain e32 max.
; NOTE(review): the pre-processing presumably quiets signaling NaNs - confirm.
2369define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
2370; GFX8-LABEL: v_maximumnum_v3f32:
2371; GFX8:       ; %bb.0:
2372; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v3
2374; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
2375; GFX8-NEXT:    v_max_f32_e32 v0, v0, v3
2376; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v4
2377; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
2378; GFX8-NEXT:    v_max_f32_e32 v1, v1, v3
2379; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v5
2380; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
2381; GFX8-NEXT:    v_max_f32_e32 v2, v2, v3
2382; GFX8-NEXT:    s_setpc_b64 s[30:31]
2383;
2384; GFX9-LABEL: v_maximumnum_v3f32:
2385; GFX9:       ; %bb.0:
2386; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2387; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
2388; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
2389; GFX9-NEXT:    v_max_f32_e32 v0, v0, v3
2390; GFX9-NEXT:    v_max_f32_e32 v3, v4, v4
2391; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
2392; GFX9-NEXT:    v_max_f32_e32 v1, v1, v3
2393; GFX9-NEXT:    v_max_f32_e32 v3, v5, v5
2394; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
2395; GFX9-NEXT:    v_max_f32_e32 v2, v2, v3
2396; GFX9-NEXT:    s_setpc_b64 s[30:31]
2397;
2398; GFX10-LABEL: v_maximumnum_v3f32:
2399; GFX10:       ; %bb.0:
2400; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2401; GFX10-NEXT:    v_max_f32_e32 v3, v3, v3
2402; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
2403; GFX10-NEXT:    v_max_f32_e32 v4, v4, v4
2404; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
2405; GFX10-NEXT:    v_max_f32_e32 v5, v5, v5
2406; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
2407; GFX10-NEXT:    v_max_f32_e32 v0, v0, v3
2408; GFX10-NEXT:    v_max_f32_e32 v1, v1, v4
2409; GFX10-NEXT:    v_max_f32_e32 v2, v2, v5
2410; GFX10-NEXT:    s_setpc_b64 s[30:31]
2411;
2412; GFX11-LABEL: v_maximumnum_v3f32:
2413; GFX11:       ; %bb.0:
2414; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2415; GFX11-NEXT:    v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0
2416; GFX11-NEXT:    v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1
2417; GFX11-NEXT:    v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2
2418; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2419; GFX11-NEXT:    v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
2420; GFX11-NEXT:    v_max_f32_e32 v2, v2, v5
2421; GFX11-NEXT:    s_setpc_b64 s[30:31]
2422;
2423; GFX12-LABEL: v_maximumnum_v3f32:
2424; GFX12:       ; %bb.0:
2425; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2426; GFX12-NEXT:    s_wait_expcnt 0x0
2427; GFX12-NEXT:    s_wait_samplecnt 0x0
2428; GFX12-NEXT:    s_wait_bvhcnt 0x0
2429; GFX12-NEXT:    s_wait_kmcnt 0x0
2430; GFX12-NEXT:    v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
2431; GFX12-NEXT:    v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
2432; GFX12-NEXT:    v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
2433; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2434; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
2435; GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v5
2436; GFX12-NEXT:    s_setpc_b64 s[30:31]
2437  %result = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> %x, <3 x float> %y)
2438  ret <3 x float> %result
2439}
2440
; llvm.maximumnum on <3 x float> with the nnan flag on the call.
; The expected code is one max per lane with no pre-processing of the inputs;
; gfx1100/gfx1200 pair the first two lanes in a v_dual_* instruction and use
; the plain e32 max for the third.
2441define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
2442; GFX8-LABEL: v_maximumnum_v3f32_nnan:
2443; GFX8:       ; %bb.0:
2444; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2445; GFX8-NEXT:    v_max_f32_e32 v0, v0, v3
2446; GFX8-NEXT:    v_max_f32_e32 v1, v1, v4
2447; GFX8-NEXT:    v_max_f32_e32 v2, v2, v5
2448; GFX8-NEXT:    s_setpc_b64 s[30:31]
2449;
2450; GFX9-LABEL: v_maximumnum_v3f32_nnan:
2451; GFX9:       ; %bb.0:
2452; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2453; GFX9-NEXT:    v_max_f32_e32 v0, v0, v3
2454; GFX9-NEXT:    v_max_f32_e32 v1, v1, v4
2455; GFX9-NEXT:    v_max_f32_e32 v2, v2, v5
2456; GFX9-NEXT:    s_setpc_b64 s[30:31]
2457;
2458; GFX10-LABEL: v_maximumnum_v3f32_nnan:
2459; GFX10:       ; %bb.0:
2460; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2461; GFX10-NEXT:    v_max_f32_e32 v0, v0, v3
2462; GFX10-NEXT:    v_max_f32_e32 v1, v1, v4
2463; GFX10-NEXT:    v_max_f32_e32 v2, v2, v5
2464; GFX10-NEXT:    s_setpc_b64 s[30:31]
2465;
2466; GFX11-LABEL: v_maximumnum_v3f32_nnan:
2467; GFX11:       ; %bb.0:
2468; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2469; GFX11-NEXT:    v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
2470; GFX11-NEXT:    v_max_f32_e32 v2, v2, v5
2471; GFX11-NEXT:    s_setpc_b64 s[30:31]
2472;
2473; GFX12-LABEL: v_maximumnum_v3f32_nnan:
2474; GFX12:       ; %bb.0:
2475; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2476; GFX12-NEXT:    s_wait_expcnt 0x0
2477; GFX12-NEXT:    s_wait_samplecnt 0x0
2478; GFX12-NEXT:    s_wait_bvhcnt 0x0
2479; GFX12-NEXT:    s_wait_kmcnt 0x0
2480; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
2481; GFX12-NEXT:    v_max_num_f32_e32 v2, v2, v5
2482; GFX12-NEXT:    s_setpc_b64 s[30:31]
2483  %result = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> %x, <3 x float> %y)
2484  ret <3 x float> %result
2485}
2486
; llvm.maximumnum on <4 x float> with no fast-math flags on the call.
; All eight input lanes are pre-processed before the four final max
; operations: multiply by 1.0 on gfx803, self-max on gfx900/gfx1030, and
; self-max in v_dual_* pairs on gfx1100/gfx1200.
; NOTE(review): the pre-processing presumably quiets signaling NaNs - confirm.
2487define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
2488; GFX8-LABEL: v_maximumnum_v4f32:
2489; GFX8:       ; %bb.0:
2490; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2491; GFX8-NEXT:    v_mul_f32_e32 v4, 1.0, v4
2492; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
2493; GFX8-NEXT:    v_max_f32_e32 v0, v0, v4
2494; GFX8-NEXT:    v_mul_f32_e32 v4, 1.0, v5
2495; GFX8-NEXT:    v_mul_f32_e32 v1, 1.0, v1
2496; GFX8-NEXT:    v_max_f32_e32 v1, v1, v4
2497; GFX8-NEXT:    v_mul_f32_e32 v4, 1.0, v6
2498; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
2499; GFX8-NEXT:    v_max_f32_e32 v2, v2, v4
2500; GFX8-NEXT:    v_mul_f32_e32 v4, 1.0, v7
2501; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v3
2502; GFX8-NEXT:    v_max_f32_e32 v3, v3, v4
2503; GFX8-NEXT:    s_setpc_b64 s[30:31]
2504;
2505; GFX9-LABEL: v_maximumnum_v4f32:
2506; GFX9:       ; %bb.0:
2507; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2508; GFX9-NEXT:    v_max_f32_e32 v4, v4, v4
2509; GFX9-NEXT:    v_max_f32_e32 v0, v0, v0
2510; GFX9-NEXT:    v_max_f32_e32 v0, v0, v4
2511; GFX9-NEXT:    v_max_f32_e32 v4, v5, v5
2512; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
2513; GFX9-NEXT:    v_max_f32_e32 v1, v1, v4
2514; GFX9-NEXT:    v_max_f32_e32 v4, v6, v6
2515; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
2516; GFX9-NEXT:    v_max_f32_e32 v2, v2, v4
2517; GFX9-NEXT:    v_max_f32_e32 v4, v7, v7
2518; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
2519; GFX9-NEXT:    v_max_f32_e32 v3, v3, v4
2520; GFX9-NEXT:    s_setpc_b64 s[30:31]
2521;
2522; GFX10-LABEL: v_maximumnum_v4f32:
2523; GFX10:       ; %bb.0:
2524; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2525; GFX10-NEXT:    v_max_f32_e32 v4, v4, v4
2526; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
2527; GFX10-NEXT:    v_max_f32_e32 v5, v5, v5
2528; GFX10-NEXT:    v_max_f32_e32 v1, v1, v1
2529; GFX10-NEXT:    v_max_f32_e32 v6, v6, v6
2530; GFX10-NEXT:    v_max_f32_e32 v2, v2, v2
2531; GFX10-NEXT:    v_max_f32_e32 v7, v7, v7
2532; GFX10-NEXT:    v_max_f32_e32 v3, v3, v3
2533; GFX10-NEXT:    v_max_f32_e32 v0, v0, v4
2534; GFX10-NEXT:    v_max_f32_e32 v1, v1, v5
2535; GFX10-NEXT:    v_max_f32_e32 v2, v2, v6
2536; GFX10-NEXT:    v_max_f32_e32 v3, v3, v7
2537; GFX10-NEXT:    s_setpc_b64 s[30:31]
2538;
2539; GFX11-LABEL: v_maximumnum_v4f32:
2540; GFX11:       ; %bb.0:
2541; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2542; GFX11-NEXT:    v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
2543; GFX11-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2544; GFX11-NEXT:    v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
2545; GFX11-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
2546; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2547; GFX11-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
2548; GFX11-NEXT:    v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
2549; GFX11-NEXT:    s_setpc_b64 s[30:31]
2550;
2551; GFX12-LABEL: v_maximumnum_v4f32:
2552; GFX12:       ; %bb.0:
2553; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2554; GFX12-NEXT:    s_wait_expcnt 0x0
2555; GFX12-NEXT:    s_wait_samplecnt 0x0
2556; GFX12-NEXT:    s_wait_bvhcnt 0x0
2557; GFX12-NEXT:    s_wait_kmcnt 0x0
2558; GFX12-NEXT:    v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
2559; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
2560; GFX12-NEXT:    v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
2561; GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
2562; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2563; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
2564; GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
2565; GFX12-NEXT:    s_setpc_b64 s[30:31]
2566  %result = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
2567  ret <4 x float> %result
2568}
2569
; llvm.maximumnum on <4 x float> with the nnan flag on the call.
; The expected code is just four per-lane max operations (two v_dual_*
; instructions on gfx1100/gfx1200) with no pre-processing of the inputs.
2570define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
2571; GFX8-LABEL: v_maximumnum_v4f32_nnan:
2572; GFX8:       ; %bb.0:
2573; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2574; GFX8-NEXT:    v_max_f32_e32 v0, v0, v4
2575; GFX8-NEXT:    v_max_f32_e32 v1, v1, v5
2576; GFX8-NEXT:    v_max_f32_e32 v2, v2, v6
2577; GFX8-NEXT:    v_max_f32_e32 v3, v3, v7
2578; GFX8-NEXT:    s_setpc_b64 s[30:31]
2579;
2580; GFX9-LABEL: v_maximumnum_v4f32_nnan:
2581; GFX9:       ; %bb.0:
2582; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2583; GFX9-NEXT:    v_max_f32_e32 v0, v0, v4
2584; GFX9-NEXT:    v_max_f32_e32 v1, v1, v5
2585; GFX9-NEXT:    v_max_f32_e32 v2, v2, v6
2586; GFX9-NEXT:    v_max_f32_e32 v3, v3, v7
2587; GFX9-NEXT:    s_setpc_b64 s[30:31]
2588;
2589; GFX10-LABEL: v_maximumnum_v4f32_nnan:
2590; GFX10:       ; %bb.0:
2591; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2592; GFX10-NEXT:    v_max_f32_e32 v0, v0, v4
2593; GFX10-NEXT:    v_max_f32_e32 v1, v1, v5
2594; GFX10-NEXT:    v_max_f32_e32 v2, v2, v6
2595; GFX10-NEXT:    v_max_f32_e32 v3, v3, v7
2596; GFX10-NEXT:    s_setpc_b64 s[30:31]
2597;
2598; GFX11-LABEL: v_maximumnum_v4f32_nnan:
2599; GFX11:       ; %bb.0:
2600; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601; GFX11-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
2602; GFX11-NEXT:    v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
2603; GFX11-NEXT:    s_setpc_b64 s[30:31]
2604;
2605; GFX12-LABEL: v_maximumnum_v4f32_nnan:
2606; GFX12:       ; %bb.0:
2607; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2608; GFX12-NEXT:    s_wait_expcnt 0x0
2609; GFX12-NEXT:    s_wait_samplecnt 0x0
2610; GFX12-NEXT:    s_wait_bvhcnt 0x0
2611; GFX12-NEXT:    s_wait_kmcnt 0x0
2612; GFX12-NEXT:    v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
2613; GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
2614; GFX12-NEXT:    s_setpc_b64 s[30:31]
2615  %result = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
2616  ret <4 x float> %result
2617}
2618
; llvm.maximumnum on <2 x double> with no fast-math flags on the call.
; Each 64-bit element lives in a VGPR pair. On every target, including
; gfx803, each input element is first max'ed with itself and then the two
; final max operations combine x and y (v_max_num_f64_e32 on gfx1200,
; v_max_f64 elsewhere).
; NOTE(review): the self-max presumably quiets signaling NaNs - confirm.
2619define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
2620; GFX8-LABEL: v_maximumnum_v2f64:
2621; GFX8:       ; %bb.0:
2622; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2623; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2624; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2625; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2626; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2627; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2628; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2629; GFX8-NEXT:    s_setpc_b64 s[30:31]
2630;
2631; GFX9-LABEL: v_maximumnum_v2f64:
2632; GFX9:       ; %bb.0:
2633; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2634; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2635; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2636; GFX9-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2637; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2638; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2639; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2640; GFX9-NEXT:    s_setpc_b64 s[30:31]
2641;
2642; GFX10-LABEL: v_maximumnum_v2f64:
2643; GFX10:       ; %bb.0:
2644; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2645; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2646; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2647; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2648; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2649; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2650; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2651; GFX10-NEXT:    s_setpc_b64 s[30:31]
2652;
2653; GFX11-LABEL: v_maximumnum_v2f64:
2654; GFX11:       ; %bb.0:
2655; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2656; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2657; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2658; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2659; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2660; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2661; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2662; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2663; GFX11-NEXT:    s_setpc_b64 s[30:31]
2664;
2665; GFX12-LABEL: v_maximumnum_v2f64:
2666; GFX12:       ; %bb.0:
2667; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2668; GFX12-NEXT:    s_wait_expcnt 0x0
2669; GFX12-NEXT:    s_wait_samplecnt 0x0
2670; GFX12-NEXT:    s_wait_bvhcnt 0x0
2671; GFX12-NEXT:    s_wait_kmcnt 0x0
2672; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
2673; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
2674; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
2675; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
2676; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2677; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
2678; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
2679; GFX12-NEXT:    s_setpc_b64 s[30:31]
2680  %result = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %x, <2 x double> %y)
2681  ret <2 x double> %result
2682}
2683
; llvm.maximumnum on <2 x double> with the nnan flag on the call.
; The expected code is two 64-bit max operations, one per element, with no
; self-max of the inputs beforehand.
2684define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
2685; GFX8-LABEL: v_maximumnum_v2f64_nnan:
2686; GFX8:       ; %bb.0:
2687; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2688; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2689; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2690; GFX8-NEXT:    s_setpc_b64 s[30:31]
2691;
2692; GFX9-LABEL: v_maximumnum_v2f64_nnan:
2693; GFX9:       ; %bb.0:
2694; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2695; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2696; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2697; GFX9-NEXT:    s_setpc_b64 s[30:31]
2698;
2699; GFX10-LABEL: v_maximumnum_v2f64_nnan:
2700; GFX10:       ; %bb.0:
2701; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2702; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2703; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2704; GFX10-NEXT:    s_setpc_b64 s[30:31]
2705;
2706; GFX11-LABEL: v_maximumnum_v2f64_nnan:
2707; GFX11:       ; %bb.0:
2708; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
2710; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
2711; GFX11-NEXT:    s_setpc_b64 s[30:31]
2712;
2713; GFX12-LABEL: v_maximumnum_v2f64_nnan:
2714; GFX12:       ; %bb.0:
2715; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2716; GFX12-NEXT:    s_wait_expcnt 0x0
2717; GFX12-NEXT:    s_wait_samplecnt 0x0
2718; GFX12-NEXT:    s_wait_bvhcnt 0x0
2719; GFX12-NEXT:    s_wait_kmcnt 0x0
2720; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
2721; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
2722; GFX12-NEXT:    s_setpc_b64 s[30:31]
2723  %result = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> %x, <2 x double> %y)
2724  ret <2 x double> %result
2725}
2726
; llvm.maximumnum on <3 x double> with no fast-math flags on the call.
; All six input elements (VGPR pairs) are first max'ed with themselves, then
; three final max operations combine x and y. The same shape is expected on
; every target; gfx1200 uses v_max_num_f64_e32 where the others use v_max_f64.
; NOTE(review): the self-max presumably quiets signaling NaNs - confirm.
2727define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
2728; GFX8-LABEL: v_maximumnum_v3f64:
2729; GFX8:       ; %bb.0:
2730; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2731; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2732; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2733; GFX8-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2734; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2735; GFX8-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2736; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2737; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2738; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2739; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2740; GFX8-NEXT:    s_setpc_b64 s[30:31]
2741;
2742; GFX9-LABEL: v_maximumnum_v3f64:
2743; GFX9:       ; %bb.0:
2744; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2745; GFX9-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2746; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2747; GFX9-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2748; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2749; GFX9-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2750; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2751; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2752; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2753; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2754; GFX9-NEXT:    s_setpc_b64 s[30:31]
2755;
2756; GFX10-LABEL: v_maximumnum_v3f64:
2757; GFX10:       ; %bb.0:
2758; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2759; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2760; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2761; GFX10-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2762; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2763; GFX10-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2764; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2765; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2766; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2767; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2768; GFX10-NEXT:    s_setpc_b64 s[30:31]
2769;
2770; GFX11-LABEL: v_maximumnum_v3f64:
2771; GFX11:       ; %bb.0:
2772; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2773; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2774; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2775; GFX11-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2776; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2777; GFX11-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2778; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2779; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2780; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2781; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2782; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2783; GFX11-NEXT:    s_setpc_b64 s[30:31]
2784;
2785; GFX12-LABEL: v_maximumnum_v3f64:
2786; GFX12:       ; %bb.0:
2787; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2788; GFX12-NEXT:    s_wait_expcnt 0x0
2789; GFX12-NEXT:    s_wait_samplecnt 0x0
2790; GFX12-NEXT:    s_wait_bvhcnt 0x0
2791; GFX12-NEXT:    s_wait_kmcnt 0x0
2792; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
2793; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
2794; GFX12-NEXT:    v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
2795; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
2796; GFX12-NEXT:    v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
2797; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
2798; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
2799; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2800; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
2801; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
2802; GFX12-NEXT:    s_setpc_b64 s[30:31]
2803  %result = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
2804  ret <3 x double> %result
2805}
2806
; llvm.maximumnum on <3 x double> with the nnan flag on the call.
; The expected code is three 64-bit max operations, one per element, with no
; self-max of the inputs beforehand.
2807define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
2808; GFX8-LABEL: v_maximumnum_v3f64_nnan:
2809; GFX8:       ; %bb.0:
2810; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2812; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2813; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2814; GFX8-NEXT:    s_setpc_b64 s[30:31]
2815;
2816; GFX9-LABEL: v_maximumnum_v3f64_nnan:
2817; GFX9:       ; %bb.0:
2818; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2819; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2820; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2821; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2822; GFX9-NEXT:    s_setpc_b64 s[30:31]
2823;
2824; GFX10-LABEL: v_maximumnum_v3f64_nnan:
2825; GFX10:       ; %bb.0:
2826; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2827; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2828; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2829; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2830; GFX10-NEXT:    s_setpc_b64 s[30:31]
2831;
2832; GFX11-LABEL: v_maximumnum_v3f64_nnan:
2833; GFX11:       ; %bb.0:
2834; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2835; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
2836; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
2837; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
2838; GFX11-NEXT:    s_setpc_b64 s[30:31]
2839;
2840; GFX12-LABEL: v_maximumnum_v3f64_nnan:
2841; GFX12:       ; %bb.0:
2842; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2843; GFX12-NEXT:    s_wait_expcnt 0x0
2844; GFX12-NEXT:    s_wait_samplecnt 0x0
2845; GFX12-NEXT:    s_wait_bvhcnt 0x0
2846; GFX12-NEXT:    s_wait_kmcnt 0x0
2847; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[6:7]
2848; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[8:9]
2849; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[10:11]
2850; GFX12-NEXT:    s_setpc_b64 s[30:31]
2851  %result = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %x, <3 x double> %y)
2852  ret <3 x double> %result
2853}
2854
; llvm.maximumnum on <4 x double> with no fast-math flags on the call.
; All eight input elements (VGPR pairs v[0:1] through v[14:15]) are first
; max'ed with themselves, then four final max operations combine x and y.
; The same shape is expected on every target; gfx1200 uses v_max_num_f64_e32
; where the others use v_max_f64.
; NOTE(review): the self-max presumably quiets signaling NaNs - confirm.
2855define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
2856; GFX8-LABEL: v_maximumnum_v4f64:
2857; GFX8:       ; %bb.0:
2858; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2859; GFX8-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2860; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2861; GFX8-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2862; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2863; GFX8-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
2864; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2865; GFX8-NEXT:    v_max_f64 v[14:15], v[14:15], v[14:15]
2866; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2867; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2868; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2869; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2870; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2871; GFX8-NEXT:    s_setpc_b64 s[30:31]
2872;
2873; GFX9-LABEL: v_maximumnum_v4f64:
2874; GFX9:       ; %bb.0:
2875; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2876; GFX9-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2877; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2878; GFX9-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2879; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2880; GFX9-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
2881; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2882; GFX9-NEXT:    v_max_f64 v[14:15], v[14:15], v[14:15]
2883; GFX9-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2884; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2885; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2886; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2887; GFX9-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2888; GFX9-NEXT:    s_setpc_b64 s[30:31]
2889;
2890; GFX10-LABEL: v_maximumnum_v4f64:
2891; GFX10:       ; %bb.0:
2892; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2893; GFX10-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2894; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2895; GFX10-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2896; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2897; GFX10-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
2898; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2899; GFX10-NEXT:    v_max_f64 v[14:15], v[14:15], v[14:15]
2900; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2901; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2902; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2903; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2904; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2905; GFX10-NEXT:    s_setpc_b64 s[30:31]
2906;
2907; GFX11-LABEL: v_maximumnum_v4f64:
2908; GFX11:       ; %bb.0:
2909; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2910; GFX11-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
2911; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
2912; GFX11-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
2913; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
2914; GFX11-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
2915; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
2916; GFX11-NEXT:    v_max_f64 v[14:15], v[14:15], v[14:15]
2917; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
2918; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2919; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2920; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2921; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2922; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2923; GFX11-NEXT:    s_setpc_b64 s[30:31]
2924;
2925; GFX12-LABEL: v_maximumnum_v4f64:
2926; GFX12:       ; %bb.0:
2927; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2928; GFX12-NEXT:    s_wait_expcnt 0x0
2929; GFX12-NEXT:    s_wait_samplecnt 0x0
2930; GFX12-NEXT:    s_wait_bvhcnt 0x0
2931; GFX12-NEXT:    s_wait_kmcnt 0x0
2932; GFX12-NEXT:    v_max_num_f64_e32 v[8:9], v[8:9], v[8:9]
2933; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
2934; GFX12-NEXT:    v_max_num_f64_e32 v[10:11], v[10:11], v[10:11]
2935; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
2936; GFX12-NEXT:    v_max_num_f64_e32 v[12:13], v[12:13], v[12:13]
2937; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
2938; GFX12-NEXT:    v_max_num_f64_e32 v[14:15], v[14:15], v[14:15]
2939; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
2940; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
2941; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
2942; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
2943; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2944; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
2945; GFX12-NEXT:    s_setpc_b64 s[30:31]
2946  %result = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> %x, <4 x double> %y)
2947  ret <4 x double> %result
2948}
2949
; Test: <4 x double> llvm.maximumnum with the nnan fast-math flag on the call.
; The checks below expect, for every RUN target, exactly one max instruction
; per 64-bit element (v_max_f64, or v_max_num_f64_e32 in the gfx1200 checks)
; and none of the operand self-max instructions (v_max x, x, x) that the
; checks for v_maximumnum_v4f64 expect ahead of the pairwise max.
; NOTE(review): presumably nnan is what lets the backend drop NaN quieting of
; the inputs -- confirm against the AMDGPU lowering before relying on this.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2950define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
2951; GFX8-LABEL: v_maximumnum_v4f64_nnan:
2952; GFX8:       ; %bb.0:
2953; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2954; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2955; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2956; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2957; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2958; GFX8-NEXT:    s_setpc_b64 s[30:31]
2959;
2960; GFX9-LABEL: v_maximumnum_v4f64_nnan:
2961; GFX9:       ; %bb.0:
2962; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2963; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2964; GFX9-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2965; GFX9-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2966; GFX9-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2967; GFX9-NEXT:    s_setpc_b64 s[30:31]
2968;
2969; GFX10-LABEL: v_maximumnum_v4f64_nnan:
2970; GFX10:       ; %bb.0:
2971; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2973; GFX10-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2974; GFX10-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2975; GFX10-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2976; GFX10-NEXT:    s_setpc_b64 s[30:31]
2977;
2978; GFX11-LABEL: v_maximumnum_v4f64_nnan:
2979; GFX11:       ; %bb.0:
2980; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2981; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
2982; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
2983; GFX11-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
2984; GFX11-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
2985; GFX11-NEXT:    s_setpc_b64 s[30:31]
2986;
2987; GFX12-LABEL: v_maximumnum_v4f64_nnan:
2988; GFX12:       ; %bb.0:
2989; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2990; GFX12-NEXT:    s_wait_expcnt 0x0
2991; GFX12-NEXT:    s_wait_samplecnt 0x0
2992; GFX12-NEXT:    s_wait_bvhcnt 0x0
2993; GFX12-NEXT:    s_wait_kmcnt 0x0
2994; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[0:1], v[8:9]
2995; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[10:11]
2996; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[4:5], v[12:13]
2997; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[6:7], v[14:15]
2998; GFX12-NEXT:    s_setpc_b64 s[30:31]
2999  %result = call nnan <4 x double> @llvm.maximumnum.v4f64(<4 x double> %x, <4 x double> %y)
3000  ret <4 x double> %result
3001}
3002