xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
9; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
10
; Baseline case: llvm.maximum.f32 on two float arguments, no fast-math flags.
; The GFX7, GFX8, GFX900, GFX10 and GFX11 checks expect a three-step expansion:
; v_max_f32, a v_cmp_o_f32 (ordered compare) of the same two inputs, and a
; v_cndmask_b32 that yields 0x7fc00000 (an f32 quiet-NaN bit pattern) when the
; compare fails. The GFX950 checks expect a single v_maximum3_f32 with the
; second source register repeated, and the GFX12 checks expect a single
; v_maximum_f32.
11define float @v_maximum_f32(float %src0, float %src1) {
12; GFX7-LABEL: v_maximum_f32:
13; GFX7:       ; %bb.0:
14; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
16; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
17; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
18; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
19; GFX7-NEXT:    s_setpc_b64 s[30:31]
20;
21; GFX8-LABEL: v_maximum_f32:
22; GFX8:       ; %bb.0:
23; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
25; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
26; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
27; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
28; GFX8-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX900-LABEL: v_maximum_f32:
31; GFX900:       ; %bb.0:
32; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
34; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
35; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
36; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
37; GFX900-NEXT:    s_setpc_b64 s[30:31]
38;
39; GFX950-LABEL: v_maximum_f32:
40; GFX950:       ; %bb.0:
41; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
43; GFX950-NEXT:    s_setpc_b64 s[30:31]
44;
45; GFX10-LABEL: v_maximum_f32:
46; GFX10:       ; %bb.0:
47; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
49; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
50; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
51; GFX10-NEXT:    s_setpc_b64 s[30:31]
52;
53; GFX11-LABEL: v_maximum_f32:
54; GFX11:       ; %bb.0:
55; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
57; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
58; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
59; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
60; GFX11-NEXT:    s_setpc_b64 s[30:31]
61;
62; GFX12-LABEL: v_maximum_f32:
63; GFX12:       ; %bb.0:
64; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
65; GFX12-NEXT:    s_wait_expcnt 0x0
66; GFX12-NEXT:    s_wait_samplecnt 0x0
67; GFX12-NEXT:    s_wait_bvhcnt 0x0
68; GFX12-NEXT:    s_wait_kmcnt 0x0
69; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
70; GFX12-NEXT:    s_setpc_b64 s[30:31]
71  %op = call float @llvm.maximum.f32(float %src0, float %src1)
72  ret float %op
73}
74
; llvm.maximum.f32 with the nnan fast-math flag on the call.
; The GFX7, GFX8, GFX900, GFX10 and GFX11 checks expect only a v_max_f32, with
; no ordered compare and no NaN select. The GFX950 checks still expect
; v_maximum3_f32 and the GFX12 checks still expect v_maximum_f32.
75define float @v_maximum_f32__nnan(float %src0, float %src1) {
76; GFX7-LABEL: v_maximum_f32__nnan:
77; GFX7:       ; %bb.0:
78; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
80; GFX7-NEXT:    s_setpc_b64 s[30:31]
81;
82; GFX8-LABEL: v_maximum_f32__nnan:
83; GFX8:       ; %bb.0:
84; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
86; GFX8-NEXT:    s_setpc_b64 s[30:31]
87;
88; GFX900-LABEL: v_maximum_f32__nnan:
89; GFX900:       ; %bb.0:
90; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
92; GFX900-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX950-LABEL: v_maximum_f32__nnan:
95; GFX950:       ; %bb.0:
96; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
98; GFX950-NEXT:    s_setpc_b64 s[30:31]
99;
100; GFX10-LABEL: v_maximum_f32__nnan:
101; GFX10:       ; %bb.0:
102; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
104; GFX10-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX11-LABEL: v_maximum_f32__nnan:
107; GFX11:       ; %bb.0:
108; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
110; GFX11-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX12-LABEL: v_maximum_f32__nnan:
113; GFX12:       ; %bb.0:
114; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
115; GFX12-NEXT:    s_wait_expcnt 0x0
116; GFX12-NEXT:    s_wait_samplecnt 0x0
117; GFX12-NEXT:    s_wait_bvhcnt 0x0
118; GFX12-NEXT:    s_wait_kmcnt 0x0
119; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
120; GFX12-NEXT:    s_setpc_b64 s[30:31]
121  %op = call nnan float @llvm.maximum.f32(float %src0, float %src1)
122  ret float %op
123}
124
; llvm.maximum.f32 with only the nsz fast-math flag on the call.
; The checks show that nsz alone does not remove the NaN handling: the GFX7,
; GFX8, GFX900, GFX10 and GFX11 checks still expect v_max_f32 plus the
; v_cmp_o_f32 / v_cndmask_b32 pair that selects 0x7fc00000. The GFX950 checks
; expect v_maximum3_f32 and the GFX12 checks expect v_maximum_f32.
125define float @v_maximum_f32__nsz(float %src0, float %src1) {
126; GFX7-LABEL: v_maximum_f32__nsz:
127; GFX7:       ; %bb.0:
128; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
130; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
131; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
132; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
133; GFX7-NEXT:    s_setpc_b64 s[30:31]
134;
135; GFX8-LABEL: v_maximum_f32__nsz:
136; GFX8:       ; %bb.0:
137; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
139; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
140; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
141; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
142; GFX8-NEXT:    s_setpc_b64 s[30:31]
143;
144; GFX900-LABEL: v_maximum_f32__nsz:
145; GFX900:       ; %bb.0:
146; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
148; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
149; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
150; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
151; GFX900-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX950-LABEL: v_maximum_f32__nsz:
154; GFX950:       ; %bb.0:
155; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
157; GFX950-NEXT:    s_setpc_b64 s[30:31]
158;
159; GFX10-LABEL: v_maximum_f32__nsz:
160; GFX10:       ; %bb.0:
161; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
163; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
164; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
165; GFX10-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX11-LABEL: v_maximum_f32__nsz:
168; GFX11:       ; %bb.0:
169; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
171; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
172; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
173; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
174; GFX11-NEXT:    s_setpc_b64 s[30:31]
175;
176; GFX12-LABEL: v_maximum_f32__nsz:
177; GFX12:       ; %bb.0:
178; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
179; GFX12-NEXT:    s_wait_expcnt 0x0
180; GFX12-NEXT:    s_wait_samplecnt 0x0
181; GFX12-NEXT:    s_wait_bvhcnt 0x0
182; GFX12-NEXT:    s_wait_kmcnt 0x0
183; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
184; GFX12-NEXT:    s_setpc_b64 s[30:31]
185  %op = call nsz float @llvm.maximum.f32(float %src0, float %src1)
186  ret float %op
187}
188
; llvm.maximum.f32 with both the nnan and nsz fast-math flags on the call.
; The GFX7, GFX8, GFX900, GFX10 and GFX11 checks expect only a v_max_f32, with
; no compare/select sequence. The GFX950 checks expect v_maximum3_f32 and the
; GFX12 checks expect v_maximum_f32.
189define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
190; GFX7-LABEL: v_maximum_f32__nnan_nsz:
191; GFX7:       ; %bb.0:
192; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
194; GFX7-NEXT:    s_setpc_b64 s[30:31]
195;
196; GFX8-LABEL: v_maximum_f32__nnan_nsz:
197; GFX8:       ; %bb.0:
198; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
200; GFX8-NEXT:    s_setpc_b64 s[30:31]
201;
202; GFX900-LABEL: v_maximum_f32__nnan_nsz:
203; GFX900:       ; %bb.0:
204; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205; GFX900-NEXT:    v_max_f32_e32 v0, v0, v1
206; GFX900-NEXT:    s_setpc_b64 s[30:31]
207;
208; GFX950-LABEL: v_maximum_f32__nnan_nsz:
209; GFX950:       ; %bb.0:
210; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
212; GFX950-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX10-LABEL: v_maximum_f32__nnan_nsz:
215; GFX10:       ; %bb.0:
216; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
218; GFX10-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX11-LABEL: v_maximum_f32__nnan_nsz:
221; GFX11:       ; %bb.0:
222; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
224; GFX11-NEXT:    s_setpc_b64 s[30:31]
225;
226; GFX12-LABEL: v_maximum_f32__nnan_nsz:
227; GFX12:       ; %bb.0:
228; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
229; GFX12-NEXT:    s_wait_expcnt 0x0
230; GFX12-NEXT:    s_wait_samplecnt 0x0
231; GFX12-NEXT:    s_wait_bvhcnt 0x0
232; GFX12-NEXT:    s_wait_kmcnt 0x0
233; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
234; GFX12-NEXT:    s_setpc_b64 s[30:31]
235  %op = call nnan nsz float @llvm.maximum.f32(float %src0, float %src1)
236  ret float %op
237}
238
; Only the first operand carries no-NaN information: %src0 is produced by an
; `fadd nnan`, while the llvm.maximum.f32 call itself has no flags.
; Every check set starts with the v_add_f32 of 1.0. After it, the GFX7, GFX8,
; GFX900, GFX10 and GFX11 checks still expect the full v_max_f32 /
; v_cmp_o_f32 / v_cndmask_b32 sequence with the compare reading both operands.
; The GFX950 checks expect v_maximum3_f32 and the GFX12 checks expect
; v_maximum_f32.
239define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
240; GFX7-LABEL: v_maximum_f32__nnan_src0:
241; GFX7:       ; %bb.0:
242; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243; GFX7-NEXT:    v_add_f32_e32 v0, 1.0, v0
244; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
245; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
246; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
247; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
248; GFX7-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX8-LABEL: v_maximum_f32__nnan_src0:
251; GFX8:       ; %bb.0:
252; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX8-NEXT:    v_add_f32_e32 v0, 1.0, v0
254; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
255; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
256; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
257; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
258; GFX8-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX900-LABEL: v_maximum_f32__nnan_src0:
261; GFX900:       ; %bb.0:
262; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX900-NEXT:    v_add_f32_e32 v0, 1.0, v0
264; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
265; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
266; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
267; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
268; GFX900-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX950-LABEL: v_maximum_f32__nnan_src0:
271; GFX950:       ; %bb.0:
272; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX950-NEXT:    v_add_f32_e32 v0, 1.0, v0
274; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
275; GFX950-NEXT:    s_setpc_b64 s[30:31]
276;
277; GFX10-LABEL: v_maximum_f32__nnan_src0:
278; GFX10:       ; %bb.0:
279; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280; GFX10-NEXT:    v_add_f32_e32 v0, 1.0, v0
281; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
282; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
283; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
284; GFX10-NEXT:    s_setpc_b64 s[30:31]
285;
286; GFX11-LABEL: v_maximum_f32__nnan_src0:
287; GFX11:       ; %bb.0:
288; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289; GFX11-NEXT:    v_add_f32_e32 v0, 1.0, v0
290; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
291; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
292; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
293; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
294; GFX11-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX12-LABEL: v_maximum_f32__nnan_src0:
297; GFX12:       ; %bb.0:
298; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
299; GFX12-NEXT:    s_wait_expcnt 0x0
300; GFX12-NEXT:    s_wait_samplecnt 0x0
301; GFX12-NEXT:    s_wait_bvhcnt 0x0
302; GFX12-NEXT:    s_wait_kmcnt 0x0
303; GFX12-NEXT:    v_add_f32_e32 v0, 1.0, v0
304; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
305; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
306; GFX12-NEXT:    s_setpc_b64 s[30:31]
307  %src0 = fadd nnan float %arg0, 1.0
308  %op = call float @llvm.maximum.f32(float %src0, float %src1)
309  ret float %op
310}
311
; Only the second operand carries no-NaN information: %src1 is produced by an
; `fadd nnan`, while the llvm.maximum.f32 call itself has no flags.
; Every check set starts with the v_add_f32 of 1.0 into v1. After it, the GFX7,
; GFX8, GFX900, GFX10 and GFX11 checks still expect the full v_max_f32 /
; v_cmp_o_f32 / v_cndmask_b32 sequence with the compare reading both operands.
; The GFX950 checks expect v_maximum3_f32 and the GFX12 checks expect
; v_maximum_f32.
312define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
313; GFX7-LABEL: v_maximum_f32__nnan_src1:
314; GFX7:       ; %bb.0:
315; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316; GFX7-NEXT:    v_add_f32_e32 v1, 1.0, v1
317; GFX7-NEXT:    v_max_f32_e32 v2, v0, v1
318; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
319; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
320; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
321; GFX7-NEXT:    s_setpc_b64 s[30:31]
322;
323; GFX8-LABEL: v_maximum_f32__nnan_src1:
324; GFX8:       ; %bb.0:
325; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326; GFX8-NEXT:    v_add_f32_e32 v1, 1.0, v1
327; GFX8-NEXT:    v_max_f32_e32 v2, v0, v1
328; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
329; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
330; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
331; GFX8-NEXT:    s_setpc_b64 s[30:31]
332;
333; GFX900-LABEL: v_maximum_f32__nnan_src1:
334; GFX900:       ; %bb.0:
335; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336; GFX900-NEXT:    v_add_f32_e32 v1, 1.0, v1
337; GFX900-NEXT:    v_max_f32_e32 v2, v0, v1
338; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
339; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v1
340; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
341; GFX900-NEXT:    s_setpc_b64 s[30:31]
342;
343; GFX950-LABEL: v_maximum_f32__nnan_src1:
344; GFX950:       ; %bb.0:
345; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346; GFX950-NEXT:    v_add_f32_e32 v1, 1.0, v1
347; GFX950-NEXT:    v_maximum3_f32 v0, v0, v1, v1
348; GFX950-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX10-LABEL: v_maximum_f32__nnan_src1:
351; GFX10:       ; %bb.0:
352; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX10-NEXT:    v_add_f32_e32 v1, 1.0, v1
354; GFX10-NEXT:    v_max_f32_e32 v2, v0, v1
355; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
356; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
357; GFX10-NEXT:    s_setpc_b64 s[30:31]
358;
359; GFX11-LABEL: v_maximum_f32__nnan_src1:
360; GFX11:       ; %bb.0:
361; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
363; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
364; GFX11-NEXT:    v_max_f32_e32 v2, v0, v1
365; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
366; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
367; GFX11-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX12-LABEL: v_maximum_f32__nnan_src1:
370; GFX12:       ; %bb.0:
371; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
372; GFX12-NEXT:    s_wait_expcnt 0x0
373; GFX12-NEXT:    s_wait_samplecnt 0x0
374; GFX12-NEXT:    s_wait_bvhcnt 0x0
375; GFX12-NEXT:    s_wait_kmcnt 0x0
376; GFX12-NEXT:    v_add_f32_e32 v1, 1.0, v1
377; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
378; GFX12-NEXT:    v_maximum_f32 v0, v0, v1
379; GFX12-NEXT:    s_setpc_b64 s[30:31]
380  %src1 = fadd nnan float %arg1, 1.0
381  %op = call float @llvm.maximum.f32(float %src0, float %src1)
382  ret float %op
383}
384
; llvm.maximum.f32 on arguments marked inreg; the checks read them from SGPRs
; (s16/s17 or s0/s1 depending on the check set). The result is not returned
; but consumed by a side-effecting inline asm through an "s" constraint.
; The GFX12 checks expect a scalar s_maximum_f32, the asm using s0, and an
; s_wait_alu before the return. All other check sets expect the operation to
; be carried out with vector instructions (v_max_f32 / v_cmp_o_f32 /
; v_cndmask_b32, or v_maximum3_f32 in the GFX950 checks) and record v0 as the
; asm operand.
; NOTE(review): v0 feeding an "s"-constrained asm operand looks suspicious;
; confirm whether this is a known limitation that the test pins on purpose.
385define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
386; GFX7-LABEL: s_maximum_f32:
387; GFX7:       ; %bb.0:
388; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389; GFX7-NEXT:    v_mov_b32_e32 v0, s17
390; GFX7-NEXT:    v_max_f32_e32 v1, s16, v0
391; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
392; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
393; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
394; GFX7-NEXT:    ;;#ASMSTART
395; GFX7-NEXT:    ; use v0
396; GFX7-NEXT:    ;;#ASMEND
397; GFX7-NEXT:    s_setpc_b64 s[30:31]
398;
399; GFX8-LABEL: s_maximum_f32:
400; GFX8:       ; %bb.0:
401; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402; GFX8-NEXT:    v_mov_b32_e32 v0, s17
403; GFX8-NEXT:    v_max_f32_e32 v1, s16, v0
404; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
405; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
406; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
407; GFX8-NEXT:    ;;#ASMSTART
408; GFX8-NEXT:    ; use v0
409; GFX8-NEXT:    ;;#ASMEND
410; GFX8-NEXT:    s_setpc_b64 s[30:31]
411;
412; GFX900-LABEL: s_maximum_f32:
413; GFX900:       ; %bb.0:
414; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415; GFX900-NEXT:    v_mov_b32_e32 v0, s17
416; GFX900-NEXT:    v_max_f32_e32 v1, s16, v0
417; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
418; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
419; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
420; GFX900-NEXT:    ;;#ASMSTART
421; GFX900-NEXT:    ; use v0
422; GFX900-NEXT:    ;;#ASMEND
423; GFX900-NEXT:    s_setpc_b64 s[30:31]
424;
425; GFX950-LABEL: s_maximum_f32:
426; GFX950:       ; %bb.0:
427; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428; GFX950-NEXT:    v_mov_b32_e32 v0, s0
429; GFX950-NEXT:    v_maximum3_f32 v0, v0, s1, s1
430; GFX950-NEXT:    ;;#ASMSTART
431; GFX950-NEXT:    ; use v0
432; GFX950-NEXT:    ;;#ASMEND
433; GFX950-NEXT:    s_setpc_b64 s[30:31]
434;
435; GFX10-LABEL: s_maximum_f32:
436; GFX10:       ; %bb.0:
437; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; GFX10-NEXT:    v_max_f32_e64 v0, s16, s17
439; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s17
440; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
441; GFX10-NEXT:    ;;#ASMSTART
442; GFX10-NEXT:    ; use v0
443; GFX10-NEXT:    ;;#ASMEND
444; GFX10-NEXT:    s_setpc_b64 s[30:31]
445;
446; GFX11-LABEL: s_maximum_f32:
447; GFX11:       ; %bb.0:
448; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
450; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
451; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
452; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
453; GFX11-NEXT:    ;;#ASMSTART
454; GFX11-NEXT:    ; use v0
455; GFX11-NEXT:    ;;#ASMEND
456; GFX11-NEXT:    s_setpc_b64 s[30:31]
457;
458; GFX12-LABEL: s_maximum_f32:
459; GFX12:       ; %bb.0:
460; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
461; GFX12-NEXT:    s_wait_expcnt 0x0
462; GFX12-NEXT:    s_wait_samplecnt 0x0
463; GFX12-NEXT:    s_wait_bvhcnt 0x0
464; GFX12-NEXT:    s_wait_kmcnt 0x0
465; GFX12-NEXT:    s_maximum_f32 s0, s0, s1
466; GFX12-NEXT:    ;;#ASMSTART
467; GFX12-NEXT:    ; use s0
468; GFX12-NEXT:    ;;#ASMEND
469; GFX12-NEXT:    s_wait_alu 0xfffe
470; GFX12-NEXT:    s_setpc_b64 s[30:31]
471  %op = call float @llvm.maximum.f32(float %src0, float %src1)
472  call void asm sideeffect "; use $0", "s"(float %op)
473  ret void
474}
475
; Two-element vector form, llvm.maximum.v2f32, with no fast-math flags.
; Every check set handles the two elements separately (v0 with v2, v1 with v3).
; The GFX7, GFX8, GFX900, GFX10 and GFX11 checks expect a v_max_f32,
; v_cmp_o_f32 and v_cndmask_b32 (selecting 0x7fc00000) per element; on GFX7,
; GFX8 and GFX900 the NaN constant is materialized once in v5 and reused, and
; the GFX11 checks fuse the two maxes into one v_dual_max_f32. The GFX950
; checks expect two v_maximum3_f32 and the GFX12 checks two v_maximum_f32.
476define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
477; GFX7-LABEL: v_maximum_v2f32:
478; GFX7:       ; %bb.0:
479; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
481; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
482; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
483; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
484; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
485; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
486; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
487; GFX7-NEXT:    s_setpc_b64 s[30:31]
488;
489; GFX8-LABEL: v_maximum_v2f32:
490; GFX8:       ; %bb.0:
491; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GFX8-NEXT:    v_max_f32_e32 v4, v0, v2
493; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
494; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
495; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
496; GFX8-NEXT:    v_max_f32_e32 v2, v1, v3
497; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
498; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
499; GFX8-NEXT:    s_setpc_b64 s[30:31]
500;
501; GFX900-LABEL: v_maximum_v2f32:
502; GFX900:       ; %bb.0:
503; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504; GFX900-NEXT:    v_max_f32_e32 v4, v0, v2
505; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
506; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
507; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
508; GFX900-NEXT:    v_max_f32_e32 v2, v1, v3
509; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
510; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
511; GFX900-NEXT:    s_setpc_b64 s[30:31]
512;
513; GFX950-LABEL: v_maximum_v2f32:
514; GFX950:       ; %bb.0:
515; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX950-NEXT:    v_maximum3_f32 v0, v0, v2, v2
517; GFX950-NEXT:    v_maximum3_f32 v1, v1, v3, v3
518; GFX950-NEXT:    s_setpc_b64 s[30:31]
519;
520; GFX10-LABEL: v_maximum_v2f32:
521; GFX10:       ; %bb.0:
522; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523; GFX10-NEXT:    v_max_f32_e32 v4, v0, v2
524; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
525; GFX10-NEXT:    v_max_f32_e32 v5, v1, v3
526; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
527; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
528; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
529; GFX10-NEXT:    s_setpc_b64 s[30:31]
530;
531; GFX11-LABEL: v_maximum_v2f32:
532; GFX11:       ; %bb.0:
533; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534; GFX11-NEXT:    v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
535; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
536; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
537; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
538; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
539; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
540; GFX11-NEXT:    s_setpc_b64 s[30:31]
541;
542; GFX12-LABEL: v_maximum_v2f32:
543; GFX12:       ; %bb.0:
544; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
545; GFX12-NEXT:    s_wait_expcnt 0x0
546; GFX12-NEXT:    s_wait_samplecnt 0x0
547; GFX12-NEXT:    s_wait_bvhcnt 0x0
548; GFX12-NEXT:    s_wait_kmcnt 0x0
549; GFX12-NEXT:    v_maximum_f32 v0, v0, v2
550; GFX12-NEXT:    v_maximum_f32 v1, v1, v3
551; GFX12-NEXT:    s_setpc_b64 s[30:31]
552  %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
553  ret <2 x float> %op
554}
555
; llvm.maximum.v2f32 with the nnan fast-math flag on the call.
; The GFX7, GFX8, GFX900 and GFX10 checks expect one v_max_f32 per element
; with no compare/select, and the GFX11 checks expect a single v_dual_max_f32
; covering both elements. The GFX950 checks expect two v_maximum3_f32 and the
; GFX12 checks two v_maximum_f32.
556define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) {
557; GFX7-LABEL: v_maximum_v2f32__nnan:
558; GFX7:       ; %bb.0:
559; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
561; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
562; GFX7-NEXT:    s_setpc_b64 s[30:31]
563;
564; GFX8-LABEL: v_maximum_v2f32__nnan:
565; GFX8:       ; %bb.0:
566; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
568; GFX8-NEXT:    v_max_f32_e32 v1, v1, v3
569; GFX8-NEXT:    s_setpc_b64 s[30:31]
570;
571; GFX900-LABEL: v_maximum_v2f32__nnan:
572; GFX900:       ; %bb.0:
573; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574; GFX900-NEXT:    v_max_f32_e32 v0, v0, v2
575; GFX900-NEXT:    v_max_f32_e32 v1, v1, v3
576; GFX900-NEXT:    s_setpc_b64 s[30:31]
577;
578; GFX950-LABEL: v_maximum_v2f32__nnan:
579; GFX950:       ; %bb.0:
580; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; GFX950-NEXT:    v_maximum3_f32 v0, v0, v2, v2
582; GFX950-NEXT:    v_maximum3_f32 v1, v1, v3, v3
583; GFX950-NEXT:    s_setpc_b64 s[30:31]
584;
585; GFX10-LABEL: v_maximum_v2f32__nnan:
586; GFX10:       ; %bb.0:
587; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588; GFX10-NEXT:    v_max_f32_e32 v0, v0, v2
589; GFX10-NEXT:    v_max_f32_e32 v1, v1, v3
590; GFX10-NEXT:    s_setpc_b64 s[30:31]
591;
592; GFX11-LABEL: v_maximum_v2f32__nnan:
593; GFX11:       ; %bb.0:
594; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595; GFX11-NEXT:    v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
596; GFX11-NEXT:    s_setpc_b64 s[30:31]
597;
598; GFX12-LABEL: v_maximum_v2f32__nnan:
599; GFX12:       ; %bb.0:
600; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
601; GFX12-NEXT:    s_wait_expcnt 0x0
602; GFX12-NEXT:    s_wait_samplecnt 0x0
603; GFX12-NEXT:    s_wait_bvhcnt 0x0
604; GFX12-NEXT:    s_wait_kmcnt 0x0
605; GFX12-NEXT:    v_maximum_f32 v0, v0, v2
606; GFX12-NEXT:    v_maximum_f32 v1, v1, v3
607; GFX12-NEXT:    s_setpc_b64 s[30:31]
608  %op = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
609  ret <2 x float> %op
610}
611
; llvm.maximum.v2f32 with only the nsz fast-math flag on the call.
; The checks show that nsz alone keeps the NaN handling: the GFX7, GFX8,
; GFX900, GFX10 and GFX11 checks still expect, per element, a max followed by
; v_cmp_o_f32 and a v_cndmask_b32 that selects 0x7fc00000 (the GFX11 checks
; fuse the two maxes into one v_dual_max_f32). The GFX950 checks expect two
; v_maximum3_f32 and the GFX12 checks two v_maximum_f32.
612define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
613; GFX7-LABEL: v_maximum_v2f32__nsz:
614; GFX7:       ; %bb.0:
615; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616; GFX7-NEXT:    v_max_f32_e32 v4, v0, v2
617; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
618; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
619; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
620; GFX7-NEXT:    v_max_f32_e32 v2, v1, v3
621; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
622; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
623; GFX7-NEXT:    s_setpc_b64 s[30:31]
624;
625; GFX8-LABEL: v_maximum_v2f32__nsz:
626; GFX8:       ; %bb.0:
627; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628; GFX8-NEXT:    v_max_f32_e32 v4, v0, v2
629; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
630; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
631; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
632; GFX8-NEXT:    v_max_f32_e32 v2, v1, v3
633; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
634; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
635; GFX8-NEXT:    s_setpc_b64 s[30:31]
636;
637; GFX900-LABEL: v_maximum_v2f32__nsz:
638; GFX900:       ; %bb.0:
639; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640; GFX900-NEXT:    v_max_f32_e32 v4, v0, v2
641; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
642; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v2
643; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
644; GFX900-NEXT:    v_max_f32_e32 v2, v1, v3
645; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v3
646; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
647; GFX900-NEXT:    s_setpc_b64 s[30:31]
648;
649; GFX950-LABEL: v_maximum_v2f32__nsz:
650; GFX950:       ; %bb.0:
651; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652; GFX950-NEXT:    v_maximum3_f32 v0, v0, v2, v2
653; GFX950-NEXT:    v_maximum3_f32 v1, v1, v3, v3
654; GFX950-NEXT:    s_setpc_b64 s[30:31]
655;
656; GFX10-LABEL: v_maximum_v2f32__nsz:
657; GFX10:       ; %bb.0:
658; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659; GFX10-NEXT:    v_max_f32_e32 v4, v0, v2
660; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
661; GFX10-NEXT:    v_max_f32_e32 v5, v1, v3
662; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
663; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
664; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
665; GFX10-NEXT:    s_setpc_b64 s[30:31]
666;
667; GFX11-LABEL: v_maximum_v2f32__nsz:
668; GFX11:       ; %bb.0:
669; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670; GFX11-NEXT:    v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3
671; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v2
672; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
673; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo
674; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v3
675; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
676; GFX11-NEXT:    s_setpc_b64 s[30:31]
677;
678; GFX12-LABEL: v_maximum_v2f32__nsz:
679; GFX12:       ; %bb.0:
680; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
681; GFX12-NEXT:    s_wait_expcnt 0x0
682; GFX12-NEXT:    s_wait_samplecnt 0x0
683; GFX12-NEXT:    s_wait_bvhcnt 0x0
684; GFX12-NEXT:    s_wait_kmcnt 0x0
685; GFX12-NEXT:    v_maximum_f32 v0, v0, v2
686; GFX12-NEXT:    v_maximum_f32 v1, v1, v3
687; GFX12-NEXT:    s_setpc_b64 s[30:31]
688  %op = call nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
689  ret <2 x float> %op
690}
691
; llvm.maximum.v2f32 with both the nnan and nsz fast-math flags on the call.
; The GFX7, GFX8, GFX900 and GFX10 checks expect one v_max_f32 per element
; with no compare/select, and the GFX11 checks expect a single v_dual_max_f32
; covering both elements. The GFX950 checks expect two v_maximum3_f32 and the
; GFX12 checks two v_maximum_f32.
692define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %src1) {
693; GFX7-LABEL: v_maximum_v2f32__nnan_nsz:
694; GFX7:       ; %bb.0:
695; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696; GFX7-NEXT:    v_max_f32_e32 v0, v0, v2
697; GFX7-NEXT:    v_max_f32_e32 v1, v1, v3
698; GFX7-NEXT:    s_setpc_b64 s[30:31]
699;
700; GFX8-LABEL: v_maximum_v2f32__nnan_nsz:
701; GFX8:       ; %bb.0:
702; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703; GFX8-NEXT:    v_max_f32_e32 v0, v0, v2
704; GFX8-NEXT:    v_max_f32_e32 v1, v1, v3
705; GFX8-NEXT:    s_setpc_b64 s[30:31]
706;
707; GFX900-LABEL: v_maximum_v2f32__nnan_nsz:
708; GFX900:       ; %bb.0:
709; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710; GFX900-NEXT:    v_max_f32_e32 v0, v0, v2
711; GFX900-NEXT:    v_max_f32_e32 v1, v1, v3
712; GFX900-NEXT:    s_setpc_b64 s[30:31]
713;
714; GFX950-LABEL: v_maximum_v2f32__nnan_nsz:
715; GFX950:       ; %bb.0:
716; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717; GFX950-NEXT:    v_maximum3_f32 v0, v0, v2, v2
718; GFX950-NEXT:    v_maximum3_f32 v1, v1, v3, v3
719; GFX950-NEXT:    s_setpc_b64 s[30:31]
720;
721; GFX10-LABEL: v_maximum_v2f32__nnan_nsz:
722; GFX10:       ; %bb.0:
723; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724; GFX10-NEXT:    v_max_f32_e32 v0, v0, v2
725; GFX10-NEXT:    v_max_f32_e32 v1, v1, v3
726; GFX10-NEXT:    s_setpc_b64 s[30:31]
727;
728; GFX11-LABEL: v_maximum_v2f32__nnan_nsz:
729; GFX11:       ; %bb.0:
730; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731; GFX11-NEXT:    v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
732; GFX11-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
735; GFX12:       ; %bb.0:
736; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
737; GFX12-NEXT:    s_wait_expcnt 0x0
738; GFX12-NEXT:    s_wait_samplecnt 0x0
739; GFX12-NEXT:    s_wait_bvhcnt 0x0
740; GFX12-NEXT:    s_wait_kmcnt 0x0
741; GFX12-NEXT:    v_maximum_f32 v0, v0, v2
742; GFX12-NEXT:    v_maximum_f32 v1, v1, v3
743; GFX12-NEXT:    s_setpc_b64 s[30:31]
744  %op = call nnan nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
745  ret <2 x float> %op
746}
747
; Lowering of llvm.maximum.v2f32 when both operands are passed inreg and the
; result is consumed by inline asm through an "s" constraint.
; Recorded expectations: GFX7 through GFX11 pair each v_max_f32 with a
; v_cmp_o_f32 and a v_cndmask_b32 against 0x7fc00000 (the f32 quiet-NaN bit
; pattern); GFX950 uses v_maximum3_f32; GFX12 uses s_maximum_f32.
; NOTE(review): every target except GFX12 records "use v[0:1]" for the
; "s"-constrained asm operand - confirm that a VGPR operand is intended there.
748define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
749; GFX7-LABEL: s_maximum_v2f32:
750; GFX7:       ; %bb.0:
751; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752; GFX7-NEXT:    v_mov_b32_e32 v0, s19
753; GFX7-NEXT:    v_max_f32_e32 v1, s17, v0
754; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
755; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
756; GFX7-NEXT:    v_mov_b32_e32 v0, s18
757; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
758; GFX7-NEXT:    v_max_f32_e32 v3, s16, v0
759; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
760; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
761; GFX7-NEXT:    ;;#ASMSTART
762; GFX7-NEXT:    ; use v[0:1]
763; GFX7-NEXT:    ;;#ASMEND
764; GFX7-NEXT:    s_setpc_b64 s[30:31]
765;
766; GFX8-LABEL: s_maximum_v2f32:
767; GFX8:       ; %bb.0:
768; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769; GFX8-NEXT:    v_mov_b32_e32 v0, s19
770; GFX8-NEXT:    v_max_f32_e32 v1, s17, v0
771; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
772; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
773; GFX8-NEXT:    v_mov_b32_e32 v0, s18
774; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
775; GFX8-NEXT:    v_max_f32_e32 v3, s16, v0
776; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
777; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
778; GFX8-NEXT:    ;;#ASMSTART
779; GFX8-NEXT:    ; use v[0:1]
780; GFX8-NEXT:    ;;#ASMEND
781; GFX8-NEXT:    s_setpc_b64 s[30:31]
782;
783; GFX900-LABEL: s_maximum_v2f32:
784; GFX900:       ; %bb.0:
785; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786; GFX900-NEXT:    v_mov_b32_e32 v0, s19
787; GFX900-NEXT:    v_max_f32_e32 v1, s17, v0
788; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
789; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
790; GFX900-NEXT:    v_mov_b32_e32 v0, s18
791; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
792; GFX900-NEXT:    v_max_f32_e32 v3, s16, v0
793; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
794; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
795; GFX900-NEXT:    ;;#ASMSTART
796; GFX900-NEXT:    ; use v[0:1]
797; GFX900-NEXT:    ;;#ASMEND
798; GFX900-NEXT:    s_setpc_b64 s[30:31]
799;
800; GFX950-LABEL: s_maximum_v2f32:
801; GFX950:       ; %bb.0:
802; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803; GFX950-NEXT:    v_mov_b32_e32 v0, s1
804; GFX950-NEXT:    v_maximum3_f32 v1, v0, s3, s3
805; GFX950-NEXT:    v_mov_b32_e32 v0, s0
806; GFX950-NEXT:    v_maximum3_f32 v0, v0, s2, s2
807; GFX950-NEXT:    ;;#ASMSTART
808; GFX950-NEXT:    ; use v[0:1]
809; GFX950-NEXT:    ;;#ASMEND
810; GFX950-NEXT:    s_setpc_b64 s[30:31]
811;
812; GFX10-LABEL: s_maximum_v2f32:
813; GFX10:       ; %bb.0:
814; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815; GFX10-NEXT:    v_max_f32_e64 v0, s17, s19
816; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s17, s19
817; GFX10-NEXT:    v_max_f32_e64 v2, s16, s18
818; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
819; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s18
820; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
821; GFX10-NEXT:    ;;#ASMSTART
822; GFX10-NEXT:    ; use v[0:1]
823; GFX10-NEXT:    ;;#ASMEND
824; GFX10-NEXT:    s_setpc_b64 s[30:31]
825;
826; GFX11-LABEL: s_maximum_v2f32:
827; GFX11:       ; %bb.0:
828; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829; GFX11-NEXT:    v_max_f32_e64 v0, s1, s3
830; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
831; GFX11-NEXT:    v_max_f32_e64 v2, s0, s2
832; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
833; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
834; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
835; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
836; GFX11-NEXT:    ;;#ASMSTART
837; GFX11-NEXT:    ; use v[0:1]
838; GFX11-NEXT:    ;;#ASMEND
839; GFX11-NEXT:    s_setpc_b64 s[30:31]
840;
841; GFX12-LABEL: s_maximum_v2f32:
842; GFX12:       ; %bb.0:
843; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
844; GFX12-NEXT:    s_wait_expcnt 0x0
845; GFX12-NEXT:    s_wait_samplecnt 0x0
846; GFX12-NEXT:    s_wait_bvhcnt 0x0
847; GFX12-NEXT:    s_wait_kmcnt 0x0
848; GFX12-NEXT:    s_maximum_f32 s1, s1, s3
849; GFX12-NEXT:    s_maximum_f32 s0, s0, s2
850; GFX12-NEXT:    ;;#ASMSTART
851; GFX12-NEXT:    ; use s[0:1]
852; GFX12-NEXT:    ;;#ASMEND
853; GFX12-NEXT:    s_wait_alu 0xfffe
854; GFX12-NEXT:    s_setpc_b64 s[30:31]
855  %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1)
856  call void asm sideeffect "; use $0", "s"(<2 x float> %op)
857  ret void
858}
859
; Lowering of llvm.maximum.v3f32 with no fast-math flags on the call.
; Recorded expectations, one operation per vector element: GFX7, GFX8, GFX900,
; GFX10 and GFX11 pair each max (v_max_f32 or v_dual_max_f32) with a
; v_cmp_o_f32 and a v_cndmask_b32 against 0x7fc00000 (the f32 quiet-NaN bit
; pattern); GFX950 uses v_maximum3_f32 with the second source repeated; GFX12
; uses v_maximum_f32.
860define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
861; GFX7-LABEL: v_maximum_v3f32:
862; GFX7:       ; %bb.0:
863; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
865; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
866; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
867; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
868; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
869; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
870; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
871; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
872; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
873; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
874; GFX7-NEXT:    s_setpc_b64 s[30:31]
875;
876; GFX8-LABEL: v_maximum_v3f32:
877; GFX8:       ; %bb.0:
878; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879; GFX8-NEXT:    v_max_f32_e32 v6, v0, v3
880; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
881; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
882; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
883; GFX8-NEXT:    v_max_f32_e32 v3, v1, v4
884; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
885; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
886; GFX8-NEXT:    v_max_f32_e32 v3, v2, v5
887; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
888; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
889; GFX8-NEXT:    s_setpc_b64 s[30:31]
890;
891; GFX900-LABEL: v_maximum_v3f32:
892; GFX900:       ; %bb.0:
893; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894; GFX900-NEXT:    v_max_f32_e32 v6, v0, v3
895; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
896; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
897; GFX900-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
898; GFX900-NEXT:    v_max_f32_e32 v3, v1, v4
899; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
900; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
901; GFX900-NEXT:    v_max_f32_e32 v3, v2, v5
902; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
903; GFX900-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
904; GFX900-NEXT:    s_setpc_b64 s[30:31]
905;
906; GFX950-LABEL: v_maximum_v3f32:
907; GFX950:       ; %bb.0:
908; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909; GFX950-NEXT:    v_maximum3_f32 v0, v0, v3, v3
910; GFX950-NEXT:    v_maximum3_f32 v1, v1, v4, v4
911; GFX950-NEXT:    v_maximum3_f32 v2, v2, v5, v5
912; GFX950-NEXT:    s_setpc_b64 s[30:31]
913;
914; GFX10-LABEL: v_maximum_v3f32:
915; GFX10:       ; %bb.0:
916; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
917; GFX10-NEXT:    v_max_f32_e32 v6, v0, v3
918; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
919; GFX10-NEXT:    v_max_f32_e32 v7, v1, v4
920; GFX10-NEXT:    v_max_f32_e32 v8, v2, v5
921; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
922; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
923; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
924; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
925; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
926; GFX10-NEXT:    s_setpc_b64 s[30:31]
927;
928; GFX11-LABEL: v_maximum_v3f32:
929; GFX11:       ; %bb.0:
930; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931; GFX11-NEXT:    v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
932; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
933; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
934; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
935; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
936; GFX11-NEXT:    v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
937; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
938; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
939; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
940; GFX11-NEXT:    s_setpc_b64 s[30:31]
941;
942; GFX12-LABEL: v_maximum_v3f32:
943; GFX12:       ; %bb.0:
944; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
945; GFX12-NEXT:    s_wait_expcnt 0x0
946; GFX12-NEXT:    s_wait_samplecnt 0x0
947; GFX12-NEXT:    s_wait_bvhcnt 0x0
948; GFX12-NEXT:    s_wait_kmcnt 0x0
949; GFX12-NEXT:    v_maximum_f32 v0, v0, v3
950; GFX12-NEXT:    v_maximum_f32 v1, v1, v4
951; GFX12-NEXT:    v_maximum_f32 v2, v2, v5
952; GFX12-NEXT:    s_setpc_b64 s[30:31]
953  %op = call <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
954  ret <3 x float> %op
955}
956
; Lowering of llvm.maximum.v3f32 when the call carries the nnan flag.
; Recorded expectations: GFX7, GFX8, GFX900, GFX10 and GFX11 use a bare max per
; element (v_max_f32 or v_dual_max_f32) with no compare or select; GFX950 uses
; v_maximum3_f32 and GFX12 uses v_maximum_f32.
957define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) {
958; GFX7-LABEL: v_maximum_v3f32__nnan:
959; GFX7:       ; %bb.0:
960; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
962; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
963; GFX7-NEXT:    v_max_f32_e32 v2, v2, v5
964; GFX7-NEXT:    s_setpc_b64 s[30:31]
965;
966; GFX8-LABEL: v_maximum_v3f32__nnan:
967; GFX8:       ; %bb.0:
968; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
969; GFX8-NEXT:    v_max_f32_e32 v0, v0, v3
970; GFX8-NEXT:    v_max_f32_e32 v1, v1, v4
971; GFX8-NEXT:    v_max_f32_e32 v2, v2, v5
972; GFX8-NEXT:    s_setpc_b64 s[30:31]
973;
974; GFX900-LABEL: v_maximum_v3f32__nnan:
975; GFX900:       ; %bb.0:
976; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; GFX900-NEXT:    v_max_f32_e32 v0, v0, v3
978; GFX900-NEXT:    v_max_f32_e32 v1, v1, v4
979; GFX900-NEXT:    v_max_f32_e32 v2, v2, v5
980; GFX900-NEXT:    s_setpc_b64 s[30:31]
981;
982; GFX950-LABEL: v_maximum_v3f32__nnan:
983; GFX950:       ; %bb.0:
984; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985; GFX950-NEXT:    v_maximum3_f32 v0, v0, v3, v3
986; GFX950-NEXT:    v_maximum3_f32 v1, v1, v4, v4
987; GFX950-NEXT:    v_maximum3_f32 v2, v2, v5, v5
988; GFX950-NEXT:    s_setpc_b64 s[30:31]
989;
990; GFX10-LABEL: v_maximum_v3f32__nnan:
991; GFX10:       ; %bb.0:
992; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993; GFX10-NEXT:    v_max_f32_e32 v0, v0, v3
994; GFX10-NEXT:    v_max_f32_e32 v1, v1, v4
995; GFX10-NEXT:    v_max_f32_e32 v2, v2, v5
996; GFX10-NEXT:    s_setpc_b64 s[30:31]
997;
998; GFX11-LABEL: v_maximum_v3f32__nnan:
999; GFX11:       ; %bb.0:
1000; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001; GFX11-NEXT:    v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
1002; GFX11-NEXT:    v_max_f32_e32 v2, v2, v5
1003; GFX11-NEXT:    s_setpc_b64 s[30:31]
1004;
1005; GFX12-LABEL: v_maximum_v3f32__nnan:
1006; GFX12:       ; %bb.0:
1007; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1008; GFX12-NEXT:    s_wait_expcnt 0x0
1009; GFX12-NEXT:    s_wait_samplecnt 0x0
1010; GFX12-NEXT:    s_wait_bvhcnt 0x0
1011; GFX12-NEXT:    s_wait_kmcnt 0x0
1012; GFX12-NEXT:    v_maximum_f32 v0, v0, v3
1013; GFX12-NEXT:    v_maximum_f32 v1, v1, v4
1014; GFX12-NEXT:    v_maximum_f32 v2, v2, v5
1015; GFX12-NEXT:    s_setpc_b64 s[30:31]
1016  %op = call nnan <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
1017  ret <3 x float> %op
1018}
1019
; Lowering of llvm.maximum.v3f32 when the call carries only the nsz flag.
; Recorded expectations: GFX7, GFX8, GFX900, GFX10 and GFX11 still pair each
; max with a v_cmp_o_f32 and a v_cndmask_b32 against 0x7fc00000 (the f32
; quiet-NaN bit pattern); GFX950 uses v_maximum3_f32 and GFX12 uses
; v_maximum_f32.
1020define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
1021; GFX7-LABEL: v_maximum_v3f32__nsz:
1022; GFX7:       ; %bb.0:
1023; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024; GFX7-NEXT:    v_max_f32_e32 v6, v0, v3
1025; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
1026; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
1027; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
1028; GFX7-NEXT:    v_max_f32_e32 v3, v1, v4
1029; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
1030; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1031; GFX7-NEXT:    v_max_f32_e32 v3, v2, v5
1032; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1033; GFX7-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1034; GFX7-NEXT:    s_setpc_b64 s[30:31]
1035;
1036; GFX8-LABEL: v_maximum_v3f32__nsz:
1037; GFX8:       ; %bb.0:
1038; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039; GFX8-NEXT:    v_max_f32_e32 v6, v0, v3
1040; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
1041; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
1042; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
1043; GFX8-NEXT:    v_max_f32_e32 v3, v1, v4
1044; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
1045; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1046; GFX8-NEXT:    v_max_f32_e32 v3, v2, v5
1047; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1048; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1049; GFX8-NEXT:    s_setpc_b64 s[30:31]
1050;
1051; GFX900-LABEL: v_maximum_v3f32__nsz:
1052; GFX900:       ; %bb.0:
1053; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054; GFX900-NEXT:    v_max_f32_e32 v6, v0, v3
1055; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7fc00000
1056; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v3
1057; GFX900-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
1058; GFX900-NEXT:    v_max_f32_e32 v3, v1, v4
1059; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v4
1060; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
1061; GFX900-NEXT:    v_max_f32_e32 v3, v2, v5
1062; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v5
1063; GFX900-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
1064; GFX900-NEXT:    s_setpc_b64 s[30:31]
1065;
1066; GFX950-LABEL: v_maximum_v3f32__nsz:
1067; GFX950:       ; %bb.0:
1068; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069; GFX950-NEXT:    v_maximum3_f32 v0, v0, v3, v3
1070; GFX950-NEXT:    v_maximum3_f32 v1, v1, v4, v4
1071; GFX950-NEXT:    v_maximum3_f32 v2, v2, v5, v5
1072; GFX950-NEXT:    s_setpc_b64 s[30:31]
1073;
1074; GFX10-LABEL: v_maximum_v3f32__nsz:
1075; GFX10:       ; %bb.0:
1076; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077; GFX10-NEXT:    v_max_f32_e32 v6, v0, v3
1078; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
1079; GFX10-NEXT:    v_max_f32_e32 v7, v1, v4
1080; GFX10-NEXT:    v_max_f32_e32 v8, v2, v5
1081; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
1082; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
1083; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo
1084; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
1085; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1086; GFX10-NEXT:    s_setpc_b64 s[30:31]
1087;
1088; GFX11-LABEL: v_maximum_v3f32__nsz:
1089; GFX11:       ; %bb.0:
1090; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1091; GFX11-NEXT:    v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4
1092; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v3
1093; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1094; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo
1095; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v4
1096; GFX11-NEXT:    v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7
1097; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v5
1098; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1099; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1100; GFX11-NEXT:    s_setpc_b64 s[30:31]
1101;
1102; GFX12-LABEL: v_maximum_v3f32__nsz:
1103; GFX12:       ; %bb.0:
1104; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1105; GFX12-NEXT:    s_wait_expcnt 0x0
1106; GFX12-NEXT:    s_wait_samplecnt 0x0
1107; GFX12-NEXT:    s_wait_bvhcnt 0x0
1108; GFX12-NEXT:    s_wait_kmcnt 0x0
1109; GFX12-NEXT:    v_maximum_f32 v0, v0, v3
1110; GFX12-NEXT:    v_maximum_f32 v1, v1, v4
1111; GFX12-NEXT:    v_maximum_f32 v2, v2, v5
1112; GFX12-NEXT:    s_setpc_b64 s[30:31]
1113  %op = call nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
1114  ret <3 x float> %op
1115}
1116
; Lowering of llvm.maximum.v3f32 when the call carries both nnan and nsz.
; Recorded expectations: GFX7, GFX8, GFX900, GFX10 and GFX11 use a bare max per
; element (v_max_f32 or v_dual_max_f32) with no compare or select; GFX950 uses
; v_maximum3_f32 and GFX12 uses v_maximum_f32.
1117define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %src1) {
1118; GFX7-LABEL: v_maximum_v3f32__nnan_nsz:
1119; GFX7:       ; %bb.0:
1120; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121; GFX7-NEXT:    v_max_f32_e32 v0, v0, v3
1122; GFX7-NEXT:    v_max_f32_e32 v1, v1, v4
1123; GFX7-NEXT:    v_max_f32_e32 v2, v2, v5
1124; GFX7-NEXT:    s_setpc_b64 s[30:31]
1125;
1126; GFX8-LABEL: v_maximum_v3f32__nnan_nsz:
1127; GFX8:       ; %bb.0:
1128; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1129; GFX8-NEXT:    v_max_f32_e32 v0, v0, v3
1130; GFX8-NEXT:    v_max_f32_e32 v1, v1, v4
1131; GFX8-NEXT:    v_max_f32_e32 v2, v2, v5
1132; GFX8-NEXT:    s_setpc_b64 s[30:31]
1133;
1134; GFX900-LABEL: v_maximum_v3f32__nnan_nsz:
1135; GFX900:       ; %bb.0:
1136; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137; GFX900-NEXT:    v_max_f32_e32 v0, v0, v3
1138; GFX900-NEXT:    v_max_f32_e32 v1, v1, v4
1139; GFX900-NEXT:    v_max_f32_e32 v2, v2, v5
1140; GFX900-NEXT:    s_setpc_b64 s[30:31]
1141;
1142; GFX950-LABEL: v_maximum_v3f32__nnan_nsz:
1143; GFX950:       ; %bb.0:
1144; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145; GFX950-NEXT:    v_maximum3_f32 v0, v0, v3, v3
1146; GFX950-NEXT:    v_maximum3_f32 v1, v1, v4, v4
1147; GFX950-NEXT:    v_maximum3_f32 v2, v2, v5, v5
1148; GFX950-NEXT:    s_setpc_b64 s[30:31]
1149;
1150; GFX10-LABEL: v_maximum_v3f32__nnan_nsz:
1151; GFX10:       ; %bb.0:
1152; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; GFX10-NEXT:    v_max_f32_e32 v0, v0, v3
1154; GFX10-NEXT:    v_max_f32_e32 v1, v1, v4
1155; GFX10-NEXT:    v_max_f32_e32 v2, v2, v5
1156; GFX10-NEXT:    s_setpc_b64 s[30:31]
1157;
1158; GFX11-LABEL: v_maximum_v3f32__nnan_nsz:
1159; GFX11:       ; %bb.0:
1160; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1161; GFX11-NEXT:    v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
1162; GFX11-NEXT:    v_max_f32_e32 v2, v2, v5
1163; GFX11-NEXT:    s_setpc_b64 s[30:31]
1164;
1165; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
1166; GFX12:       ; %bb.0:
1167; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1168; GFX12-NEXT:    s_wait_expcnt 0x0
1169; GFX12-NEXT:    s_wait_samplecnt 0x0
1170; GFX12-NEXT:    s_wait_bvhcnt 0x0
1171; GFX12-NEXT:    s_wait_kmcnt 0x0
1172; GFX12-NEXT:    v_maximum_f32 v0, v0, v3
1173; GFX12-NEXT:    v_maximum_f32 v1, v1, v4
1174; GFX12-NEXT:    v_maximum_f32 v2, v2, v5
1175; GFX12-NEXT:    s_setpc_b64 s[30:31]
1176  %op = call nnan nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1)
1177  ret <3 x float> %op
1178}
1179
; Lowering of llvm.maximum.v4f32 with no fast-math flags on the call.
; Recorded expectations, one operation per vector element: GFX7, GFX8, GFX900,
; GFX10 and GFX11 pair each max (v_max_f32 or v_dual_max_f32) with a
; v_cmp_o_f32 and a v_cndmask_b32 against 0x7fc00000 (the f32 quiet-NaN bit
; pattern); GFX950 uses v_maximum3_f32 with the second source repeated; GFX12
; uses v_maximum_f32.
1180define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
1181; GFX7-LABEL: v_maximum_v4f32:
1182; GFX7:       ; %bb.0:
1183; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
1185; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1186; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1187; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1188; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
1189; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1190; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1191; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
1192; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1193; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1194; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
1195; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1196; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1197; GFX7-NEXT:    s_setpc_b64 s[30:31]
1198;
1199; GFX8-LABEL: v_maximum_v4f32:
1200; GFX8:       ; %bb.0:
1201; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202; GFX8-NEXT:    v_max_f32_e32 v8, v0, v4
1203; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1204; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1205; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1206; GFX8-NEXT:    v_max_f32_e32 v4, v1, v5
1207; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1208; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1209; GFX8-NEXT:    v_max_f32_e32 v4, v2, v6
1210; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1211; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1212; GFX8-NEXT:    v_max_f32_e32 v4, v3, v7
1213; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1214; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1215; GFX8-NEXT:    s_setpc_b64 s[30:31]
1216;
1217; GFX900-LABEL: v_maximum_v4f32:
1218; GFX900:       ; %bb.0:
1219; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1220; GFX900-NEXT:    v_max_f32_e32 v8, v0, v4
1221; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1222; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1223; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1224; GFX900-NEXT:    v_max_f32_e32 v4, v1, v5
1225; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1226; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1227; GFX900-NEXT:    v_max_f32_e32 v4, v2, v6
1228; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1229; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1230; GFX900-NEXT:    v_max_f32_e32 v4, v3, v7
1231; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1232; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1233; GFX900-NEXT:    s_setpc_b64 s[30:31]
1234;
1235; GFX950-LABEL: v_maximum_v4f32:
1236; GFX950:       ; %bb.0:
1237; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1238; GFX950-NEXT:    v_maximum3_f32 v0, v0, v4, v4
1239; GFX950-NEXT:    v_maximum3_f32 v1, v1, v5, v5
1240; GFX950-NEXT:    v_maximum3_f32 v2, v2, v6, v6
1241; GFX950-NEXT:    v_maximum3_f32 v3, v3, v7, v7
1242; GFX950-NEXT:    s_setpc_b64 s[30:31]
1243;
1244; GFX10-LABEL: v_maximum_v4f32:
1245; GFX10:       ; %bb.0:
1246; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247; GFX10-NEXT:    v_max_f32_e32 v8, v0, v4
1248; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
1249; GFX10-NEXT:    v_max_f32_e32 v9, v1, v5
1250; GFX10-NEXT:    v_max_f32_e32 v4, v2, v6
1251; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1252; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
1253; GFX10-NEXT:    v_max_f32_e32 v8, v3, v7
1254; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
1255; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
1256; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1257; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
1258; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1259; GFX10-NEXT:    s_setpc_b64 s[30:31]
1260;
1261; GFX11-LABEL: v_maximum_v4f32:
1262; GFX11:       ; %bb.0:
1263; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1264; GFX11-NEXT:    v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
1265; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
1266; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1267; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1268; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
1269; GFX11-NEXT:    v_max_f32_e32 v4, v2, v6
1270; GFX11-NEXT:    v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
1271; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
1272; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1273; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
1274; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1275; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1276; GFX11-NEXT:    s_setpc_b64 s[30:31]
1277;
1278; GFX12-LABEL: v_maximum_v4f32:
1279; GFX12:       ; %bb.0:
1280; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1281; GFX12-NEXT:    s_wait_expcnt 0x0
1282; GFX12-NEXT:    s_wait_samplecnt 0x0
1283; GFX12-NEXT:    s_wait_bvhcnt 0x0
1284; GFX12-NEXT:    s_wait_kmcnt 0x0
1285; GFX12-NEXT:    v_maximum_f32 v0, v0, v4
1286; GFX12-NEXT:    v_maximum_f32 v1, v1, v5
1287; GFX12-NEXT:    v_maximum_f32 v2, v2, v6
1288; GFX12-NEXT:    v_maximum_f32 v3, v3, v7
1289; GFX12-NEXT:    s_setpc_b64 s[30:31]
1290  %op = call <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
1291  ret <4 x float> %op
1292}
1293
; Lowering of llvm.maximum.v4f32 when the call carries the nnan flag.
; Recorded expectations: GFX7, GFX8, GFX900, GFX10 and GFX11 use a bare max per
; element (v_max_f32 or v_dual_max_f32) with no compare or select; GFX950 uses
; v_maximum3_f32 and GFX12 uses v_maximum_f32.
1294define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) {
1295; GFX7-LABEL: v_maximum_v4f32__nnan:
1296; GFX7:       ; %bb.0:
1297; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
1299; GFX7-NEXT:    v_max_f32_e32 v1, v1, v5
1300; GFX7-NEXT:    v_max_f32_e32 v2, v2, v6
1301; GFX7-NEXT:    v_max_f32_e32 v3, v3, v7
1302; GFX7-NEXT:    s_setpc_b64 s[30:31]
1303;
1304; GFX8-LABEL: v_maximum_v4f32__nnan:
1305; GFX8:       ; %bb.0:
1306; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307; GFX8-NEXT:    v_max_f32_e32 v0, v0, v4
1308; GFX8-NEXT:    v_max_f32_e32 v1, v1, v5
1309; GFX8-NEXT:    v_max_f32_e32 v2, v2, v6
1310; GFX8-NEXT:    v_max_f32_e32 v3, v3, v7
1311; GFX8-NEXT:    s_setpc_b64 s[30:31]
1312;
1313; GFX900-LABEL: v_maximum_v4f32__nnan:
1314; GFX900:       ; %bb.0:
1315; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1316; GFX900-NEXT:    v_max_f32_e32 v0, v0, v4
1317; GFX900-NEXT:    v_max_f32_e32 v1, v1, v5
1318; GFX900-NEXT:    v_max_f32_e32 v2, v2, v6
1319; GFX900-NEXT:    v_max_f32_e32 v3, v3, v7
1320; GFX900-NEXT:    s_setpc_b64 s[30:31]
1321;
1322; GFX950-LABEL: v_maximum_v4f32__nnan:
1323; GFX950:       ; %bb.0:
1324; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325; GFX950-NEXT:    v_maximum3_f32 v0, v0, v4, v4
1326; GFX950-NEXT:    v_maximum3_f32 v1, v1, v5, v5
1327; GFX950-NEXT:    v_maximum3_f32 v2, v2, v6, v6
1328; GFX950-NEXT:    v_maximum3_f32 v3, v3, v7, v7
1329; GFX950-NEXT:    s_setpc_b64 s[30:31]
1330;
1331; GFX10-LABEL: v_maximum_v4f32__nnan:
1332; GFX10:       ; %bb.0:
1333; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334; GFX10-NEXT:    v_max_f32_e32 v0, v0, v4
1335; GFX10-NEXT:    v_max_f32_e32 v1, v1, v5
1336; GFX10-NEXT:    v_max_f32_e32 v2, v2, v6
1337; GFX10-NEXT:    v_max_f32_e32 v3, v3, v7
1338; GFX10-NEXT:    s_setpc_b64 s[30:31]
1339;
1340; GFX11-LABEL: v_maximum_v4f32__nnan:
1341; GFX11:       ; %bb.0:
1342; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX11-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
1344; GFX11-NEXT:    v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
1345; GFX11-NEXT:    s_setpc_b64 s[30:31]
1346;
1347; GFX12-LABEL: v_maximum_v4f32__nnan:
1348; GFX12:       ; %bb.0:
1349; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1350; GFX12-NEXT:    s_wait_expcnt 0x0
1351; GFX12-NEXT:    s_wait_samplecnt 0x0
1352; GFX12-NEXT:    s_wait_bvhcnt 0x0
1353; GFX12-NEXT:    s_wait_kmcnt 0x0
1354; GFX12-NEXT:    v_maximum_f32 v0, v0, v4
1355; GFX12-NEXT:    v_maximum_f32 v1, v1, v5
1356; GFX12-NEXT:    v_maximum_f32 v2, v2, v6
1357; GFX12-NEXT:    v_maximum_f32 v3, v3, v7
1358; GFX12-NEXT:    s_setpc_b64 s[30:31]
1359  %op = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
1360  ret <4 x float> %op
1361}
1362
; Lowering of llvm.maximum.v4f32 when the call carries only the nsz flag.
; Recorded expectations: GFX7, GFX8, GFX900, GFX10 and GFX11 still pair each
; max with a v_cmp_o_f32 and a v_cndmask_b32 against 0x7fc00000 (the f32
; quiet-NaN bit pattern); GFX950 uses v_maximum3_f32 and GFX12 uses
; v_maximum_f32.
1363define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
1364; GFX7-LABEL: v_maximum_v4f32__nsz:
1365; GFX7:       ; %bb.0:
1366; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX7-NEXT:    v_max_f32_e32 v8, v0, v4
1368; GFX7-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1369; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1370; GFX7-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1371; GFX7-NEXT:    v_max_f32_e32 v4, v1, v5
1372; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1373; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1374; GFX7-NEXT:    v_max_f32_e32 v4, v2, v6
1375; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1376; GFX7-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1377; GFX7-NEXT:    v_max_f32_e32 v4, v3, v7
1378; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1379; GFX7-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1380; GFX7-NEXT:    s_setpc_b64 s[30:31]
1381;
1382; GFX8-LABEL: v_maximum_v4f32__nsz:
1383; GFX8:       ; %bb.0:
1384; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385; GFX8-NEXT:    v_max_f32_e32 v8, v0, v4
1386; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1387; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1388; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1389; GFX8-NEXT:    v_max_f32_e32 v4, v1, v5
1390; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1391; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1392; GFX8-NEXT:    v_max_f32_e32 v4, v2, v6
1393; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1394; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1395; GFX8-NEXT:    v_max_f32_e32 v4, v3, v7
1396; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1397; GFX8-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1398; GFX8-NEXT:    s_setpc_b64 s[30:31]
1399;
1400; GFX900-LABEL: v_maximum_v4f32__nsz:
1401; GFX900:       ; %bb.0:
1402; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403; GFX900-NEXT:    v_max_f32_e32 v8, v0, v4
1404; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7fc00000
1405; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v4
1406; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v8, vcc
1407; GFX900-NEXT:    v_max_f32_e32 v4, v1, v5
1408; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v5
1409; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
1410; GFX900-NEXT:    v_max_f32_e32 v4, v2, v6
1411; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v6
1412; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v4, vcc
1413; GFX900-NEXT:    v_max_f32_e32 v4, v3, v7
1414; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v7
1415; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v4, vcc
1416; GFX900-NEXT:    s_setpc_b64 s[30:31]
1417;
1418; GFX950-LABEL: v_maximum_v4f32__nsz:
1419; GFX950:       ; %bb.0:
1420; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1421; GFX950-NEXT:    v_maximum3_f32 v0, v0, v4, v4
1422; GFX950-NEXT:    v_maximum3_f32 v1, v1, v5, v5
1423; GFX950-NEXT:    v_maximum3_f32 v2, v2, v6, v6
1424; GFX950-NEXT:    v_maximum3_f32 v3, v3, v7, v7
1425; GFX950-NEXT:    s_setpc_b64 s[30:31]
1426;
1427; GFX10-LABEL: v_maximum_v4f32__nsz:
1428; GFX10:       ; %bb.0:
1429; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430; GFX10-NEXT:    v_max_f32_e32 v8, v0, v4
1431; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
1432; GFX10-NEXT:    v_max_f32_e32 v9, v1, v5
1433; GFX10-NEXT:    v_max_f32_e32 v4, v2, v6
1434; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1435; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
1436; GFX10-NEXT:    v_max_f32_e32 v8, v3, v7
1437; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo
1438; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
1439; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1440; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
1441; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1442; GFX10-NEXT:    s_setpc_b64 s[30:31]
1443;
1444; GFX11-LABEL: v_maximum_v4f32__nsz:
1445; GFX11:       ; %bb.0:
1446; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1447; GFX11-NEXT:    v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5
1448; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v4
1449; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
1450; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo
1451; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v5
1452; GFX11-NEXT:    v_max_f32_e32 v4, v2, v6
1453; GFX11-NEXT:    v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9
1454; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v6
1455; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo
1456; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v7
1457; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1458; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
1459; GFX11-NEXT:    s_setpc_b64 s[30:31]
1460;
1461; GFX12-LABEL: v_maximum_v4f32__nsz:
1462; GFX12:       ; %bb.0:
1463; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1464; GFX12-NEXT:    s_wait_expcnt 0x0
1465; GFX12-NEXT:    s_wait_samplecnt 0x0
1466; GFX12-NEXT:    s_wait_bvhcnt 0x0
1467; GFX12-NEXT:    s_wait_kmcnt 0x0
1468; GFX12-NEXT:    v_maximum_f32 v0, v0, v4
1469; GFX12-NEXT:    v_maximum_f32 v1, v1, v5
1470; GFX12-NEXT:    v_maximum_f32 v2, v2, v6
1471; GFX12-NEXT:    v_maximum_f32 v3, v3, v7
1472; GFX12-NEXT:    s_setpc_b64 s[30:31]
1473  %op = call nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
1474  ret <4 x float> %op
1475}
1476
; Lowering of llvm.maximum.v4f32 when the call carries both nnan and nsz.
; Recorded expectations: GFX7, GFX8, GFX900, GFX10 and GFX11 use a bare max per
; element (v_max_f32 or v_dual_max_f32) with no compare or select; GFX950 uses
; v_maximum3_f32 and GFX12 uses v_maximum_f32.
1477define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %src1) {
1478; GFX7-LABEL: v_maximum_v4f32__nnan_nsz:
1479; GFX7:       ; %bb.0:
1480; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1481; GFX7-NEXT:    v_max_f32_e32 v0, v0, v4
1482; GFX7-NEXT:    v_max_f32_e32 v1, v1, v5
1483; GFX7-NEXT:    v_max_f32_e32 v2, v2, v6
1484; GFX7-NEXT:    v_max_f32_e32 v3, v3, v7
1485; GFX7-NEXT:    s_setpc_b64 s[30:31]
1486;
1487; GFX8-LABEL: v_maximum_v4f32__nnan_nsz:
1488; GFX8:       ; %bb.0:
1489; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490; GFX8-NEXT:    v_max_f32_e32 v0, v0, v4
1491; GFX8-NEXT:    v_max_f32_e32 v1, v1, v5
1492; GFX8-NEXT:    v_max_f32_e32 v2, v2, v6
1493; GFX8-NEXT:    v_max_f32_e32 v3, v3, v7
1494; GFX8-NEXT:    s_setpc_b64 s[30:31]
1495;
1496; GFX900-LABEL: v_maximum_v4f32__nnan_nsz:
1497; GFX900:       ; %bb.0:
1498; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499; GFX900-NEXT:    v_max_f32_e32 v0, v0, v4
1500; GFX900-NEXT:    v_max_f32_e32 v1, v1, v5
1501; GFX900-NEXT:    v_max_f32_e32 v2, v2, v6
1502; GFX900-NEXT:    v_max_f32_e32 v3, v3, v7
1503; GFX900-NEXT:    s_setpc_b64 s[30:31]
1504;
1505; GFX950-LABEL: v_maximum_v4f32__nnan_nsz:
1506; GFX950:       ; %bb.0:
1507; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508; GFX950-NEXT:    v_maximum3_f32 v0, v0, v4, v4
1509; GFX950-NEXT:    v_maximum3_f32 v1, v1, v5, v5
1510; GFX950-NEXT:    v_maximum3_f32 v2, v2, v6, v6
1511; GFX950-NEXT:    v_maximum3_f32 v3, v3, v7, v7
1512; GFX950-NEXT:    s_setpc_b64 s[30:31]
1513;
1514; GFX10-LABEL: v_maximum_v4f32__nnan_nsz:
1515; GFX10:       ; %bb.0:
1516; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517; GFX10-NEXT:    v_max_f32_e32 v0, v0, v4
1518; GFX10-NEXT:    v_max_f32_e32 v1, v1, v5
1519; GFX10-NEXT:    v_max_f32_e32 v2, v2, v6
1520; GFX10-NEXT:    v_max_f32_e32 v3, v3, v7
1521; GFX10-NEXT:    s_setpc_b64 s[30:31]
1522;
1523; GFX11-LABEL: v_maximum_v4f32__nnan_nsz:
1524; GFX11:       ; %bb.0:
1525; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1526; GFX11-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
1527; GFX11-NEXT:    v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
1528; GFX11-NEXT:    s_setpc_b64 s[30:31]
1529;
1530; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
1531; GFX12:       ; %bb.0:
1532; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1533; GFX12-NEXT:    s_wait_expcnt 0x0
1534; GFX12-NEXT:    s_wait_samplecnt 0x0
1535; GFX12-NEXT:    s_wait_bvhcnt 0x0
1536; GFX12-NEXT:    s_wait_kmcnt 0x0
1537; GFX12-NEXT:    v_maximum_f32 v0, v0, v4
1538; GFX12-NEXT:    v_maximum_f32 v1, v1, v5
1539; GFX12-NEXT:    v_maximum_f32 v2, v2, v6
1540; GFX12-NEXT:    v_maximum_f32 v3, v3, v7
1541; GFX12-NEXT:    s_setpc_b64 s[30:31]
1542  %op = call nnan nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1)
1543  ret <4 x float> %op
1544}
1545
; <8 x float> llvm.maximum with no fast-math flags on the call. For gfx703,
; gfx803, gfx900, gfx1030 and gfx1100 the expected code computes a max per
; lane and then uses v_cmp_o_f32 plus a cndmask to choose between that result
; and the constant 0x7fc00000 (the f32 quiet-NaN bit pattern). gfx950 expects
; a single v_maximum3_f32 per lane and gfx1200 a single v_maximum_f32 per lane.
1546define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
1547; GFX7-LABEL: v_maximum_v8f32:
1548; GFX7:       ; %bb.0:
1549; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550; GFX7-NEXT:    v_max_f32_e32 v16, v0, v8
1551; GFX7-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
1552; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
1553; GFX7-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
1554; GFX7-NEXT:    v_max_f32_e32 v8, v1, v9
1555; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
1556; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
1557; GFX7-NEXT:    v_max_f32_e32 v8, v2, v10
1558; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
1559; GFX7-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
1560; GFX7-NEXT:    v_max_f32_e32 v8, v3, v11
1561; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
1562; GFX7-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
1563; GFX7-NEXT:    v_max_f32_e32 v8, v4, v12
1564; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
1565; GFX7-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
1566; GFX7-NEXT:    v_max_f32_e32 v8, v5, v13
1567; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
1568; GFX7-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
1569; GFX7-NEXT:    v_max_f32_e32 v8, v6, v14
1570; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
1571; GFX7-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
1572; GFX7-NEXT:    v_max_f32_e32 v8, v7, v15
1573; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
1574; GFX7-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
1575; GFX7-NEXT:    s_setpc_b64 s[30:31]
1576;
1577; GFX8-LABEL: v_maximum_v8f32:
1578; GFX8:       ; %bb.0:
1579; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580; GFX8-NEXT:    v_max_f32_e32 v16, v0, v8
1581; GFX8-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
1582; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
1583; GFX8-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
1584; GFX8-NEXT:    v_max_f32_e32 v8, v1, v9
1585; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
1586; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
1587; GFX8-NEXT:    v_max_f32_e32 v8, v2, v10
1588; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
1589; GFX8-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
1590; GFX8-NEXT:    v_max_f32_e32 v8, v3, v11
1591; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
1592; GFX8-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
1593; GFX8-NEXT:    v_max_f32_e32 v8, v4, v12
1594; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
1595; GFX8-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
1596; GFX8-NEXT:    v_max_f32_e32 v8, v5, v13
1597; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
1598; GFX8-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
1599; GFX8-NEXT:    v_max_f32_e32 v8, v6, v14
1600; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
1601; GFX8-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
1602; GFX8-NEXT:    v_max_f32_e32 v8, v7, v15
1603; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
1604; GFX8-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
1605; GFX8-NEXT:    s_setpc_b64 s[30:31]
1606;
1607; GFX900-LABEL: v_maximum_v8f32:
1608; GFX900:       ; %bb.0:
1609; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1610; GFX900-NEXT:    v_max_f32_e32 v16, v0, v8
1611; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7fc00000
1612; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v0, v8
1613; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v16, vcc
1614; GFX900-NEXT:    v_max_f32_e32 v8, v1, v9
1615; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v9
1616; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v8, vcc
1617; GFX900-NEXT:    v_max_f32_e32 v8, v2, v10
1618; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v2, v10
1619; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v8, vcc
1620; GFX900-NEXT:    v_max_f32_e32 v8, v3, v11
1621; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v3, v11
1622; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v8, vcc
1623; GFX900-NEXT:    v_max_f32_e32 v8, v4, v12
1624; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v4, v12
1625; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v8, vcc
1626; GFX900-NEXT:    v_max_f32_e32 v8, v5, v13
1627; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v5, v13
1628; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v8, vcc
1629; GFX900-NEXT:    v_max_f32_e32 v8, v6, v14
1630; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v6, v14
1631; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v8, vcc
1632; GFX900-NEXT:    v_max_f32_e32 v8, v7, v15
1633; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v7, v15
1634; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v8, vcc
1635; GFX900-NEXT:    s_setpc_b64 s[30:31]
1636;
1637; GFX950-LABEL: v_maximum_v8f32:
1638; GFX950:       ; %bb.0:
1639; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1640; GFX950-NEXT:    v_maximum3_f32 v0, v0, v8, v8
1641; GFX950-NEXT:    v_maximum3_f32 v1, v1, v9, v9
1642; GFX950-NEXT:    v_maximum3_f32 v2, v2, v10, v10
1643; GFX950-NEXT:    v_maximum3_f32 v3, v3, v11, v11
1644; GFX950-NEXT:    v_maximum3_f32 v4, v4, v12, v12
1645; GFX950-NEXT:    v_maximum3_f32 v5, v5, v13, v13
1646; GFX950-NEXT:    v_maximum3_f32 v6, v6, v14, v14
1647; GFX950-NEXT:    v_maximum3_f32 v7, v7, v15, v15
1648; GFX950-NEXT:    s_setpc_b64 s[30:31]
1649;
1650; GFX10-LABEL: v_maximum_v8f32:
1651; GFX10:       ; %bb.0:
1652; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653; GFX10-NEXT:    v_max_f32_e32 v16, v0, v8
1654; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
1655; GFX10-NEXT:    v_max_f32_e32 v17, v1, v9
1656; GFX10-NEXT:    v_max_f32_e32 v8, v2, v10
1657; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
1658; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v9
1659; GFX10-NEXT:    v_max_f32_e32 v9, v3, v11
1660; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
1661; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v10
1662; GFX10-NEXT:    v_max_f32_e32 v10, v7, v15
1663; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1664; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v11
1665; GFX10-NEXT:    v_max_f32_e32 v8, v4, v12
1666; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo
1667; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v12
1668; GFX10-NEXT:    v_max_f32_e32 v9, v5, v13
1669; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo
1670; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v13
1671; GFX10-NEXT:    v_max_f32_e32 v8, v6, v14
1672; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo
1673; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v14
1674; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
1675; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v15
1676; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
1677; GFX10-NEXT:    s_setpc_b64 s[30:31]
1678;
1679; GFX11-LABEL: v_maximum_v8f32:
1680; GFX11:       ; %bb.0:
1681; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682; GFX11-NEXT:    v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9
1683; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v8
1684; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1685; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo
1686; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v9
1687; GFX11-NEXT:    v_dual_max_f32 v9, v3, v11 :: v_dual_max_f32 v8, v2, v10
1688; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo
1689; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v10
1690; GFX11-NEXT:    v_max_f32_e32 v10, v7, v15
1691; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
1692; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
1693; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v11
1694; GFX11-NEXT:    v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9
1695; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v12
1696; GFX11-NEXT:    v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8
1697; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v13
1698; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1699; GFX11-NEXT:    v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9
1700; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v14
1701; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo
1702; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v15
1703; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
1704; GFX11-NEXT:    s_setpc_b64 s[30:31]
1705;
1706; GFX12-LABEL: v_maximum_v8f32:
1707; GFX12:       ; %bb.0:
1708; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1709; GFX12-NEXT:    s_wait_expcnt 0x0
1710; GFX12-NEXT:    s_wait_samplecnt 0x0
1711; GFX12-NEXT:    s_wait_bvhcnt 0x0
1712; GFX12-NEXT:    s_wait_kmcnt 0x0
1713; GFX12-NEXT:    v_maximum_f32 v0, v0, v8
1714; GFX12-NEXT:    v_maximum_f32 v1, v1, v9
1715; GFX12-NEXT:    v_maximum_f32 v2, v2, v10
1716; GFX12-NEXT:    v_maximum_f32 v3, v3, v11
1717; GFX12-NEXT:    v_maximum_f32 v4, v4, v12
1718; GFX12-NEXT:    v_maximum_f32 v5, v5, v13
1719; GFX12-NEXT:    v_maximum_f32 v6, v6, v14
1720; GFX12-NEXT:    v_maximum_f32 v7, v7, v15
1721; GFX12-NEXT:    s_setpc_b64 s[30:31]
1722  %op = call <8 x float> @llvm.maximum.v8f32(<8 x float> %src0, <8 x float> %src1)
1723  ret <8 x float> %op
1724}
1725
; <16 x float> llvm.maximum with no fast-math flags on the call. In every
; expected sequence the first 31 argument dwords are read from v0-v30 and the
; last element of %src1 is loaded from the stack at s32 before lane 15 is
; computed. On gfx703, gfx803 and gfx900 most ordered-compare results are held
; in separate SGPR pairs, including s[30:31]; the expected code therefore saves
; s30/s31 into lanes 0 and 1 of v31 (v31 itself is spilled at offset:4) and
; reads them back before the final s_setpc_b64 s[30:31].
1726define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
1727; GFX7-LABEL: v_maximum_v16f32:
1728; GFX7:       ; %bb.0:
1729; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1730; GFX7-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1731; GFX7-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1732; GFX7-NEXT:    s_mov_b64 exec, s[4:5]
1733; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
1734; GFX7-NEXT:    v_max_f32_e32 v1, v1, v17
1735; GFX7-NEXT:    buffer_load_dword v17, off, s[0:3], s32
1736; GFX7-NEXT:    v_writelane_b32 v31, s30, 0
1737; GFX7-NEXT:    v_writelane_b32 v31, s31, 1
1738; GFX7-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v18
1739; GFX7-NEXT:    v_max_f32_e32 v2, v2, v18
1740; GFX7-NEXT:    v_cmp_o_f32_e64 s[6:7], v3, v19
1741; GFX7-NEXT:    v_max_f32_e32 v3, v3, v19
1742; GFX7-NEXT:    v_mov_b32_e32 v18, 0x7fc00000
1743; GFX7-NEXT:    v_max_f32_e32 v19, v0, v16
1744; GFX7-NEXT:    v_cmp_o_f32_e64 s[28:29], v0, v16
1745; GFX7-NEXT:    v_max_f32_e32 v16, v14, v30
1746; GFX7-NEXT:    v_cmp_o_f32_e64 s[30:31], v14, v30
1747; GFX7-NEXT:    v_cmp_o_f32_e64 s[8:9], v4, v20
1748; GFX7-NEXT:    v_max_f32_e32 v4, v4, v20
1749; GFX7-NEXT:    v_cmp_o_f32_e64 s[10:11], v5, v21
1750; GFX7-NEXT:    v_max_f32_e32 v5, v5, v21
1751; GFX7-NEXT:    v_cmp_o_f32_e64 s[12:13], v6, v22
1752; GFX7-NEXT:    v_max_f32_e32 v6, v6, v22
1753; GFX7-NEXT:    v_cmp_o_f32_e64 s[14:15], v7, v23
1754; GFX7-NEXT:    v_max_f32_e32 v7, v7, v23
1755; GFX7-NEXT:    v_cmp_o_f32_e64 s[16:17], v8, v24
1756; GFX7-NEXT:    v_max_f32_e32 v8, v8, v24
1757; GFX7-NEXT:    v_cmp_o_f32_e64 s[18:19], v9, v25
1758; GFX7-NEXT:    v_max_f32_e32 v9, v9, v25
1759; GFX7-NEXT:    v_cmp_o_f32_e64 s[20:21], v10, v26
1760; GFX7-NEXT:    v_max_f32_e32 v10, v10, v26
1761; GFX7-NEXT:    v_cmp_o_f32_e64 s[22:23], v11, v27
1762; GFX7-NEXT:    v_max_f32_e32 v11, v11, v27
1763; GFX7-NEXT:    v_cmp_o_f32_e64 s[24:25], v12, v28
1764; GFX7-NEXT:    v_max_f32_e32 v12, v12, v28
1765; GFX7-NEXT:    v_cmp_o_f32_e64 s[26:27], v13, v29
1766; GFX7-NEXT:    v_max_f32_e32 v13, v13, v29
1767; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v1, vcc
1768; GFX7-NEXT:    v_cndmask_b32_e64 v14, v18, v16, s[30:31]
1769; GFX7-NEXT:    v_cndmask_b32_e64 v0, v18, v19, s[28:29]
1770; GFX7-NEXT:    v_cndmask_b32_e64 v2, v18, v2, s[4:5]
1771; GFX7-NEXT:    v_cndmask_b32_e64 v3, v18, v3, s[6:7]
1772; GFX7-NEXT:    v_cndmask_b32_e64 v4, v18, v4, s[8:9]
1773; GFX7-NEXT:    v_cndmask_b32_e64 v5, v18, v5, s[10:11]
1774; GFX7-NEXT:    v_cndmask_b32_e64 v6, v18, v6, s[12:13]
1775; GFX7-NEXT:    v_cndmask_b32_e64 v7, v18, v7, s[14:15]
1776; GFX7-NEXT:    v_cndmask_b32_e64 v8, v18, v8, s[16:17]
1777; GFX7-NEXT:    v_cndmask_b32_e64 v9, v18, v9, s[18:19]
1778; GFX7-NEXT:    v_cndmask_b32_e64 v10, v18, v10, s[20:21]
1779; GFX7-NEXT:    v_cndmask_b32_e64 v11, v18, v11, s[22:23]
1780; GFX7-NEXT:    v_cndmask_b32_e64 v12, v18, v12, s[24:25]
1781; GFX7-NEXT:    v_cndmask_b32_e64 v13, v18, v13, s[26:27]
1782; GFX7-NEXT:    v_readlane_b32 s31, v31, 1
1783; GFX7-NEXT:    v_readlane_b32 s30, v31, 0
1784; GFX7-NEXT:    s_waitcnt vmcnt(0)
1785; GFX7-NEXT:    v_max_f32_e32 v16, v15, v17
1786; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v15, v17
1787; GFX7-NEXT:    v_cndmask_b32_e32 v15, v18, v16, vcc
1788; GFX7-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1789; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1790; GFX7-NEXT:    s_mov_b64 exec, s[4:5]
1791; GFX7-NEXT:    s_waitcnt vmcnt(0)
1792; GFX7-NEXT:    s_setpc_b64 s[30:31]
1793;
1794; GFX8-LABEL: v_maximum_v16f32:
1795; GFX8:       ; %bb.0:
1796; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1797; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1798; GFX8-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1799; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
1800; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
1801; GFX8-NEXT:    v_max_f32_e32 v1, v1, v17
1802; GFX8-NEXT:    buffer_load_dword v17, off, s[0:3], s32
1803; GFX8-NEXT:    v_writelane_b32 v31, s30, 0
1804; GFX8-NEXT:    v_writelane_b32 v31, s31, 1
1805; GFX8-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v18
1806; GFX8-NEXT:    v_max_f32_e32 v2, v2, v18
1807; GFX8-NEXT:    v_cmp_o_f32_e64 s[6:7], v3, v19
1808; GFX8-NEXT:    v_max_f32_e32 v3, v3, v19
1809; GFX8-NEXT:    v_mov_b32_e32 v18, 0x7fc00000
1810; GFX8-NEXT:    v_max_f32_e32 v19, v0, v16
1811; GFX8-NEXT:    v_cmp_o_f32_e64 s[28:29], v0, v16
1812; GFX8-NEXT:    v_max_f32_e32 v16, v14, v30
1813; GFX8-NEXT:    v_cmp_o_f32_e64 s[30:31], v14, v30
1814; GFX8-NEXT:    v_cmp_o_f32_e64 s[8:9], v4, v20
1815; GFX8-NEXT:    v_max_f32_e32 v4, v4, v20
1816; GFX8-NEXT:    v_cmp_o_f32_e64 s[10:11], v5, v21
1817; GFX8-NEXT:    v_max_f32_e32 v5, v5, v21
1818; GFX8-NEXT:    v_cmp_o_f32_e64 s[12:13], v6, v22
1819; GFX8-NEXT:    v_max_f32_e32 v6, v6, v22
1820; GFX8-NEXT:    v_cmp_o_f32_e64 s[14:15], v7, v23
1821; GFX8-NEXT:    v_max_f32_e32 v7, v7, v23
1822; GFX8-NEXT:    v_cmp_o_f32_e64 s[16:17], v8, v24
1823; GFX8-NEXT:    v_max_f32_e32 v8, v8, v24
1824; GFX8-NEXT:    v_cmp_o_f32_e64 s[18:19], v9, v25
1825; GFX8-NEXT:    v_max_f32_e32 v9, v9, v25
1826; GFX8-NEXT:    v_cmp_o_f32_e64 s[20:21], v10, v26
1827; GFX8-NEXT:    v_max_f32_e32 v10, v10, v26
1828; GFX8-NEXT:    v_cmp_o_f32_e64 s[22:23], v11, v27
1829; GFX8-NEXT:    v_max_f32_e32 v11, v11, v27
1830; GFX8-NEXT:    v_cmp_o_f32_e64 s[24:25], v12, v28
1831; GFX8-NEXT:    v_max_f32_e32 v12, v12, v28
1832; GFX8-NEXT:    v_cmp_o_f32_e64 s[26:27], v13, v29
1833; GFX8-NEXT:    v_max_f32_e32 v13, v13, v29
1834; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v1, vcc
1835; GFX8-NEXT:    v_cndmask_b32_e64 v14, v18, v16, s[30:31]
1836; GFX8-NEXT:    v_cndmask_b32_e64 v0, v18, v19, s[28:29]
1837; GFX8-NEXT:    v_cndmask_b32_e64 v2, v18, v2, s[4:5]
1838; GFX8-NEXT:    v_cndmask_b32_e64 v3, v18, v3, s[6:7]
1839; GFX8-NEXT:    v_cndmask_b32_e64 v4, v18, v4, s[8:9]
1840; GFX8-NEXT:    v_cndmask_b32_e64 v5, v18, v5, s[10:11]
1841; GFX8-NEXT:    v_cndmask_b32_e64 v6, v18, v6, s[12:13]
1842; GFX8-NEXT:    v_cndmask_b32_e64 v7, v18, v7, s[14:15]
1843; GFX8-NEXT:    v_cndmask_b32_e64 v8, v18, v8, s[16:17]
1844; GFX8-NEXT:    v_cndmask_b32_e64 v9, v18, v9, s[18:19]
1845; GFX8-NEXT:    v_cndmask_b32_e64 v10, v18, v10, s[20:21]
1846; GFX8-NEXT:    v_cndmask_b32_e64 v11, v18, v11, s[22:23]
1847; GFX8-NEXT:    v_cndmask_b32_e64 v12, v18, v12, s[24:25]
1848; GFX8-NEXT:    v_cndmask_b32_e64 v13, v18, v13, s[26:27]
1849; GFX8-NEXT:    v_readlane_b32 s31, v31, 1
1850; GFX8-NEXT:    v_readlane_b32 s30, v31, 0
1851; GFX8-NEXT:    s_waitcnt vmcnt(0)
1852; GFX8-NEXT:    v_max_f32_e32 v16, v15, v17
1853; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v15, v17
1854; GFX8-NEXT:    v_cndmask_b32_e32 v15, v18, v16, vcc
1855; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1856; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1857; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
1858; GFX8-NEXT:    s_waitcnt vmcnt(0)
1859; GFX8-NEXT:    s_setpc_b64 s[30:31]
1860;
1861; GFX900-LABEL: v_maximum_v16f32:
1862; GFX900:       ; %bb.0:
1863; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1865; GFX900-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1866; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
1867; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v1, v17
1868; GFX900-NEXT:    v_max_f32_e32 v1, v1, v17
1869; GFX900-NEXT:    buffer_load_dword v17, off, s[0:3], s32
1870; GFX900-NEXT:    v_writelane_b32 v31, s30, 0
1871; GFX900-NEXT:    v_writelane_b32 v31, s31, 1
1872; GFX900-NEXT:    v_cmp_o_f32_e64 s[4:5], v2, v18
1873; GFX900-NEXT:    v_max_f32_e32 v2, v2, v18
1874; GFX900-NEXT:    v_cmp_o_f32_e64 s[6:7], v3, v19
1875; GFX900-NEXT:    v_max_f32_e32 v3, v3, v19
1876; GFX900-NEXT:    v_mov_b32_e32 v18, 0x7fc00000
1877; GFX900-NEXT:    v_max_f32_e32 v19, v0, v16
1878; GFX900-NEXT:    v_cmp_o_f32_e64 s[28:29], v0, v16
1879; GFX900-NEXT:    v_max_f32_e32 v16, v14, v30
1880; GFX900-NEXT:    v_cmp_o_f32_e64 s[30:31], v14, v30
1881; GFX900-NEXT:    v_cmp_o_f32_e64 s[8:9], v4, v20
1882; GFX900-NEXT:    v_max_f32_e32 v4, v4, v20
1883; GFX900-NEXT:    v_cmp_o_f32_e64 s[10:11], v5, v21
1884; GFX900-NEXT:    v_max_f32_e32 v5, v5, v21
1885; GFX900-NEXT:    v_cmp_o_f32_e64 s[12:13], v6, v22
1886; GFX900-NEXT:    v_max_f32_e32 v6, v6, v22
1887; GFX900-NEXT:    v_cmp_o_f32_e64 s[14:15], v7, v23
1888; GFX900-NEXT:    v_max_f32_e32 v7, v7, v23
1889; GFX900-NEXT:    v_cmp_o_f32_e64 s[16:17], v8, v24
1890; GFX900-NEXT:    v_max_f32_e32 v8, v8, v24
1891; GFX900-NEXT:    v_cmp_o_f32_e64 s[18:19], v9, v25
1892; GFX900-NEXT:    v_max_f32_e32 v9, v9, v25
1893; GFX900-NEXT:    v_cmp_o_f32_e64 s[20:21], v10, v26
1894; GFX900-NEXT:    v_max_f32_e32 v10, v10, v26
1895; GFX900-NEXT:    v_cmp_o_f32_e64 s[22:23], v11, v27
1896; GFX900-NEXT:    v_max_f32_e32 v11, v11, v27
1897; GFX900-NEXT:    v_cmp_o_f32_e64 s[24:25], v12, v28
1898; GFX900-NEXT:    v_max_f32_e32 v12, v12, v28
1899; GFX900-NEXT:    v_cmp_o_f32_e64 s[26:27], v13, v29
1900; GFX900-NEXT:    v_max_f32_e32 v13, v13, v29
1901; GFX900-NEXT:    v_cndmask_b32_e32 v1, v18, v1, vcc
1902; GFX900-NEXT:    v_cndmask_b32_e64 v14, v18, v16, s[30:31]
1903; GFX900-NEXT:    v_cndmask_b32_e64 v0, v18, v19, s[28:29]
1904; GFX900-NEXT:    v_cndmask_b32_e64 v2, v18, v2, s[4:5]
1905; GFX900-NEXT:    v_cndmask_b32_e64 v3, v18, v3, s[6:7]
1906; GFX900-NEXT:    v_cndmask_b32_e64 v4, v18, v4, s[8:9]
1907; GFX900-NEXT:    v_cndmask_b32_e64 v5, v18, v5, s[10:11]
1908; GFX900-NEXT:    v_cndmask_b32_e64 v6, v18, v6, s[12:13]
1909; GFX900-NEXT:    v_cndmask_b32_e64 v7, v18, v7, s[14:15]
1910; GFX900-NEXT:    v_cndmask_b32_e64 v8, v18, v8, s[16:17]
1911; GFX900-NEXT:    v_cndmask_b32_e64 v9, v18, v9, s[18:19]
1912; GFX900-NEXT:    v_cndmask_b32_e64 v10, v18, v10, s[20:21]
1913; GFX900-NEXT:    v_cndmask_b32_e64 v11, v18, v11, s[22:23]
1914; GFX900-NEXT:    v_cndmask_b32_e64 v12, v18, v12, s[24:25]
1915; GFX900-NEXT:    v_cndmask_b32_e64 v13, v18, v13, s[26:27]
1916; GFX900-NEXT:    v_readlane_b32 s31, v31, 1
1917; GFX900-NEXT:    v_readlane_b32 s30, v31, 0
1918; GFX900-NEXT:    s_waitcnt vmcnt(0)
1919; GFX900-NEXT:    v_max_f32_e32 v16, v15, v17
1920; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, v15, v17
1921; GFX900-NEXT:    v_cndmask_b32_e32 v15, v18, v16, vcc
1922; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1923; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1924; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
1925; GFX900-NEXT:    s_waitcnt vmcnt(0)
1926; GFX900-NEXT:    s_setpc_b64 s[30:31]
1927;
1928; GFX950-LABEL: v_maximum_v16f32:
1929; GFX950:       ; %bb.0:
1930; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931; GFX950-NEXT:    scratch_load_dword v31, off, s32
1932; GFX950-NEXT:    v_maximum3_f32 v0, v0, v16, v16
1933; GFX950-NEXT:    v_maximum3_f32 v1, v1, v17, v17
1934; GFX950-NEXT:    v_maximum3_f32 v2, v2, v18, v18
1935; GFX950-NEXT:    v_maximum3_f32 v3, v3, v19, v19
1936; GFX950-NEXT:    v_maximum3_f32 v4, v4, v20, v20
1937; GFX950-NEXT:    v_maximum3_f32 v5, v5, v21, v21
1938; GFX950-NEXT:    v_maximum3_f32 v6, v6, v22, v22
1939; GFX950-NEXT:    v_maximum3_f32 v7, v7, v23, v23
1940; GFX950-NEXT:    v_maximum3_f32 v8, v8, v24, v24
1941; GFX950-NEXT:    v_maximum3_f32 v9, v9, v25, v25
1942; GFX950-NEXT:    v_maximum3_f32 v10, v10, v26, v26
1943; GFX950-NEXT:    v_maximum3_f32 v11, v11, v27, v27
1944; GFX950-NEXT:    v_maximum3_f32 v12, v12, v28, v28
1945; GFX950-NEXT:    v_maximum3_f32 v13, v13, v29, v29
1946; GFX950-NEXT:    v_maximum3_f32 v14, v14, v30, v30
1947; GFX950-NEXT:    s_waitcnt vmcnt(0)
1948; GFX950-NEXT:    v_maximum3_f32 v15, v15, v31, v31
1949; GFX950-NEXT:    s_setpc_b64 s[30:31]
1950;
1951; GFX10-LABEL: v_maximum_v16f32:
1952; GFX10:       ; %bb.0:
1953; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1954; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
1955; GFX10-NEXT:    v_max_f32_e32 v32, v0, v16
1956; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v16
1957; GFX10-NEXT:    v_max_f32_e32 v33, v1, v17
1958; GFX10-NEXT:    v_max_f32_e32 v34, v2, v18
1959; GFX10-NEXT:    v_max_f32_e32 v35, v3, v19
1960; GFX10-NEXT:    v_max_f32_e32 v36, v4, v20
1961; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
1962; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v17
1963; GFX10-NEXT:    v_max_f32_e32 v37, v5, v21
1964; GFX10-NEXT:    v_max_f32_e32 v38, v6, v22
1965; GFX10-NEXT:    v_max_f32_e32 v39, v7, v23
1966; GFX10-NEXT:    v_max_f32_e32 v48, v8, v24
1967; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
1968; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v18
1969; GFX10-NEXT:    v_max_f32_e32 v49, v9, v25
1970; GFX10-NEXT:    v_max_f32_e32 v50, v10, v26
1971; GFX10-NEXT:    v_max_f32_e32 v51, v11, v27
1972; GFX10-NEXT:    v_max_f32_e32 v52, v12, v28
1973; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
1974; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v19
1975; GFX10-NEXT:    v_max_f32_e32 v53, v13, v29
1976; GFX10-NEXT:    v_max_f32_e32 v54, v14, v30
1977; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
1978; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v20
1979; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
1980; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v21
1981; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
1982; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v22
1983; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
1984; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v23
1985; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
1986; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v8, v24
1987; GFX10-NEXT:    v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
1988; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v9, v25
1989; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
1990; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v10, v26
1991; GFX10-NEXT:    v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
1992; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v11, v27
1993; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
1994; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v12, v28
1995; GFX10-NEXT:    v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
1996; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v13, v29
1997; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
1998; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v14, v30
1999; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
2000; GFX10-NEXT:    s_waitcnt vmcnt(0)
2001; GFX10-NEXT:    v_max_f32_e32 v16, v15, v31
2002; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v15, v31
2003; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
2004; GFX10-NEXT:    s_setpc_b64 s[30:31]
2005;
2006; GFX11-LABEL: v_maximum_v16f32:
2007; GFX11:       ; %bb.0:
2008; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2009; GFX11-NEXT:    scratch_load_b32 v31, off, s32
2010; GFX11-NEXT:    v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17
2011; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v16
2012; GFX11-NEXT:    v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19
2013; GFX11-NEXT:    v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21
2014; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
2015; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo
2016; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v1, v17
2017; GFX11-NEXT:    v_max_f32_e32 v54, v14, v30
2018; GFX11-NEXT:    v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23
2019; GFX11-NEXT:    v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25
2020; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo
2021; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v2, v18
2022; GFX11-NEXT:    v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27
2023; GFX11-NEXT:    v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29
2024; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo
2025; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v3, v19
2026; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo
2027; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v20
2028; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo
2029; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v5, v21
2030; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo
2031; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v6, v22
2032; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo
2033; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v7, v23
2034; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo
2035; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v8, v24
2036; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo
2037; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v9, v25
2038; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo
2039; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v10, v26
2040; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo
2041; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v11, v27
2042; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo
2043; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v12, v28
2044; GFX11-NEXT:    v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo
2045; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v13, v29
2046; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo
2047; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v14, v30
2048; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo
2049; GFX11-NEXT:    s_waitcnt vmcnt(0)
2050; GFX11-NEXT:    v_max_f32_e32 v16, v15, v31
2051; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v15, v31
2052; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2053; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
2054; GFX11-NEXT:    s_setpc_b64 s[30:31]
2055;
2056; GFX12-LABEL: v_maximum_v16f32:
2057; GFX12:       ; %bb.0:
2058; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
2059; GFX12-NEXT:    s_wait_expcnt 0x0
2060; GFX12-NEXT:    s_wait_samplecnt 0x0
2061; GFX12-NEXT:    s_wait_bvhcnt 0x0
2062; GFX12-NEXT:    s_wait_kmcnt 0x0
2063; GFX12-NEXT:    scratch_load_b32 v31, off, s32
2064; GFX12-NEXT:    v_maximum_f32 v0, v0, v16
2065; GFX12-NEXT:    v_maximum_f32 v1, v1, v17
2066; GFX12-NEXT:    v_maximum_f32 v2, v2, v18
2067; GFX12-NEXT:    v_maximum_f32 v3, v3, v19
2068; GFX12-NEXT:    v_maximum_f32 v4, v4, v20
2069; GFX12-NEXT:    v_maximum_f32 v5, v5, v21
2070; GFX12-NEXT:    v_maximum_f32 v6, v6, v22
2071; GFX12-NEXT:    v_maximum_f32 v7, v7, v23
2072; GFX12-NEXT:    v_maximum_f32 v8, v8, v24
2073; GFX12-NEXT:    v_maximum_f32 v9, v9, v25
2074; GFX12-NEXT:    v_maximum_f32 v10, v10, v26
2075; GFX12-NEXT:    v_maximum_f32 v11, v11, v27
2076; GFX12-NEXT:    v_maximum_f32 v12, v12, v28
2077; GFX12-NEXT:    v_maximum_f32 v13, v13, v29
2078; GFX12-NEXT:    v_maximum_f32 v14, v14, v30
2079; GFX12-NEXT:    s_wait_loadcnt 0x0
2080; GFX12-NEXT:    v_maximum_f32 v15, v15, v31
2081; GFX12-NEXT:    s_setpc_b64 s[30:31]
2082  %op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1)
2083  ret <16 x float> %op
2084}
2085;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2086; GCN: {{.*}}
2087; GFX9: {{.*}}
2088