xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
9; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
10
; Baseline test: scalar half llvm.minimum on VGPR arguments with no fast-math
; flags on the call.
; In the checks for targets before GFX12, v_min_f16 is followed by a
; v_cmp_o_f16 / v_cndmask_b32 pair that selects between the min result and the
; constant 0x7e00 (the half-precision quiet NaN bit pattern). The GFX12 checks
; contain a single v_minimum_f16 instead.
11define half @v_minimum_f16(half %src0, half %src1) {
12; GFX8-LABEL: v_minimum_f16:
13; GFX8:       ; %bb.0:
14; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
16; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
17; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
18; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
19; GFX8-NEXT:    s_setpc_b64 s[30:31]
20;
21; GFX900-LABEL: v_minimum_f16:
22; GFX900:       ; %bb.0:
23; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
25; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
26; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
27; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
28; GFX900-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX950-LABEL: v_minimum_f16:
31; GFX950:       ; %bb.0:
32; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX950-NEXT:    v_min_f16_e32 v2, v0, v1
34; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
35; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
36; GFX950-NEXT:    s_nop 1
37; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
38; GFX950-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX10-LABEL: v_minimum_f16:
41; GFX10:       ; %bb.0:
42; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
44; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
45; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
46; GFX10-NEXT:    s_setpc_b64 s[30:31]
47;
48; GFX11-LABEL: v_minimum_f16:
49; GFX11:       ; %bb.0:
50; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
52; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
53; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
54; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
55; GFX11-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX12-LABEL: v_minimum_f16:
58; GFX12:       ; %bb.0:
59; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
60; GFX12-NEXT:    s_wait_expcnt 0x0
61; GFX12-NEXT:    s_wait_samplecnt 0x0
62; GFX12-NEXT:    s_wait_bvhcnt 0x0
63; GFX12-NEXT:    s_wait_kmcnt 0x0
64; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
65; GFX12-NEXT:    s_setpc_b64 s[30:31]
66  %op = call half @llvm.minimum.f16(half %src0, half %src1)
67  ret half %op
68}
69
; Scalar half llvm.minimum with the nnan flag on the call.
; The checks for targets before GFX12 contain only v_min_f16 (no compare or
; select against 0x7e00). The GFX12 checks still use v_minimum_f16.
70define half @v_minimum_f16__nnan(half %src0, half %src1) {
71; GFX8-LABEL: v_minimum_f16__nnan:
72; GFX8:       ; %bb.0:
73; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
75; GFX8-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX9-LABEL: v_minimum_f16__nnan:
78; GFX9:       ; %bb.0:
79; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
81; GFX9-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX10-LABEL: v_minimum_f16__nnan:
84; GFX10:       ; %bb.0:
85; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
87; GFX10-NEXT:    s_setpc_b64 s[30:31]
88;
89; GFX11-LABEL: v_minimum_f16__nnan:
90; GFX11:       ; %bb.0:
91; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
93; GFX11-NEXT:    s_setpc_b64 s[30:31]
94;
95; GFX12-LABEL: v_minimum_f16__nnan:
96; GFX12:       ; %bb.0:
97; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
98; GFX12-NEXT:    s_wait_expcnt 0x0
99; GFX12-NEXT:    s_wait_samplecnt 0x0
100; GFX12-NEXT:    s_wait_bvhcnt 0x0
101; GFX12-NEXT:    s_wait_kmcnt 0x0
102; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
103; GFX12-NEXT:    s_setpc_b64 s[30:31]
104  %op = call nnan half @llvm.minimum.f16(half %src0, half %src1)
105  ret half %op
106}
107
; Scalar half llvm.minimum with only the nsz flag on the call.
; The expected instructions under every check prefix match those of the
; flag-free v_minimum_f16 test: the compare/select against 0x7e00 is still
; present before GFX12.
108define half @v_minimum_f16__nsz(half %src0, half %src1) {
109; GFX8-LABEL: v_minimum_f16__nsz:
110; GFX8:       ; %bb.0:
111; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
113; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
114; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
115; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
116; GFX8-NEXT:    s_setpc_b64 s[30:31]
117;
118; GFX900-LABEL: v_minimum_f16__nsz:
119; GFX900:       ; %bb.0:
120; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
122; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
123; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
124; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
125; GFX900-NEXT:    s_setpc_b64 s[30:31]
126;
127; GFX950-LABEL: v_minimum_f16__nsz:
128; GFX950:       ; %bb.0:
129; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GFX950-NEXT:    v_min_f16_e32 v2, v0, v1
131; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
132; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
133; GFX950-NEXT:    s_nop 1
134; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
135; GFX950-NEXT:    s_setpc_b64 s[30:31]
136;
137; GFX10-LABEL: v_minimum_f16__nsz:
138; GFX10:       ; %bb.0:
139; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
141; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
142; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
143; GFX10-NEXT:    s_setpc_b64 s[30:31]
144;
145; GFX11-LABEL: v_minimum_f16__nsz:
146; GFX11:       ; %bb.0:
147; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
149; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
150; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
151; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
152; GFX11-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX12-LABEL: v_minimum_f16__nsz:
155; GFX12:       ; %bb.0:
156; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
157; GFX12-NEXT:    s_wait_expcnt 0x0
158; GFX12-NEXT:    s_wait_samplecnt 0x0
159; GFX12-NEXT:    s_wait_bvhcnt 0x0
160; GFX12-NEXT:    s_wait_kmcnt 0x0
161; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
162; GFX12-NEXT:    s_setpc_b64 s[30:31]
163  %op = call nsz half @llvm.minimum.f16(half %src0, half %src1)
164  ret half %op
165}
166
; Scalar half llvm.minimum with both nnan and nsz on the call.
; The expected instructions match the nnan-only test: a lone v_min_f16 before
; GFX12 and v_minimum_f16 on GFX12.
167define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
168; GFX8-LABEL: v_minimum_f16__nnan_nsz:
169; GFX8:       ; %bb.0:
170; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
172; GFX8-NEXT:    s_setpc_b64 s[30:31]
173;
174; GFX9-LABEL: v_minimum_f16__nnan_nsz:
175; GFX9:       ; %bb.0:
176; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
178; GFX9-NEXT:    s_setpc_b64 s[30:31]
179;
180; GFX10-LABEL: v_minimum_f16__nnan_nsz:
181; GFX10:       ; %bb.0:
182; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
184; GFX10-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX11-LABEL: v_minimum_f16__nnan_nsz:
187; GFX11:       ; %bb.0:
188; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
190; GFX11-NEXT:    s_setpc_b64 s[30:31]
191;
192; GFX12-LABEL: v_minimum_f16__nnan_nsz:
193; GFX12:       ; %bb.0:
194; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
195; GFX12-NEXT:    s_wait_expcnt 0x0
196; GFX12-NEXT:    s_wait_samplecnt 0x0
197; GFX12-NEXT:    s_wait_bvhcnt 0x0
198; GFX12-NEXT:    s_wait_kmcnt 0x0
199; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
200; GFX12-NEXT:    s_setpc_b64 s[30:31]
201  %op = call nnan nsz half @llvm.minimum.f16(half %src0, half %src1)
202  ret half %op
203}
204
; First operand comes from an fadd that carries nnan; the llvm.minimum call
; itself has no fast-math flags and the second operand is a plain argument.
; The checks before GFX12 still contain the v_cmp_o_f16 / v_cndmask_b32 pair
; after the v_add_f16, so the compare/select is kept in this case.
205define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
206; GFX8-LABEL: v_minimum_f16__nnan_src0:
207; GFX8:       ; %bb.0:
208; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209; GFX8-NEXT:    v_add_f16_e32 v0, 1.0, v0
210; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
211; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
212; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
213; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
214; GFX8-NEXT:    s_setpc_b64 s[30:31]
215;
216; GFX900-LABEL: v_minimum_f16__nnan_src0:
217; GFX900:       ; %bb.0:
218; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GFX900-NEXT:    v_add_f16_e32 v0, 1.0, v0
220; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
221; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
222; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
223; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
224; GFX900-NEXT:    s_setpc_b64 s[30:31]
225;
226; GFX950-LABEL: v_minimum_f16__nnan_src0:
227; GFX950:       ; %bb.0:
228; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229; GFX950-NEXT:    v_add_f16_e32 v0, 1.0, v0
230; GFX950-NEXT:    v_min_f16_e32 v2, v0, v1
231; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
232; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
233; GFX950-NEXT:    s_nop 1
234; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
235; GFX950-NEXT:    s_setpc_b64 s[30:31]
236;
237; GFX10-LABEL: v_minimum_f16__nnan_src0:
238; GFX10:       ; %bb.0:
239; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX10-NEXT:    v_add_f16_e32 v0, 1.0, v0
241; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
242; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
243; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
244; GFX10-NEXT:    s_setpc_b64 s[30:31]
245;
246; GFX11-LABEL: v_minimum_f16__nnan_src0:
247; GFX11:       ; %bb.0:
248; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249; GFX11-NEXT:    v_add_f16_e32 v0, 1.0, v0
250; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
251; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
252; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
253; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
254; GFX11-NEXT:    s_setpc_b64 s[30:31]
255;
256; GFX12-LABEL: v_minimum_f16__nnan_src0:
257; GFX12:       ; %bb.0:
258; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
259; GFX12-NEXT:    s_wait_expcnt 0x0
260; GFX12-NEXT:    s_wait_samplecnt 0x0
261; GFX12-NEXT:    s_wait_bvhcnt 0x0
262; GFX12-NEXT:    s_wait_kmcnt 0x0
263; GFX12-NEXT:    v_add_f16_e32 v0, 1.0, v0
264; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
265; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
266; GFX12-NEXT:    s_setpc_b64 s[30:31]
267  %src0 = fadd nnan half %arg0, 1.0
268  %op = call half @llvm.minimum.f16(half %src0, half %src1)
269  ret half %op
270}
271
; Mirror of the __nnan_src0 test: here the second operand comes from an fadd
; that carries nnan, the first operand is a plain argument, and the
; llvm.minimum call has no fast-math flags.
; The checks before GFX12 still contain the v_cmp_o_f16 / v_cndmask_b32 pair.
272define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
273; GFX8-LABEL: v_minimum_f16__nnan_src1:
274; GFX8:       ; %bb.0:
275; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276; GFX8-NEXT:    v_add_f16_e32 v1, 1.0, v1
277; GFX8-NEXT:    v_min_f16_e32 v2, v0, v1
278; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7e00
279; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
280; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
281; GFX8-NEXT:    s_setpc_b64 s[30:31]
282;
283; GFX900-LABEL: v_minimum_f16__nnan_src1:
284; GFX900:       ; %bb.0:
285; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286; GFX900-NEXT:    v_add_f16_e32 v1, 1.0, v1
287; GFX900-NEXT:    v_min_f16_e32 v2, v0, v1
288; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
289; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
290; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
291; GFX900-NEXT:    s_setpc_b64 s[30:31]
292;
293; GFX950-LABEL: v_minimum_f16__nnan_src1:
294; GFX950:       ; %bb.0:
295; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX950-NEXT:    v_add_f16_e32 v1, 1.0, v1
297; GFX950-NEXT:    v_min_f16_e32 v2, v0, v1
298; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
299; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
300; GFX950-NEXT:    s_nop 1
301; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
302; GFX950-NEXT:    s_setpc_b64 s[30:31]
303;
304; GFX10-LABEL: v_minimum_f16__nnan_src1:
305; GFX10:       ; %bb.0:
306; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307; GFX10-NEXT:    v_add_f16_e32 v1, 1.0, v1
308; GFX10-NEXT:    v_min_f16_e32 v2, v0, v1
309; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
310; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
311; GFX10-NEXT:    s_setpc_b64 s[30:31]
312;
313; GFX11-LABEL: v_minimum_f16__nnan_src1:
314; GFX11:       ; %bb.0:
315; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316; GFX11-NEXT:    v_add_f16_e32 v1, 1.0, v1
317; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
318; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
319; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
320; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
321; GFX11-NEXT:    s_setpc_b64 s[30:31]
322;
323; GFX12-LABEL: v_minimum_f16__nnan_src1:
324; GFX12:       ; %bb.0:
325; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
326; GFX12-NEXT:    s_wait_expcnt 0x0
327; GFX12-NEXT:    s_wait_samplecnt 0x0
328; GFX12-NEXT:    s_wait_bvhcnt 0x0
329; GFX12-NEXT:    s_wait_kmcnt 0x0
330; GFX12-NEXT:    v_add_f16_e32 v1, 1.0, v1
331; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
332; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
333; GFX12-NEXT:    s_setpc_b64 s[30:31]
334  %src1 = fadd nnan half %arg1, 1.0
335  %op = call half @llvm.minimum.f16(half %src0, half %src1)
336  ret half %op
337}
338
; Scalar half llvm.minimum on inreg (SGPR) arguments. The result is bitcast to
; i16, zero-extended to i32 and handed to an inline asm with an "s" constraint
; so that it has a use.
; Only the GFX12 checks stay on the scalar unit (s_minimum_f16, asm comment
; "use s0"). Every earlier target computes the value with VALU instructions
; and the recorded asm comment is "use v0" despite the "s" constraint.
339define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
340; GFX8-LABEL: s_minimum_f16:
341; GFX8:       ; %bb.0:
342; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX8-NEXT:    v_mov_b32_e32 v0, s17
344; GFX8-NEXT:    v_min_f16_e32 v1, s16, v0
345; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
346; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
347; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
348; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
349; GFX8-NEXT:    ;;#ASMSTART
350; GFX8-NEXT:    ; use v0
351; GFX8-NEXT:    ;;#ASMEND
352; GFX8-NEXT:    s_setpc_b64 s[30:31]
353;
354; GFX900-LABEL: s_minimum_f16:
355; GFX900:       ; %bb.0:
356; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; GFX900-NEXT:    v_mov_b32_e32 v0, s17
358; GFX900-NEXT:    v_min_f16_e32 v1, s16, v0
359; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
360; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
361; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
362; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
363; GFX900-NEXT:    ;;#ASMSTART
364; GFX900-NEXT:    ; use v0
365; GFX900-NEXT:    ;;#ASMEND
366; GFX900-NEXT:    s_setpc_b64 s[30:31]
367;
368; GFX950-LABEL: s_minimum_f16:
369; GFX950:       ; %bb.0:
370; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GFX950-NEXT:    v_mov_b32_e32 v0, s1
372; GFX950-NEXT:    v_min_f16_e32 v1, s0, v0
373; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7e00
374; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
375; GFX950-NEXT:    s_nop 1
376; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
377; GFX950-NEXT:    v_and_b32_e32 v0, 0xffff, v0
378; GFX950-NEXT:    ;;#ASMSTART
379; GFX950-NEXT:    ; use v0
380; GFX950-NEXT:    ;;#ASMEND
381; GFX950-NEXT:    s_setpc_b64 s[30:31]
382;
383; GFX10-LABEL: s_minimum_f16:
384; GFX10:       ; %bb.0:
385; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386; GFX10-NEXT:    v_min_f16_e64 v0, s16, s17
387; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
388; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
389; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
390; GFX10-NEXT:    ;;#ASMSTART
391; GFX10-NEXT:    ; use v0
392; GFX10-NEXT:    ;;#ASMEND
393; GFX10-NEXT:    s_setpc_b64 s[30:31]
394;
395; GFX11-LABEL: s_minimum_f16:
396; GFX11:       ; %bb.0:
397; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398; GFX11-NEXT:    v_min_f16_e64 v0, s0, s1
399; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
400; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
401; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
402; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
403; GFX11-NEXT:    ;;#ASMSTART
404; GFX11-NEXT:    ; use v0
405; GFX11-NEXT:    ;;#ASMEND
406; GFX11-NEXT:    s_setpc_b64 s[30:31]
407;
408; GFX12-LABEL: s_minimum_f16:
409; GFX12:       ; %bb.0:
410; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
411; GFX12-NEXT:    s_wait_expcnt 0x0
412; GFX12-NEXT:    s_wait_samplecnt 0x0
413; GFX12-NEXT:    s_wait_bvhcnt 0x0
414; GFX12-NEXT:    s_wait_kmcnt 0x0
415; GFX12-NEXT:    s_minimum_f16 s0, s0, s1
416; GFX12-NEXT:    s_wait_alu 0xfffe
417; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
418; GFX12-NEXT:    s_and_b32 s0, 0xffff, s0
419; GFX12-NEXT:    ;;#ASMSTART
420; GFX12-NEXT:    ; use s0
421; GFX12-NEXT:    ;;#ASMEND
422; GFX12-NEXT:    s_wait_alu 0xfffe
423; GFX12-NEXT:    s_setpc_b64 s[30:31]
424  %op = call half @llvm.minimum.f16(half %src0, half %src1)
425  %cast = bitcast half %op to i16
426  %zext = zext i16 %cast to i32
427  call void asm sideeffect "; use $0", "s"(i32 %zext)
428  ret void
429}
430
; Baseline <2 x half> llvm.minimum on VGPR arguments with no fast-math flags.
; Per the checks below:
;  - GFX8 shifts out the high halves, runs v_min_f16 plus compare/select on
;    each half, then recombines them with an SDWA v_or_b32.
;  - GFX900, GFX10 and GFX11 use one v_pk_min_f16 followed by a v_cmp_o_f16
;    and v_cndmask_b32 per half and a final v_perm_b32.
;  - GFX950 uses a single v_pk_minimum3_f16 with v1 as both the second and
;    third source.
;  - GFX12 uses a single v_pk_minimum_f16.
431define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
432; GFX8-LABEL: v_minimum_v2f16:
433; GFX8:       ; %bb.0:
434; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
436; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
437; GFX8-NEXT:    v_min_f16_e32 v4, v3, v2
438; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
439; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
440; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc
441; GFX8-NEXT:    v_min_f16_e32 v3, v0, v1
442; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
443; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
444; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
445; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
446; GFX8-NEXT:    s_setpc_b64 s[30:31]
447;
448; GFX900-LABEL: v_minimum_v2f16:
449; GFX900:       ; %bb.0:
450; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451; GFX900-NEXT:    v_pk_min_f16 v2, v0, v1
452; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
453; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
454; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
455; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
456; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
457; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
458; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
459; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
460; GFX900-NEXT:    s_setpc_b64 s[30:31]
461;
462; GFX950-LABEL: v_minimum_v2f16:
463; GFX950:       ; %bb.0:
464; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
466; GFX950-NEXT:    s_setpc_b64 s[30:31]
467;
468; GFX10-LABEL: v_minimum_v2f16:
469; GFX10:       ; %bb.0:
470; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471; GFX10-NEXT:    v_pk_min_f16 v2, v0, v1
472; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
473; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
474; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
475; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
476; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
477; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
478; GFX10-NEXT:    s_setpc_b64 s[30:31]
479;
480; GFX11-LABEL: v_minimum_v2f16:
481; GFX11:       ; %bb.0:
482; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483; GFX11-NEXT:    v_pk_min_f16 v2, v0, v1
484; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
485; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
486; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
487; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
488; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
489; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
490; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
491; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
492; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
493; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
494; GFX11-NEXT:    s_setpc_b64 s[30:31]
495;
496; GFX12-LABEL: v_minimum_v2f16:
497; GFX12:       ; %bb.0:
498; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
499; GFX12-NEXT:    s_wait_expcnt 0x0
500; GFX12-NEXT:    s_wait_samplecnt 0x0
501; GFX12-NEXT:    s_wait_bvhcnt 0x0
502; GFX12-NEXT:    s_wait_kmcnt 0x0
503; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v1
504; GFX12-NEXT:    s_setpc_b64 s[30:31]
505  %op = call <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
506  ret <2 x half> %op
507}
508
; <2 x half> llvm.minimum with the nnan flag on the call.
; Per the checks below:
;  - GFX8 uses two v_min_f16 (the high halves through SDWA) combined with
;    v_or_b32, with no compare/select.
;  - GFX900, GFX10 and GFX11 use a single v_pk_min_f16.
;  - GFX950 (v_pk_minimum3_f16) and GFX12 (v_pk_minimum_f16) emit the same
;    instruction as in the flag-free v_minimum_v2f16 test.
509define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
510; GFX8-LABEL: v_minimum_v2f16__nnan:
511; GFX8:       ; %bb.0:
512; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513; GFX8-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
514; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
515; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
516; GFX8-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX900-LABEL: v_minimum_v2f16__nnan:
519; GFX900:       ; %bb.0:
520; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
522; GFX900-NEXT:    s_setpc_b64 s[30:31]
523;
524; GFX950-LABEL: v_minimum_v2f16__nnan:
525; GFX950:       ; %bb.0:
526; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
527; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
528; GFX950-NEXT:    s_setpc_b64 s[30:31]
529;
530; GFX10-LABEL: v_minimum_v2f16__nnan:
531; GFX10:       ; %bb.0:
532; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; GFX10-NEXT:    v_pk_min_f16 v0, v0, v1
534; GFX10-NEXT:    s_setpc_b64 s[30:31]
535;
536; GFX11-LABEL: v_minimum_v2f16__nnan:
537; GFX11:       ; %bb.0:
538; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539; GFX11-NEXT:    v_pk_min_f16 v0, v0, v1
540; GFX11-NEXT:    s_setpc_b64 s[30:31]
541;
542; GFX12-LABEL: v_minimum_v2f16__nnan:
543; GFX12:       ; %bb.0:
544; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
545; GFX12-NEXT:    s_wait_expcnt 0x0
546; GFX12-NEXT:    s_wait_samplecnt 0x0
547; GFX12-NEXT:    s_wait_bvhcnt 0x0
548; GFX12-NEXT:    s_wait_kmcnt 0x0
549; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v1
550; GFX12-NEXT:    s_setpc_b64 s[30:31]
551  %op = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
552  ret <2 x half> %op
553}
554
; <2 x half> llvm.minimum with only the nsz flag on the call.
; The expected instructions under every check prefix match those of the
; flag-free v_minimum_v2f16 test: the per-half compare/select against 0x7e00
; is still present on GFX8, GFX900, GFX10 and GFX11.
555define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
556; GFX8-LABEL: v_minimum_v2f16__nsz:
557; GFX8:       ; %bb.0:
558; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
560; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
561; GFX8-NEXT:    v_min_f16_e32 v4, v3, v2
562; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7e00
563; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v2
564; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v4, vcc
565; GFX8-NEXT:    v_min_f16_e32 v3, v0, v1
566; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
567; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
568; GFX8-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
569; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
570; GFX8-NEXT:    s_setpc_b64 s[30:31]
571;
572; GFX900-LABEL: v_minimum_v2f16__nsz:
573; GFX900:       ; %bb.0:
574; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575; GFX900-NEXT:    v_pk_min_f16 v2, v0, v1
576; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7e00
577; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
578; GFX900-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
579; GFX900-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
580; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
581; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
582; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
583; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
584; GFX900-NEXT:    s_setpc_b64 s[30:31]
585;
586; GFX950-LABEL: v_minimum_v2f16__nsz:
587; GFX950:       ; %bb.0:
588; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
589; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
590; GFX950-NEXT:    s_setpc_b64 s[30:31]
591;
592; GFX10-LABEL: v_minimum_v2f16__nsz:
593; GFX10:       ; %bb.0:
594; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595; GFX10-NEXT:    v_pk_min_f16 v2, v0, v1
596; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
597; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
598; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v2, vcc_lo
599; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
600; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v3, vcc_lo
601; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
602; GFX10-NEXT:    s_setpc_b64 s[30:31]
603;
604; GFX11-LABEL: v_minimum_v2f16__nsz:
605; GFX11:       ; %bb.0:
606; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607; GFX11-NEXT:    v_pk_min_f16 v2, v0, v1
608; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
609; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
610; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
611; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
612; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
613; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
614; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
615; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
616; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
617; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
618; GFX11-NEXT:    s_setpc_b64 s[30:31]
619;
620; GFX12-LABEL: v_minimum_v2f16__nsz:
621; GFX12:       ; %bb.0:
622; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
623; GFX12-NEXT:    s_wait_expcnt 0x0
624; GFX12-NEXT:    s_wait_samplecnt 0x0
625; GFX12-NEXT:    s_wait_bvhcnt 0x0
626; GFX12-NEXT:    s_wait_kmcnt 0x0
627; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v1
628; GFX12-NEXT:    s_setpc_b64 s[30:31]
629  %op = call nsz <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
630  ret <2 x half> %op
631}
632
; <2 x half> llvm.minimum with both nnan and nsz on the call.
; The expected instructions match the nnan-only v2f16 test under every check
; prefix (no compare/select against 0x7e00 anywhere).
633define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1) {
634; GFX8-LABEL: v_minimum_v2f16__nnan_nsz:
635; GFX8:       ; %bb.0:
636; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637; GFX8-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
638; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
639; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
640; GFX8-NEXT:    s_setpc_b64 s[30:31]
641;
642; GFX900-LABEL: v_minimum_v2f16__nnan_nsz:
643; GFX900:       ; %bb.0:
644; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
646; GFX900-NEXT:    s_setpc_b64 s[30:31]
647;
648; GFX950-LABEL: v_minimum_v2f16__nnan_nsz:
649; GFX950:       ; %bb.0:
650; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
651; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
652; GFX950-NEXT:    s_setpc_b64 s[30:31]
653;
654; GFX10-LABEL: v_minimum_v2f16__nnan_nsz:
655; GFX10:       ; %bb.0:
656; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657; GFX10-NEXT:    v_pk_min_f16 v0, v0, v1
658; GFX10-NEXT:    s_setpc_b64 s[30:31]
659;
660; GFX11-LABEL: v_minimum_v2f16__nnan_nsz:
661; GFX11:       ; %bb.0:
662; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663; GFX11-NEXT:    v_pk_min_f16 v0, v0, v1
664; GFX11-NEXT:    s_setpc_b64 s[30:31]
665;
666; GFX12-LABEL: v_minimum_v2f16__nnan_nsz:
667; GFX12:       ; %bb.0:
668; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
669; GFX12-NEXT:    s_wait_expcnt 0x0
670; GFX12-NEXT:    s_wait_samplecnt 0x0
671; GFX12-NEXT:    s_wait_bvhcnt 0x0
672; GFX12-NEXT:    s_wait_kmcnt 0x0
673; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v1
674; GFX12-NEXT:    s_setpc_b64 s[30:31]
675  %op = call nnan nsz <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
676  ret <2 x half> %op
677}
678
; <2 x half> llvm.minimum on inreg (SGPR) arguments. The result is bitcast to
; i32 and handed to an inline asm with an "s" constraint so that it has a use.
; Every check prefix, GFX12 included, computes the result with VALU
; instructions and records the asm comment as "use v0" despite the "s"
; constraint. GFX950 needs a v_mov_b32 for one SGPR operand before
; v_pk_minimum3_f16, while GFX12 passes both SGPRs directly to
; v_pk_minimum_f16.
679define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
680; GFX8-LABEL: s_minimum_v2f16:
681; GFX8:       ; %bb.0:
682; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
683; GFX8-NEXT:    s_lshr_b32 s4, s17, 16
684; GFX8-NEXT:    s_lshr_b32 s5, s16, 16
685; GFX8-NEXT:    v_mov_b32_e32 v0, s4
686; GFX8-NEXT:    v_min_f16_e32 v1, s5, v0
687; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
688; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s5, v0
689; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
690; GFX8-NEXT:    v_mov_b32_e32 v1, s17
691; GFX8-NEXT:    v_min_f16_e32 v3, s16, v1
692; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v1
693; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
694; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
695; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
696; GFX8-NEXT:    ;;#ASMSTART
697; GFX8-NEXT:    ; use v0
698; GFX8-NEXT:    ;;#ASMEND
699; GFX8-NEXT:    s_setpc_b64 s[30:31]
700;
701; GFX900-LABEL: s_minimum_v2f16:
702; GFX900:       ; %bb.0:
703; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704; GFX900-NEXT:    v_mov_b32_e32 v0, s17
705; GFX900-NEXT:    v_mov_b32_e32 v1, s17
706; GFX900-NEXT:    s_lshr_b32 s4, s17, 16
707; GFX900-NEXT:    v_pk_min_f16 v1, s16, v1
708; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
709; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
710; GFX900-NEXT:    s_lshr_b32 s5, s16, 16
711; GFX900-NEXT:    v_mov_b32_e32 v3, s4
712; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
713; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
714; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s5, v3
715; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
716; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
717; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
718; GFX900-NEXT:    ;;#ASMSTART
719; GFX900-NEXT:    ; use v0
720; GFX900-NEXT:    ;;#ASMEND
721; GFX900-NEXT:    s_setpc_b64 s[30:31]
722;
723; GFX950-LABEL: s_minimum_v2f16:
724; GFX950:       ; %bb.0:
725; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
726; GFX950-NEXT:    v_mov_b32_e32 v0, s0
727; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, s1, s1
728; GFX950-NEXT:    s_nop 0
729; GFX950-NEXT:    ;;#ASMSTART
730; GFX950-NEXT:    ; use v0
731; GFX950-NEXT:    ;;#ASMEND
732; GFX950-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX10-LABEL: s_minimum_v2f16:
735; GFX10:       ; %bb.0:
736; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737; GFX10-NEXT:    v_pk_min_f16 v0, s16, s17
738; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
739; GFX10-NEXT:    s_lshr_b32 s4, s17, 16
740; GFX10-NEXT:    s_lshr_b32 s5, s16, 16
741; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
742; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
743; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s5, s4
744; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
745; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
746; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
747; GFX10-NEXT:    ;;#ASMSTART
748; GFX10-NEXT:    ; use v0
749; GFX10-NEXT:    ;;#ASMEND
750; GFX10-NEXT:    s_setpc_b64 s[30:31]
751;
752; GFX11-LABEL: s_minimum_v2f16:
753; GFX11:       ; %bb.0:
754; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755; GFX11-NEXT:    v_pk_min_f16 v0, s0, s1
756; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
757; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
758; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
759; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
760; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
761; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
762; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
763; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
764; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
765; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
766; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
767; GFX11-NEXT:    ;;#ASMSTART
768; GFX11-NEXT:    ; use v0
769; GFX11-NEXT:    ;;#ASMEND
770; GFX11-NEXT:    s_setpc_b64 s[30:31]
771;
772; GFX12-LABEL: s_minimum_v2f16:
773; GFX12:       ; %bb.0:
774; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
775; GFX12-NEXT:    s_wait_expcnt 0x0
776; GFX12-NEXT:    s_wait_samplecnt 0x0
777; GFX12-NEXT:    s_wait_bvhcnt 0x0
778; GFX12-NEXT:    s_wait_kmcnt 0x0
779; GFX12-NEXT:    v_pk_minimum_f16 v0, s0, s1
780; GFX12-NEXT:    ;;#ASMSTART
781; GFX12-NEXT:    ; use v0
782; GFX12-NEXT:    ;;#ASMEND
783; GFX12-NEXT:    s_setpc_b64 s[30:31]
784  %op = call <2 x half> @llvm.minimum.v2f16(<2 x half> %src0, <2 x half> %src1)
785  %cast = bitcast <2 x half> %op to i32
786  call void asm sideeffect "; use $0", "s"(i32 %cast)
787  ret void
788}
789
; llvm.minimum on <3 x half> operands, with no fast-math flags on the call.
; As listed in the assertions below, the GFX8, GFX900, GFX10 and GFX11 runs
; expect each min to be paired with a v_cmp_o_f16 and a v_cndmask that involves
; the constant 0x7e00, while the GFX950 and GFX12 runs expect only
; v_pk_minimum3_f16 / v_pk_minimum_f16.
; The assertion lines are autogenerated (see the header of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
790define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
791; GFX8-LABEL: v_minimum_v3f16:
792; GFX8:       ; %bb.0:
793; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
795; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
796; GFX8-NEXT:    v_min_f16_e32 v6, v5, v4
797; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
798; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
799; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
800; GFX8-NEXT:    v_min_f16_e32 v5, v1, v3
801; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
802; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
803; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
804; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
805; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
806; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
807; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
808; GFX8-NEXT:    s_setpc_b64 s[30:31]
809;
810; GFX900-LABEL: v_minimum_v3f16:
811; GFX900:       ; %bb.0:
812; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
814; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
815; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
816; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
817; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
818; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
819; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
820; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
821; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
822; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
823; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
824; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
825; GFX900-NEXT:    s_setpc_b64 s[30:31]
826;
827; GFX950-LABEL: v_minimum_v3f16:
828; GFX950:       ; %bb.0:
829; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
831; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
832; GFX950-NEXT:    s_setpc_b64 s[30:31]
833;
834; GFX10-LABEL: v_minimum_v3f16:
835; GFX10:       ; %bb.0:
836; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837; GFX10-NEXT:    v_pk_min_f16 v4, v0, v2
838; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
839; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
840; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
841; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
842; GFX10-NEXT:    v_pk_min_f16 v2, v1, v3
843; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
844; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
845; GFX10-NEXT:    v_perm_b32 v0, v0, v4, 0x5040100
846; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
847; GFX10-NEXT:    s_setpc_b64 s[30:31]
848;
849; GFX11-LABEL: v_minimum_v3f16:
850; GFX11:       ; %bb.0:
851; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
852; GFX11-NEXT:    v_pk_min_f16 v4, v0, v2
853; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
854; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
855; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
856; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
857; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
858; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
859; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
860; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
861; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
862; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
863; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
864; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
865; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
866; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
867; GFX11-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX12-LABEL: v_minimum_v3f16:
870; GFX12:       ; %bb.0:
871; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
872; GFX12-NEXT:    s_wait_expcnt 0x0
873; GFX12-NEXT:    s_wait_samplecnt 0x0
874; GFX12-NEXT:    s_wait_bvhcnt 0x0
875; GFX12-NEXT:    s_wait_kmcnt 0x0
876; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
877; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
878; GFX12-NEXT:    s_setpc_b64 s[30:31]
879  %op = call <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
880  ret <3 x half> %op
881}
882
; llvm.minimum on <3 x half> operands where the call carries the nnan flag.
; The GFX8, GFX900, GFX10 and GFX11 assertions below contain only
; v_min_f16 / v_pk_min_f16 (no v_cmp_o_f16 or v_cndmask); the GFX950 and GFX12
; assertions use v_pk_minimum3_f16 / v_pk_minimum_f16.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
883define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
884; GFX8-LABEL: v_minimum_v3f16__nnan:
885; GFX8:       ; %bb.0:
886; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887; GFX8-NEXT:    v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
888; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
889; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
890; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
891; GFX8-NEXT:    s_setpc_b64 s[30:31]
892;
893; GFX900-LABEL: v_minimum_v3f16__nnan:
894; GFX900:       ; %bb.0:
895; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
897; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
898; GFX900-NEXT:    s_setpc_b64 s[30:31]
899;
900; GFX950-LABEL: v_minimum_v3f16__nnan:
901; GFX950:       ; %bb.0:
902; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
904; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
905; GFX950-NEXT:    s_setpc_b64 s[30:31]
906;
907; GFX10-LABEL: v_minimum_v3f16__nnan:
908; GFX10:       ; %bb.0:
909; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
910; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
911; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
912; GFX10-NEXT:    s_setpc_b64 s[30:31]
913;
914; GFX11-LABEL: v_minimum_v3f16__nnan:
915; GFX11:       ; %bb.0:
916; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
917; GFX11-NEXT:    v_pk_min_f16 v0, v0, v2
918; GFX11-NEXT:    v_pk_min_f16 v1, v1, v3
919; GFX11-NEXT:    s_setpc_b64 s[30:31]
920;
921; GFX12-LABEL: v_minimum_v3f16__nnan:
922; GFX12:       ; %bb.0:
923; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
924; GFX12-NEXT:    s_wait_expcnt 0x0
925; GFX12-NEXT:    s_wait_samplecnt 0x0
926; GFX12-NEXT:    s_wait_bvhcnt 0x0
927; GFX12-NEXT:    s_wait_kmcnt 0x0
928; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
929; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
930; GFX12-NEXT:    s_setpc_b64 s[30:31]
931  %op = call nnan <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
932  ret <3 x half> %op
933}
934
; llvm.minimum on <3 x half> operands where the call carries only the nsz flag.
; The assertions below list the same instruction sequences as those recorded
; for @v_minimum_v3f16 (the call without flags): min plus v_cmp_o_f16 and
; v_cndmask with 0x7e00 on GFX8, GFX900, GFX10 and GFX11, and
; v_pk_minimum3_f16 / v_pk_minimum_f16 on GFX950 and GFX12.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
935define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
936; GFX8-LABEL: v_minimum_v3f16__nsz:
937; GFX8:       ; %bb.0:
938; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
939; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
940; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
941; GFX8-NEXT:    v_min_f16_e32 v6, v5, v4
942; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
943; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
944; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
945; GFX8-NEXT:    v_min_f16_e32 v5, v1, v3
946; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
947; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
948; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
949; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
950; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
951; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
952; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
953; GFX8-NEXT:    s_setpc_b64 s[30:31]
954;
955; GFX900-LABEL: v_minimum_v3f16__nsz:
956; GFX900:       ; %bb.0:
957; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
959; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
960; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
961; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
962; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
963; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
964; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
965; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
966; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
967; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
968; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
969; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
970; GFX900-NEXT:    s_setpc_b64 s[30:31]
971;
972; GFX950-LABEL: v_minimum_v3f16__nsz:
973; GFX950:       ; %bb.0:
974; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
976; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
977; GFX950-NEXT:    s_setpc_b64 s[30:31]
978;
979; GFX10-LABEL: v_minimum_v3f16__nsz:
980; GFX10:       ; %bb.0:
981; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982; GFX10-NEXT:    v_pk_min_f16 v4, v0, v2
983; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
984; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
985; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v4, vcc_lo
986; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
987; GFX10-NEXT:    v_pk_min_f16 v2, v1, v3
988; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v5, vcc_lo
989; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
990; GFX10-NEXT:    v_perm_b32 v0, v0, v4, 0x5040100
991; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
992; GFX10-NEXT:    s_setpc_b64 s[30:31]
993;
994; GFX11-LABEL: v_minimum_v3f16__nsz:
995; GFX11:       ; %bb.0:
996; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997; GFX11-NEXT:    v_pk_min_f16 v4, v0, v2
998; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
999; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1000; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1001; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1002; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
1003; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
1004; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1005; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
1006; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1007; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
1008; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1009; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1010; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1011; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1012; GFX11-NEXT:    s_setpc_b64 s[30:31]
1013;
1014; GFX12-LABEL: v_minimum_v3f16__nsz:
1015; GFX12:       ; %bb.0:
1016; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1017; GFX12-NEXT:    s_wait_expcnt 0x0
1018; GFX12-NEXT:    s_wait_samplecnt 0x0
1019; GFX12-NEXT:    s_wait_bvhcnt 0x0
1020; GFX12-NEXT:    s_wait_kmcnt 0x0
1021; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
1022; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
1023; GFX12-NEXT:    s_setpc_b64 s[30:31]
1024  %op = call nsz <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
1025  ret <3 x half> %op
1026}
1027
; llvm.minimum on <3 x half> operands where the call carries both nnan and nsz.
; The assertions below list the same instruction sequences as those recorded
; for @v_minimum_v3f16__nnan: only v_min_f16 / v_pk_min_f16 on GFX8, GFX900,
; GFX10 and GFX11, and v_pk_minimum3_f16 / v_pk_minimum_f16 on GFX950 and GFX12.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
1028define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1) {
1029; GFX8-LABEL: v_minimum_v3f16__nnan_nsz:
1030; GFX8:       ; %bb.0:
1031; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1032; GFX8-NEXT:    v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1033; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
1034; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
1035; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
1036; GFX8-NEXT:    s_setpc_b64 s[30:31]
1037;
1038; GFX900-LABEL: v_minimum_v3f16__nnan_nsz:
1039; GFX900:       ; %bb.0:
1040; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
1042; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
1043; GFX900-NEXT:    s_setpc_b64 s[30:31]
1044;
1045; GFX950-LABEL: v_minimum_v3f16__nnan_nsz:
1046; GFX950:       ; %bb.0:
1047; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1048; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
1049; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
1050; GFX950-NEXT:    s_setpc_b64 s[30:31]
1051;
1052; GFX10-LABEL: v_minimum_v3f16__nnan_nsz:
1053; GFX10:       ; %bb.0:
1054; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
1056; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
1057; GFX10-NEXT:    s_setpc_b64 s[30:31]
1058;
1059; GFX11-LABEL: v_minimum_v3f16__nnan_nsz:
1060; GFX11:       ; %bb.0:
1061; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062; GFX11-NEXT:    v_pk_min_f16 v0, v0, v2
1063; GFX11-NEXT:    v_pk_min_f16 v1, v1, v3
1064; GFX11-NEXT:    s_setpc_b64 s[30:31]
1065;
1066; GFX12-LABEL: v_minimum_v3f16__nnan_nsz:
1067; GFX12:       ; %bb.0:
1068; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1069; GFX12-NEXT:    s_wait_expcnt 0x0
1070; GFX12-NEXT:    s_wait_samplecnt 0x0
1071; GFX12-NEXT:    s_wait_bvhcnt 0x0
1072; GFX12-NEXT:    s_wait_kmcnt 0x0
1073; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
1074; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
1075; GFX12-NEXT:    s_setpc_b64 s[30:31]
1076  %op = call nnan nsz <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
1077  ret <3 x half> %op
1078}
1079
; llvm.minimum on <4 x half> operands, with no fast-math flags on the call.
; As listed in the assertions below, the GFX8, GFX900, GFX10 and GFX11 runs
; expect each min to be paired with a v_cmp_o_f16 and a v_cndmask that involves
; the constant 0x7e00, followed by repacking of the 16-bit halves
; (v_or_b32_sdwa on GFX8, v_perm_b32 elsewhere); the GFX950 and GFX12 runs
; expect only v_pk_minimum3_f16 / v_pk_minimum_f16.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
1080define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
1081; GFX8-LABEL: v_minimum_v4f16:
1082; GFX8:       ; %bb.0:
1083; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
1085; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
1086; GFX8-NEXT:    v_min_f16_e32 v6, v5, v4
1087; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
1088; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
1089; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
1090; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1091; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1092; GFX8-NEXT:    v_min_f16_e32 v8, v6, v5
1093; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
1094; GFX8-NEXT:    v_cndmask_b32_e32 v5, v7, v8, vcc
1095; GFX8-NEXT:    v_min_f16_e32 v6, v1, v3
1096; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1097; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
1098; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
1099; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1100; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1101; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
1102; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1103; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
1104; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1105; GFX8-NEXT:    s_setpc_b64 s[30:31]
1106;
1107; GFX900-LABEL: v_minimum_v4f16:
1108; GFX900:       ; %bb.0:
1109; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
1111; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
1112; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1113; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
1114; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1115; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1116; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
1117; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
1118; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1119; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
1120; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1121; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1122; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
1123; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1124; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
1125; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
1126; GFX900-NEXT:    s_setpc_b64 s[30:31]
1127;
1128; GFX950-LABEL: v_minimum_v4f16:
1129; GFX950:       ; %bb.0:
1130; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
1132; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
1133; GFX950-NEXT:    s_setpc_b64 s[30:31]
1134;
1135; GFX10-LABEL: v_minimum_v4f16:
1136; GFX10:       ; %bb.0:
1137; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138; GFX10-NEXT:    v_pk_min_f16 v4, v1, v3
1139; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1140; GFX10-NEXT:    v_pk_min_f16 v5, v0, v2
1141; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1142; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1143; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v5
1144; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1145; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1146; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1147; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1148; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1149; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
1150; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1151; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
1152; GFX10-NEXT:    s_setpc_b64 s[30:31]
1153;
1154; GFX11-LABEL: v_minimum_v4f16:
1155; GFX11:       ; %bb.0:
1156; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
1158; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1159; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
1160; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
1161; GFX11-NEXT:    v_pk_min_f16 v7, v0, v2
1162; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1163; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1164; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1165; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1166; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
1167; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1168; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1169; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
1170; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1171; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1172; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1173; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1174; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1175; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1176; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
1177; GFX11-NEXT:    s_setpc_b64 s[30:31]
1178;
1179; GFX12-LABEL: v_minimum_v4f16:
1180; GFX12:       ; %bb.0:
1181; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1182; GFX12-NEXT:    s_wait_expcnt 0x0
1183; GFX12-NEXT:    s_wait_samplecnt 0x0
1184; GFX12-NEXT:    s_wait_bvhcnt 0x0
1185; GFX12-NEXT:    s_wait_kmcnt 0x0
1186; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
1187; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
1188; GFX12-NEXT:    s_setpc_b64 s[30:31]
1189  %op = call <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1)
1190  ret <4 x half> %op
1191}
1192
; llvm.minimum on <4 x half> operands where the call carries the nnan flag.
; The GFX8, GFX900, GFX10 and GFX11 assertions below contain only
; v_min_f16 / v_pk_min_f16 (no v_cmp_o_f16 or v_cndmask); the GFX950 and GFX12
; assertions use v_pk_minimum3_f16 / v_pk_minimum_f16.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
1193define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
1194; GFX8-LABEL: v_minimum_v4f16__nnan:
1195; GFX8:       ; %bb.0:
1196; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1197; GFX8-NEXT:    v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1198; GFX8-NEXT:    v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1199; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
1200; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
1201; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
1202; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1203; GFX8-NEXT:    s_setpc_b64 s[30:31]
1204;
1205; GFX900-LABEL: v_minimum_v4f16__nnan:
1206; GFX900:       ; %bb.0:
1207; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1208; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
1209; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
1210; GFX900-NEXT:    s_setpc_b64 s[30:31]
1211;
1212; GFX950-LABEL: v_minimum_v4f16__nnan:
1213; GFX950:       ; %bb.0:
1214; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
1216; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
1217; GFX950-NEXT:    s_setpc_b64 s[30:31]
1218;
1219; GFX10-LABEL: v_minimum_v4f16__nnan:
1220; GFX10:       ; %bb.0:
1221; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
1223; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
1224; GFX10-NEXT:    s_setpc_b64 s[30:31]
1225;
1226; GFX11-LABEL: v_minimum_v4f16__nnan:
1227; GFX11:       ; %bb.0:
1228; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1229; GFX11-NEXT:    v_pk_min_f16 v0, v0, v2
1230; GFX11-NEXT:    v_pk_min_f16 v1, v1, v3
1231; GFX11-NEXT:    s_setpc_b64 s[30:31]
1232;
1233; GFX12-LABEL: v_minimum_v4f16__nnan:
1234; GFX12:       ; %bb.0:
1235; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1236; GFX12-NEXT:    s_wait_expcnt 0x0
1237; GFX12-NEXT:    s_wait_samplecnt 0x0
1238; GFX12-NEXT:    s_wait_bvhcnt 0x0
1239; GFX12-NEXT:    s_wait_kmcnt 0x0
1240; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
1241; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
1242; GFX12-NEXT:    s_setpc_b64 s[30:31]
1243  %op = call nnan <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1)
1244  ret <4 x half> %op
1245}
1246
; llvm.minimum on <4 x half> operands where the call carries only the nsz flag.
; The assertions below list the same instruction sequences as those recorded
; for @v_minimum_v4f16 (the call without flags): min plus v_cmp_o_f16 and
; v_cndmask with 0x7e00 on GFX8, GFX900, GFX10 and GFX11, and
; v_pk_minimum3_f16 / v_pk_minimum_f16 on GFX950 and GFX12.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
1247define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
1248; GFX8-LABEL: v_minimum_v4f16__nsz:
1249; GFX8:       ; %bb.0:
1250; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1251; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
1252; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
1253; GFX8-NEXT:    v_min_f16_e32 v6, v5, v4
1254; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7e00
1255; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v5, v4
1256; GFX8-NEXT:    v_cndmask_b32_e32 v4, v7, v6, vcc
1257; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1258; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1259; GFX8-NEXT:    v_min_f16_e32 v8, v6, v5
1260; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
1261; GFX8-NEXT:    v_cndmask_b32_e32 v5, v7, v8, vcc
1262; GFX8-NEXT:    v_min_f16_e32 v6, v1, v3
1263; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1264; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc
1265; GFX8-NEXT:    v_min_f16_e32 v3, v0, v2
1266; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1267; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
1268; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
1269; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1270; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
1271; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1272; GFX8-NEXT:    s_setpc_b64 s[30:31]
1273;
1274; GFX900-LABEL: v_minimum_v4f16__nsz:
1275; GFX900:       ; %bb.0:
1276; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277; GFX900-NEXT:    v_pk_min_f16 v4, v1, v3
1278; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7e00
1279; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
1280; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
1281; GFX900-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1282; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1283; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
1284; GFX900-NEXT:    v_pk_min_f16 v3, v0, v2
1285; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
1286; GFX900-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
1287; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1288; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1289; GFX900-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
1290; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1291; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
1292; GFX900-NEXT:    v_perm_b32 v1, v1, v6, s4
1293; GFX900-NEXT:    s_setpc_b64 s[30:31]
1294;
1295; GFX950-LABEL: v_minimum_v4f16__nsz:
1296; GFX950:       ; %bb.0:
1297; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
1299; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
1300; GFX950-NEXT:    s_setpc_b64 s[30:31]
1301;
1302; GFX10-LABEL: v_minimum_v4f16__nsz:
1303; GFX10:       ; %bb.0:
1304; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1305; GFX10-NEXT:    v_pk_min_f16 v4, v1, v3
1306; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1307; GFX10-NEXT:    v_pk_min_f16 v5, v0, v2
1308; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v4, vcc_lo
1309; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1310; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v5
1311; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1312; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v5, vcc_lo
1313; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
1314; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1315; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
1316; GFX10-NEXT:    v_perm_b32 v0, v0, v5, 0x5040100
1317; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1318; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
1319; GFX10-NEXT:    s_setpc_b64 s[30:31]
1320;
1321; GFX11-LABEL: v_minimum_v4f16__nsz:
1322; GFX11:       ; %bb.0:
1323; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
1325; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
1326; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
1327; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
1328; GFX11-NEXT:    v_pk_min_f16 v7, v0, v2
1329; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1330; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
1331; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1332; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
1333; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
1334; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1335; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
1336; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
1337; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1338; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
1339; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
1340; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
1341; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
1342; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1343; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
1344; GFX11-NEXT:    s_setpc_b64 s[30:31]
1345;
1346; GFX12-LABEL: v_minimum_v4f16__nsz:
1347; GFX12:       ; %bb.0:
1348; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1349; GFX12-NEXT:    s_wait_expcnt 0x0
1350; GFX12-NEXT:    s_wait_samplecnt 0x0
1351; GFX12-NEXT:    s_wait_bvhcnt 0x0
1352; GFX12-NEXT:    s_wait_kmcnt 0x0
1353; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
1354; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
1355; GFX12-NEXT:    s_setpc_b64 s[30:31]
1356  %op = call nsz <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1)
1357  ret <4 x half> %op
1358}
1359
; llvm.minimum on <4 x half> operands where the call carries both nnan and nsz.
; The assertions below list the same instruction sequences as those recorded
; for @v_minimum_v4f16__nnan: only v_min_f16 / v_pk_min_f16 on GFX8, GFX900,
; GFX10 and GFX11, and v_pk_minimum3_f16 / v_pk_minimum_f16 on GFX950 and GFX12.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
1360define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1) {
1361; GFX8-LABEL: v_minimum_v4f16__nnan_nsz:
1362; GFX8:       ; %bb.0:
1363; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364; GFX8-NEXT:    v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1365; GFX8-NEXT:    v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1366; GFX8-NEXT:    v_min_f16_e32 v1, v1, v3
1367; GFX8-NEXT:    v_min_f16_e32 v0, v0, v2
1368; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
1369; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1370; GFX8-NEXT:    s_setpc_b64 s[30:31]
1371;
1372; GFX900-LABEL: v_minimum_v4f16__nnan_nsz:
1373; GFX900:       ; %bb.0:
1374; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
1376; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
1377; GFX900-NEXT:    s_setpc_b64 s[30:31]
1378;
1379; GFX950-LABEL: v_minimum_v4f16__nnan_nsz:
1380; GFX950:       ; %bb.0:
1381; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1382; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
1383; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
1384; GFX950-NEXT:    s_setpc_b64 s[30:31]
1385;
1386; GFX10-LABEL: v_minimum_v4f16__nnan_nsz:
1387; GFX10:       ; %bb.0:
1388; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389; GFX10-NEXT:    v_pk_min_f16 v0, v0, v2
1390; GFX10-NEXT:    v_pk_min_f16 v1, v1, v3
1391; GFX10-NEXT:    s_setpc_b64 s[30:31]
1392;
1393; GFX11-LABEL: v_minimum_v4f16__nnan_nsz:
1394; GFX11:       ; %bb.0:
1395; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396; GFX11-NEXT:    v_pk_min_f16 v0, v0, v2
1397; GFX11-NEXT:    v_pk_min_f16 v1, v1, v3
1398; GFX11-NEXT:    s_setpc_b64 s[30:31]
1399;
1400; GFX12-LABEL: v_minimum_v4f16__nnan_nsz:
1401; GFX12:       ; %bb.0:
1402; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1403; GFX12-NEXT:    s_wait_expcnt 0x0
1404; GFX12-NEXT:    s_wait_samplecnt 0x0
1405; GFX12-NEXT:    s_wait_bvhcnt 0x0
1406; GFX12-NEXT:    s_wait_kmcnt 0x0
1407; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
1408; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
1409; GFX12-NEXT:    s_setpc_b64 s[30:31]
1410  %op = call nnan nsz <4 x half> @llvm.minimum.v4f16(<4 x half> %src0, <4 x half> %src1)
1411  ret <4 x half> %op
1412}
1413
1414define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
1415; GFX8-LABEL: v_minimum_v8f16:
1416; GFX8:       ; %bb.0:
1417; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1418; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
1419; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
1420; GFX8-NEXT:    v_min_f16_e32 v10, v9, v8
1421; GFX8-NEXT:    v_mov_b32_e32 v11, 0x7e00
1422; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v9, v8
1423; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v10, vcc
1424; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
1425; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1426; GFX8-NEXT:    v_min_f16_e32 v12, v10, v9
1427; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v10, v9
1428; GFX8-NEXT:    v_cndmask_b32_e32 v9, v11, v12, vcc
1429; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
1430; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v1
1431; GFX8-NEXT:    v_min_f16_e32 v13, v12, v10
1432; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v12, v10
1433; GFX8-NEXT:    v_cndmask_b32_e32 v10, v11, v13, vcc
1434; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
1435; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
1436; GFX8-NEXT:    v_min_f16_e32 v14, v13, v12
1437; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v13, v12
1438; GFX8-NEXT:    v_cndmask_b32_e32 v12, v11, v14, vcc
1439; GFX8-NEXT:    v_min_f16_e32 v13, v3, v7
1440; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
1441; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v13, vcc
1442; GFX8-NEXT:    v_min_f16_e32 v7, v2, v6
1443; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
1444; GFX8-NEXT:    v_cndmask_b32_e32 v2, v11, v7, vcc
1445; GFX8-NEXT:    v_min_f16_e32 v6, v1, v5
1446; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
1447; GFX8-NEXT:    v_cndmask_b32_e32 v1, v11, v6, vcc
1448; GFX8-NEXT:    v_min_f16_e32 v5, v0, v4
1449; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
1450; GFX8-NEXT:    v_cndmask_b32_e32 v0, v11, v5, vcc
1451; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
1452; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1453; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
1454; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1455; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v9
1456; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1457; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
1458; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1459; GFX8-NEXT:    s_setpc_b64 s[30:31]
1460;
1461; GFX900-LABEL: v_minimum_v8f16:
1462; GFX900:       ; %bb.0:
1463; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464; GFX900-NEXT:    v_pk_min_f16 v8, v3, v7
1465; GFX900-NEXT:    v_mov_b32_e32 v9, 0x7e00
1466; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
1467; GFX900-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
1468; GFX900-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
1469; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
1470; GFX900-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
1471; GFX900-NEXT:    v_pk_min_f16 v7, v2, v6
1472; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
1473; GFX900-NEXT:    v_cndmask_b32_e32 v8, v9, v7, vcc
1474; GFX900-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
1475; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
1476; GFX900-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
1477; GFX900-NEXT:    v_pk_min_f16 v6, v1, v5
1478; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
1479; GFX900-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
1480; GFX900-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
1481; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
1482; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
1483; GFX900-NEXT:    v_pk_min_f16 v5, v0, v4
1484; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
1485; GFX900-NEXT:    v_cndmask_b32_e32 v6, v9, v5, vcc
1486; GFX900-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
1487; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
1488; GFX900-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
1489; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1490; GFX900-NEXT:    v_perm_b32 v0, v0, v6, s4
1491; GFX900-NEXT:    v_perm_b32 v1, v1, v7, s4
1492; GFX900-NEXT:    v_perm_b32 v2, v2, v8, s4
1493; GFX900-NEXT:    v_perm_b32 v3, v3, v10, s4
1494; GFX900-NEXT:    s_setpc_b64 s[30:31]
1495;
1496; GFX950-LABEL: v_minimum_v8f16:
1497; GFX950:       ; %bb.0:
1498; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4
1500; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5
1501; GFX950-NEXT:    v_pk_minimum3_f16 v2, v2, v6, v6
1502; GFX950-NEXT:    v_pk_minimum3_f16 v3, v3, v7, v7
1503; GFX950-NEXT:    s_setpc_b64 s[30:31]
1504;
1505; GFX10-LABEL: v_minimum_v8f16:
1506; GFX10:       ; %bb.0:
1507; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508; GFX10-NEXT:    v_pk_min_f16 v8, v3, v7
1509; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
1510; GFX10-NEXT:    v_pk_min_f16 v9, v2, v6
1511; GFX10-NEXT:    v_pk_min_f16 v12, v1, v5
1512; GFX10-NEXT:    v_pk_min_f16 v13, v0, v4
1513; GFX10-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v8, vcc_lo
1514; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
1515; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
1516; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
1517; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v9, vcc_lo
1518; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
1519; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v11, vcc_lo
1520; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
1521; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v13
1522; GFX10-NEXT:    v_perm_b32 v2, v2, v9, 0x5040100
1523; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v12, vcc_lo
1524; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
1525; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
1526; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v13, vcc_lo
1527; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
1528; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
1529; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
1530; GFX10-NEXT:    v_perm_b32 v0, v0, v13, 0x5040100
1531; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v12, vcc_lo
1532; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
1533; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
1534; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
1535; GFX10-NEXT:    v_perm_b32 v3, v3, v10, 0x5040100
1536; GFX10-NEXT:    s_setpc_b64 s[30:31]
1537;
1538; GFX11-LABEL: v_minimum_v8f16:
1539; GFX11:       ; %bb.0:
1540; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541; GFX11-NEXT:    v_pk_min_f16 v8, v3, v7
1542; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
1543; GFX11-NEXT:    v_pk_min_f16 v10, v2, v6
1544; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
1545; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
1546; GFX11-NEXT:    v_pk_min_f16 v14, v1, v5
1547; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
1548; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
1549; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
1550; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
1551; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
1552; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
1553; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
1554; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
1555; GFX11-NEXT:    v_pk_min_f16 v11, v0, v4
1556; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
1557; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
1558; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
1559; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
1560; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
1561; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
1562; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
1563; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
1564; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
1565; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
1566; GFX11-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
1567; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
1568; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
1569; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
1570; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
1571; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
1572; GFX11-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
1573; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
1574; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
1575; GFX11-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
1576; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
1577; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1578; GFX11-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
1579; GFX11-NEXT:    s_setpc_b64 s[30:31]
1580;
1581; GFX12-LABEL: v_minimum_v8f16:
1582; GFX12:       ; %bb.0:
1583; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1584; GFX12-NEXT:    s_wait_expcnt 0x0
1585; GFX12-NEXT:    s_wait_samplecnt 0x0
1586; GFX12-NEXT:    s_wait_bvhcnt 0x0
1587; GFX12-NEXT:    s_wait_kmcnt 0x0
1588; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v4
1589; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
1590; GFX12-NEXT:    v_pk_minimum_f16 v2, v2, v6
1591; GFX12-NEXT:    v_pk_minimum_f16 v3, v3, v7
1592; GFX12-NEXT:    s_setpc_b64 s[30:31]
1593  %op = call <8 x half> @llvm.minimum.v8f16(<8 x half> %src0, <8 x half> %src1)
1594  ret <8 x half> %op
1595}
1596
; llvm.minimum on <16 x half>: the function body is a single call to
; @llvm.minimum.v16f16 on %src0 and %src1, and the call result is returned.
; The autogenerated checks below record the expected code per check prefix:
;   - GFX8 lines use v_min_f16 and v_cmp_o_f16 on each half (high halves are
;     first extracted with v_lshrrev_b32 by 16), a v_cndmask between the min
;     result and the constant 0x7e00, and v_or_b32_sdwa to repack the halves.
;   - GFX900, GFX10 and GFX11 lines use v_pk_min_f16 with v_cmp_o_f16 and
;     v_cndmask against 0x7e00 for each half, and v_perm_b32 with selector
;     0x5040100 to repack.
;   - GFX950 lines use one v_pk_minimum3_f16 per register pair, with the
;     second source operand repeated as the third.
;   - GFX12 lines use one v_pk_minimum_f16 per register pair.
; NOTE(review): 0x7e00 is presumably the f16 quiet-NaN pattern produced when
; the operands compare unordered - confirm against the lowering code.
1597define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
1598; GFX8-LABEL: v_minimum_v16f16:
1599; GFX8:       ; %bb.0:
1600; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
1602; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
1603; GFX8-NEXT:    v_min_f16_e32 v16, v18, v17
1604; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v18, v17
1605; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
1606; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
1607; GFX8-NEXT:    v_min_f16_e32 v20, v18, v17
1608; GFX8-NEXT:    v_cmp_o_f16_e64 s[4:5], v18, v17
1609; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v12
1610; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v4
1611; GFX8-NEXT:    v_min_f16_e32 v21, v18, v17
1612; GFX8-NEXT:    v_cmp_o_f16_e64 s[6:7], v18, v17
1613; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v11
1614; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
1615; GFX8-NEXT:    v_min_f16_e32 v22, v18, v17
1616; GFX8-NEXT:    v_cmp_o_f16_e64 s[8:9], v18, v17
1617; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
1618; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v2
1619; GFX8-NEXT:    v_min_f16_e32 v23, v18, v17
1620; GFX8-NEXT:    v_cmp_o_f16_e64 s[10:11], v18, v17
1621; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v9
1622; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
1623; GFX8-NEXT:    v_min_f16_e32 v24, v18, v17
1624; GFX8-NEXT:    v_cmp_o_f16_e64 s[12:13], v18, v17
1625; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
1626; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
1627; GFX8-NEXT:    v_min_f16_e32 v25, v18, v17
1628; GFX8-NEXT:    v_cmp_o_f16_e64 s[14:15], v18, v17
1629; GFX8-NEXT:    v_min_f16_e32 v17, v6, v14
1630; GFX8-NEXT:    v_cmp_o_f16_e64 s[16:17], v6, v14
1631; GFX8-NEXT:    v_min_f16_e32 v6, v5, v13
1632; GFX8-NEXT:    v_cmp_o_f16_e64 s[18:19], v5, v13
1633; GFX8-NEXT:    v_min_f16_e32 v5, v4, v12
1634; GFX8-NEXT:    v_cmp_o_f16_e64 s[20:21], v4, v12
1635; GFX8-NEXT:    v_min_f16_e32 v4, v3, v11
1636; GFX8-NEXT:    v_cmp_o_f16_e64 s[22:23], v3, v11
1637; GFX8-NEXT:    v_min_f16_e32 v11, v7, v15
1638; GFX8-NEXT:    v_cmp_o_f16_e64 s[24:25], v7, v15
1639; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 16, v15
1640; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
1641; GFX8-NEXT:    v_mov_b32_e32 v19, 0x7e00
1642; GFX8-NEXT:    v_min_f16_e32 v13, v7, v12
1643; GFX8-NEXT:    v_cmp_o_f16_e64 s[26:27], v7, v12
1644; GFX8-NEXT:    v_min_f16_e32 v3, v2, v10
1645; GFX8-NEXT:    v_cndmask_b32_e64 v12, v19, v13, s[26:27]
1646; GFX8-NEXT:    v_cndmask_b32_e32 v13, v19, v16, vcc
1647; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
1648; GFX8-NEXT:    v_min_f16_e32 v14, v1, v9
1649; GFX8-NEXT:    v_cndmask_b32_e32 v2, v19, v3, vcc
1650; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
1651; GFX8-NEXT:    v_min_f16_e32 v7, v0, v8
1652; GFX8-NEXT:    v_cndmask_b32_e64 v18, v19, v22, s[8:9]
1653; GFX8-NEXT:    v_cndmask_b32_e64 v22, v19, v25, s[14:15]
1654; GFX8-NEXT:    v_cndmask_b32_e32 v1, v19, v14, vcc
1655; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
1656; GFX8-NEXT:    v_cndmask_b32_e64 v16, v19, v21, s[6:7]
1657; GFX8-NEXT:    v_cndmask_b32_e64 v21, v19, v24, s[12:13]
1658; GFX8-NEXT:    v_cndmask_b32_e32 v0, v19, v7, vcc
1659; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v22
1660; GFX8-NEXT:    v_cndmask_b32_e64 v15, v19, v20, s[4:5]
1661; GFX8-NEXT:    v_cndmask_b32_e64 v20, v19, v23, s[10:11]
1662; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1663; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v21
1664; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1665; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v20
1666; GFX8-NEXT:    v_cndmask_b32_e64 v4, v19, v4, s[22:23]
1667; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1668; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v18
1669; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v5, s[20:21]
1670; GFX8-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1671; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v16
1672; GFX8-NEXT:    v_cndmask_b32_e64 v6, v19, v6, s[18:19]
1673; GFX8-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1674; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 16, v15
1675; GFX8-NEXT:    v_cndmask_b32_e64 v11, v19, v11, s[24:25]
1676; GFX8-NEXT:    v_cndmask_b32_e64 v17, v19, v17, s[16:17]
1677; GFX8-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1678; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 16, v13
1679; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 16, v12
1680; GFX8-NEXT:    v_or_b32_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1681; GFX8-NEXT:    v_or_b32_sdwa v7, v11, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1682; GFX8-NEXT:    s_setpc_b64 s[30:31]
1683;
1684; GFX900-LABEL: v_minimum_v16f16:
1685; GFX900:       ; %bb.0:
1686; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1687; GFX900-NEXT:    v_pk_min_f16 v16, v7, v15
1688; GFX900-NEXT:    v_mov_b32_e32 v17, 0x7e00
1689; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v7, v15
1690; GFX900-NEXT:    v_cndmask_b32_e32 v18, v17, v16, vcc
1691; GFX900-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
1692; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
1693; GFX900-NEXT:    v_cndmask_b32_e32 v7, v17, v16, vcc
1694; GFX900-NEXT:    v_pk_min_f16 v15, v6, v14
1695; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v6, v14
1696; GFX900-NEXT:    v_cndmask_b32_e32 v16, v17, v15, vcc
1697; GFX900-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
1698; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
1699; GFX900-NEXT:    v_cndmask_b32_e32 v6, v17, v15, vcc
1700; GFX900-NEXT:    v_pk_min_f16 v14, v5, v13
1701; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v5, v13
1702; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v14, vcc
1703; GFX900-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
1704; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
1705; GFX900-NEXT:    v_cndmask_b32_e32 v5, v17, v14, vcc
1706; GFX900-NEXT:    v_pk_min_f16 v13, v4, v12
1707; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v4, v12
1708; GFX900-NEXT:    v_cndmask_b32_e32 v14, v17, v13, vcc
1709; GFX900-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
1710; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
1711; GFX900-NEXT:    v_cndmask_b32_e32 v4, v17, v13, vcc
1712; GFX900-NEXT:    v_pk_min_f16 v12, v3, v11
1713; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v3, v11
1714; GFX900-NEXT:    v_cndmask_b32_e32 v13, v17, v12, vcc
1715; GFX900-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
1716; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
1717; GFX900-NEXT:    v_cndmask_b32_e32 v3, v17, v12, vcc
1718; GFX900-NEXT:    v_pk_min_f16 v11, v2, v10
1719; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
1720; GFX900-NEXT:    v_cndmask_b32_e32 v12, v17, v11, vcc
1721; GFX900-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
1722; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
1723; GFX900-NEXT:    v_cndmask_b32_e32 v2, v17, v11, vcc
1724; GFX900-NEXT:    v_pk_min_f16 v10, v1, v9
1725; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
1726; GFX900-NEXT:    v_cndmask_b32_e32 v11, v17, v10, vcc
1727; GFX900-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
1728; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
1729; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v10, vcc
1730; GFX900-NEXT:    v_pk_min_f16 v9, v0, v8
1731; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
1732; GFX900-NEXT:    v_cndmask_b32_e32 v10, v17, v9, vcc
1733; GFX900-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
1734; GFX900-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
1735; GFX900-NEXT:    v_cndmask_b32_e32 v0, v17, v9, vcc
1736; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1737; GFX900-NEXT:    v_perm_b32 v0, v0, v10, s4
1738; GFX900-NEXT:    v_perm_b32 v1, v1, v11, s4
1739; GFX900-NEXT:    v_perm_b32 v2, v2, v12, s4
1740; GFX900-NEXT:    v_perm_b32 v3, v3, v13, s4
1741; GFX900-NEXT:    v_perm_b32 v4, v4, v14, s4
1742; GFX900-NEXT:    v_perm_b32 v5, v5, v15, s4
1743; GFX900-NEXT:    v_perm_b32 v6, v6, v16, s4
1744; GFX900-NEXT:    v_perm_b32 v7, v7, v18, s4
1745; GFX900-NEXT:    s_setpc_b64 s[30:31]
1746;
1747; GFX950-LABEL: v_minimum_v16f16:
1748; GFX950:       ; %bb.0:
1749; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1750; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v8, v8
1751; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v9, v9
1752; GFX950-NEXT:    v_pk_minimum3_f16 v2, v2, v10, v10
1753; GFX950-NEXT:    v_pk_minimum3_f16 v3, v3, v11, v11
1754; GFX950-NEXT:    v_pk_minimum3_f16 v4, v4, v12, v12
1755; GFX950-NEXT:    v_pk_minimum3_f16 v5, v5, v13, v13
1756; GFX950-NEXT:    v_pk_minimum3_f16 v6, v6, v14, v14
1757; GFX950-NEXT:    v_pk_minimum3_f16 v7, v7, v15, v15
1758; GFX950-NEXT:    s_setpc_b64 s[30:31]
1759;
1760; GFX10-LABEL: v_minimum_v16f16:
1761; GFX10:       ; %bb.0:
1762; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1763; GFX10-NEXT:    v_pk_min_f16 v16, v7, v15
1764; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
1765; GFX10-NEXT:    v_pk_min_f16 v18, v6, v14
1766; GFX10-NEXT:    v_pk_min_f16 v19, v3, v11
1767; GFX10-NEXT:    v_pk_min_f16 v20, v2, v10
1768; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v16
1769; GFX10-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v16, vcc_lo
1770; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
1771; GFX10-NEXT:    v_lshrrev_b32_e32 v15, 16, v18
1772; GFX10-NEXT:    v_pk_min_f16 v21, v0, v8
1773; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v17, vcc_lo
1774; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
1775; GFX10-NEXT:    v_pk_min_f16 v17, v5, v13
1776; GFX10-NEXT:    v_lshrrev_b32_e32 v23, 16, v21
1777; GFX10-NEXT:    v_perm_b32 v7, v7, v16, 0x5040100
1778; GFX10-NEXT:    v_cndmask_b32_e32 v18, 0x7e00, v18, vcc_lo
1779; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
1780; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
1781; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
1782; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
1783; GFX10-NEXT:    v_perm_b32 v6, v6, v18, 0x5040100
1784; GFX10-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v17, vcc_lo
1785; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
1786; GFX10-NEXT:    v_pk_min_f16 v17, v4, v12
1787; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
1788; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
1789; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v17
1790; GFX10-NEXT:    v_perm_b32 v5, v5, v15, 0x5040100
1791; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v17, vcc_lo
1792; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
1793; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v19
1794; GFX10-NEXT:    v_cndmask_b32_e32 v19, 0x7e00, v19, vcc_lo
1795; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
1796; GFX10-NEXT:    v_pk_min_f16 v11, v1, v9
1797; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
1798; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
1799; GFX10-NEXT:    v_lshrrev_b32_e32 v22, 16, v11
1800; GFX10-NEXT:    v_perm_b32 v3, v3, v19, 0x5040100
1801; GFX10-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v20, vcc_lo
1802; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
1803; GFX10-NEXT:    v_lshrrev_b32_e32 v20, 16, v20
1804; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v11, vcc_lo
1805; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
1806; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v22, vcc_lo
1807; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
1808; GFX10-NEXT:    v_perm_b32 v1, v1, v11, 0x5040100
1809; GFX10-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v21, vcc_lo
1810; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
1811; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v23, vcc_lo
1812; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
1813; GFX10-NEXT:    v_perm_b32 v0, v0, v9, 0x5040100
1814; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
1815; GFX10-NEXT:    v_cmp_o_f16_sdwa vcc_lo, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
1816; GFX10-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
1817; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v14, vcc_lo
1818; GFX10-NEXT:    v_perm_b32 v4, v4, v13, 0x5040100
1819; GFX10-NEXT:    s_setpc_b64 s[30:31]
1820;
1821; GFX11-LABEL: v_minimum_v16f16:
1822; GFX11:       ; %bb.0:
1823; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824; GFX11-NEXT:    v_pk_min_f16 v16, v7, v15
1825; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
1826; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
1827; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
1828; GFX11-NEXT:    v_pk_min_f16 v15, v6, v14
1829; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
1830; GFX11-NEXT:    v_pk_min_f16 v20, v4, v12
1831; GFX11-NEXT:    v_pk_min_f16 v22, v2, v10
1832; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
1833; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
1834; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
1835; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
1836; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
1837; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
1838; GFX11-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
1839; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
1840; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
1841; GFX11-NEXT:    v_pk_min_f16 v14, v5, v13
1842; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1843; GFX11-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
1844; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
1845; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
1846; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
1847; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
1848; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
1849; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
1850; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
1851; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
1852; GFX11-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
1853; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
1854; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
1855; GFX11-NEXT:    v_pk_min_f16 v17, v3, v11
1856; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
1857; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
1858; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
1859; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
1860; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
1861; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
1862; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
1863; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
1864; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
1865; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
1866; GFX11-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
1867; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
1868; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1869; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
1870; GFX11-NEXT:    v_pk_min_f16 v19, v1, v9
1871; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
1872; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
1873; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
1874; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
1875; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
1876; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1877; GFX11-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
1878; GFX11-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
1879; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
1880; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
1881; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
1882; GFX11-NEXT:    v_pk_min_f16 v22, v0, v8
1883; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
1884; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
1885; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1886; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
1887; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
1888; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1889; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
1890; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
1891; GFX11-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
1892; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
1893; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
1894; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
1895; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
1896; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
1897; GFX11-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
1898; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
1899; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
1900; GFX11-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
1901; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
1902; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1903; GFX11-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
1904; GFX11-NEXT:    s_setpc_b64 s[30:31]
1905;
1906; GFX12-LABEL: v_minimum_v16f16:
1907; GFX12:       ; %bb.0:
1908; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1909; GFX12-NEXT:    s_wait_expcnt 0x0
1910; GFX12-NEXT:    s_wait_samplecnt 0x0
1911; GFX12-NEXT:    s_wait_bvhcnt 0x0
1912; GFX12-NEXT:    s_wait_kmcnt 0x0
1913; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v8
1914; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v9
1915; GFX12-NEXT:    v_pk_minimum_f16 v2, v2, v10
1916; GFX12-NEXT:    v_pk_minimum_f16 v3, v3, v11
1917; GFX12-NEXT:    v_pk_minimum_f16 v4, v4, v12
1918; GFX12-NEXT:    v_pk_minimum_f16 v5, v5, v13
1919; GFX12-NEXT:    v_pk_minimum_f16 v6, v6, v14
1920; GFX12-NEXT:    v_pk_minimum_f16 v7, v7, v15
1921; GFX12-NEXT:    s_setpc_b64 s[30:31]
1922  %op = call <16 x half> @llvm.minimum.v16f16(<16 x half> %src0, <16 x half> %src1)
1923  ret <16 x half> %op
1924}
1925;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1926; GCN: {{.*}}
1927