xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fmed3.ll (revision 222ff186087f0f1d9976ac2512dca52d90e13472)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-SDAG %s
3; RUN: llc -mtriple=amdgcn -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
4; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
5; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
8; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
9; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
10; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
11; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
12
; Clamp-style pattern minnum(maxnum(x, 2.0), 4.0) where x is produced by an
; `fadd nnan`; the maxnum/minnum calls themselves carry no fast-math flags.
; x is loaded from %aptr[tid] and the result is stored to %out[tid].
; The checks for every run line expect the max/min pair to be emitted as a
; single v_med3_f32 with operands (x, 2.0, 4.0).
13define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
14; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
15; SI-SDAG:       ; %bb.0:
16; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
17; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
18; SI-SDAG-NEXT:    s_mov_b32 s6, 0
19; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
20; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
21; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
22; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
23; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
24; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
25; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
26; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
27; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
28; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
29; SI-SDAG-NEXT:    s_endpgm
30;
31; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
32; SI-GISEL:       ; %bb.0:
33; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
34; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
35; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
36; SI-GISEL-NEXT:    s_mov_b32 s6, 0
37; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
38; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
39; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
40; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
41; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
42; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
43; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
44; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
45; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
46; SI-GISEL-NEXT:    s_endpgm
47;
48; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
49; VI-SDAG:       ; %bb.0:
50; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
51; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
52; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
53; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
54; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
55; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
56; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
57; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
58; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
59; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
60; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
61; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
62; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
63; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
64; VI-SDAG-NEXT:    s_endpgm
65;
66; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
67; VI-GISEL:       ; %bb.0:
68; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
69; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
70; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
71; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
72; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
73; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
74; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
75; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
76; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
77; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
78; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
79; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
80; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
81; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
82; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
83; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
84; VI-GISEL-NEXT:    s_endpgm
85;
86; GFX9-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
87; GFX9:       ; %bb.0:
88; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
89; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
90; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
91; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
92; GFX9-NEXT:    s_waitcnt vmcnt(0)
93; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
94; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
95; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
96; GFX9-NEXT:    s_endpgm
97;
98; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
99; GFX11:       ; %bb.0:
100; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
101; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
102; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
103; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
104; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
105; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
106; GFX11-NEXT:    s_waitcnt vmcnt(0)
107; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
108; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
109; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
110; GFX11-NEXT:    s_endpgm
111  %tid = call i32 @llvm.amdgcn.workitem.id.x()
112  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
113  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
114  %a = load float, ptr addrspace(1) %gep0
  ; The nnan flag sits on the fadd that feeds the max/min pair, not on the
  ; maxnum/minnum calls below.
115  %a.add = fadd nnan float %a, 1.0
116  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
117  %med = call float @llvm.minnum.f32(float %max, float 4.0)
118
119  store float %med, ptr addrspace(1) %outgep
120  ret void
121}
122
; Register / immediate / immediate form of the med3 pattern:
; minnum(maxnum(x, 2.0), 4.0) with x = fadd nnan (%aptr[tid], 1.0), stored to
; %out[tid]. The checks for every run line expect a single v_med3_f32 with
; operands (x, 2.0, 4.0).
; NOTE(review): the function name suggests nnan applies to the min/max
; operations, but in the IR below nnan appears only on the fadd, which makes
; this body the same as v_test_nnan_input_fmed3_r_i_i_f32 - confirm intent.
123define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
124; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
125; SI-SDAG:       ; %bb.0:
126; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
127; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
128; SI-SDAG-NEXT:    s_mov_b32 s6, 0
129; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
130; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
131; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
132; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
133; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
134; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
135; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
136; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
137; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
138; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
139; SI-SDAG-NEXT:    s_endpgm
140;
141; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_f32:
142; SI-GISEL:       ; %bb.0:
143; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
144; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
145; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
146; SI-GISEL-NEXT:    s_mov_b32 s6, 0
147; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
148; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
149; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
150; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
151; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
152; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
153; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
154; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
155; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
156; SI-GISEL-NEXT:    s_endpgm
157;
158; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
159; VI-SDAG:       ; %bb.0:
160; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
161; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
162; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
163; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
164; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
165; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
166; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
167; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
168; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
169; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
170; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
171; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
172; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
173; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
174; VI-SDAG-NEXT:    s_endpgm
175;
176; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_f32:
177; VI-GISEL:       ; %bb.0:
178; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
179; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
180; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
181; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
182; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
183; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
184; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
185; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
186; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
187; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
188; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
189; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
190; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
191; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
192; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
193; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
194; VI-GISEL-NEXT:    s_endpgm
195;
196; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_f32:
197; GFX9:       ; %bb.0:
198; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
199; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
200; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
201; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
202; GFX9-NEXT:    s_waitcnt vmcnt(0)
203; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
204; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
205; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
206; GFX9-NEXT:    s_endpgm
207;
208; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_f32:
209; GFX11:       ; %bb.0:
210; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
211; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
212; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
213; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
214; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
215; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
216; GFX11-NEXT:    s_waitcnt vmcnt(0)
217; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
218; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
219; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
220; GFX11-NEXT:    s_endpgm
221  %tid = call i32 @llvm.amdgcn.workitem.id.x()
222  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
223  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
224  %a = load float, ptr addrspace(1) %gep0
225  %a.add = fadd nnan float %a, 1.0
226
  ; Variable operand first in both calls; lower bound 2.0, upper bound 4.0.
227  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
228  %med = call float @llvm.minnum.f32(float %max, float 4.0)
229
230  store float %med, ptr addrspace(1) %outgep
231  ret void
232}
233
; Commuted variant of the med3 pattern: the constant is the FIRST operand of
; both the maxnum and the minnum call, i.e. minnum(4.0, maxnum(2.0, x)) with
; x = fadd nnan (%aptr[tid], 1.0). The result is stored to %out[tid].
; The checks for every run line still expect a single v_med3_f32 with operands
; in the order (x, 2.0, 4.0), so operand order in the IR must not matter.
234define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
235; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
236; SI-SDAG:       ; %bb.0:
237; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
238; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
239; SI-SDAG-NEXT:    s_mov_b32 s6, 0
240; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
241; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
242; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
243; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
244; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
245; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
246; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
247; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
248; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
249; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
250; SI-SDAG-NEXT:    s_endpgm
251;
252; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
253; SI-GISEL:       ; %bb.0:
254; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
255; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
256; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
257; SI-GISEL-NEXT:    s_mov_b32 s6, 0
258; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
259; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
260; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
261; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
262; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
263; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
264; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
265; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
266; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
267; SI-GISEL-NEXT:    s_endpgm
268;
269; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
270; VI-SDAG:       ; %bb.0:
271; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
272; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
273; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
274; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
275; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
276; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
277; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
278; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
279; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
280; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
281; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
282; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
283; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
284; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
285; VI-SDAG-NEXT:    s_endpgm
286;
287; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
288; VI-GISEL:       ; %bb.0:
289; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
290; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
291; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
292; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
293; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
294; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
295; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
296; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
297; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
298; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
299; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
300; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
301; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
302; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
303; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
304; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
305; VI-GISEL-NEXT:    s_endpgm
306;
307; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
308; GFX9:       ; %bb.0:
309; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
310; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
311; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
313; GFX9-NEXT:    s_waitcnt vmcnt(0)
314; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
315; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
316; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
317; GFX9-NEXT:    s_endpgm
318;
319; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
320; GFX11:       ; %bb.0:
321; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
322; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
323; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
324; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
325; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
326; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
327; GFX11-NEXT:    s_waitcnt vmcnt(0)
328; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
329; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
330; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
331; GFX11-NEXT:    s_endpgm
332  %tid = call i32 @llvm.amdgcn.workitem.id.x()
333  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
334  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
335  %a = load float, ptr addrspace(1) %gep0
336  %a.add = fadd nnan float %a, 1.0
337
  ; Both calls take the constant as operand 0 and the variable as operand 1.
338  %max = call float @llvm.maxnum.f32(float 2.0, float %a.add)
339  %med = call float @llvm.minnum.f32(float 4.0, float %max)
340
341  store float %med, ptr addrspace(1) %outgep
342  ret void
343}
344
; Partially commuted variant of the med3 pattern: maxnum keeps the variable as
; its first operand, while minnum takes the constant first, i.e.
; minnum(4.0, maxnum(x, 2.0)) with x = fadd nnan (%aptr[tid], 1.0). The result
; is stored to %out[tid].
; The checks for every run line expect a single v_med3_f32 with operands
; (x, 2.0, 4.0).
345define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
346; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
347; SI-SDAG:       ; %bb.0:
348; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
349; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
350; SI-SDAG-NEXT:    s_mov_b32 s6, 0
351; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
352; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
353; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
354; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
355; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
356; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
357; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
358; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
359; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
360; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
361; SI-SDAG-NEXT:    s_endpgm
362;
363; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
364; SI-GISEL:       ; %bb.0:
365; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
366; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
367; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
368; SI-GISEL-NEXT:    s_mov_b32 s6, 0
369; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
370; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
371; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
372; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
373; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
374; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
375; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
376; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
377; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
378; SI-GISEL-NEXT:    s_endpgm
379;
380; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
381; VI-SDAG:       ; %bb.0:
382; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
383; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
384; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
385; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
386; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
387; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
388; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
389; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
390; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
391; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
392; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
393; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
394; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
395; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
396; VI-SDAG-NEXT:    s_endpgm
397;
398; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
399; VI-GISEL:       ; %bb.0:
400; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
401; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
402; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
403; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
404; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
405; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
406; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
407; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
408; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
409; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
410; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
411; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
412; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
413; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
414; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
415; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
416; VI-GISEL-NEXT:    s_endpgm
417;
418; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
419; GFX9:       ; %bb.0:
420; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
421; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
422; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
423; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
424; GFX9-NEXT:    s_waitcnt vmcnt(0)
425; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
426; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
427; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
428; GFX9-NEXT:    s_endpgm
429;
430; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
431; GFX11:       ; %bb.0:
432; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
433; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
434; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
435; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
436; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
437; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
438; GFX11-NEXT:    s_waitcnt vmcnt(0)
439; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
440; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
441; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
442; GFX11-NEXT:    s_endpgm
443  %tid = call i32 @llvm.amdgcn.workitem.id.x()
444  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
445  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
446  %a = load float, ptr addrspace(1) %gep0
447  %a.add = fadd nnan float %a, 1.0
448
  ; Only the minnum operands are swapped (constant first); maxnum is not.
449  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
450  %med = call float @llvm.minnum.f32(float 4.0, float %max)
451
452  store float %med, ptr addrspace(1) %outgep
453  ret void
454}
455
; Negative test for the med3 pattern: the constants are in the wrong order.
; The IR computes minnum(maxnum(x, 4.0), 2.0), i.e. the max bound (4.0) is
; larger than the min bound (2.0), so the pair is not a clamp to [2.0, 4.0]
; and is not equivalent to a median of (x, 2.0, 4.0).
; The checks expect no v_med3_f32 on any run line: the SI, VI and GFX9 runs
; keep separate v_max_f32 / v_min_f32 instructions, and the GFX11 runs expect
; a single v_maxmin_f32 with operands (x, 4.0, 2.0).
456define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
457; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
458; SI-SDAG:       ; %bb.0:
459; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
460; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
461; SI-SDAG-NEXT:    s_mov_b32 s6, 0
462; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
463; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
464; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
465; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
466; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
467; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
468; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
469; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
470; SI-SDAG-NEXT:    v_max_f32_e32 v2, 4.0, v2
471; SI-SDAG-NEXT:    v_min_f32_e32 v2, 2.0, v2
472; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
473; SI-SDAG-NEXT:    s_endpgm
474;
475; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
476; SI-GISEL:       ; %bb.0:
477; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
478; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
479; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
480; SI-GISEL-NEXT:    s_mov_b32 s6, 0
481; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
482; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
483; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
484; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
485; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
486; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
487; SI-GISEL-NEXT:    v_max_f32_e32 v2, 4.0, v2
488; SI-GISEL-NEXT:    v_min_f32_e32 v2, 2.0, v2
489; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
490; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
491; SI-GISEL-NEXT:    s_endpgm
492;
493; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
494; VI-SDAG:       ; %bb.0:
495; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
496; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
497; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
498; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
499; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
500; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
501; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
502; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
503; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
504; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
505; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
506; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
507; VI-SDAG-NEXT:    v_max_f32_e32 v2, 4.0, v2
508; VI-SDAG-NEXT:    v_min_f32_e32 v2, 2.0, v2
509; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
510; VI-SDAG-NEXT:    s_endpgm
511;
512; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
513; VI-GISEL:       ; %bb.0:
514; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
515; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
516; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
517; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
518; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
519; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
520; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
521; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
522; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
523; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
524; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
525; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
526; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
527; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
528; VI-GISEL-NEXT:    v_max_f32_e32 v2, 4.0, v2
529; VI-GISEL-NEXT:    v_min_f32_e32 v2, 2.0, v2
530; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
531; VI-GISEL-NEXT:    s_endpgm
532;
533; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
534; GFX9:       ; %bb.0:
535; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
536; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
537; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
538; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
539; GFX9-NEXT:    s_waitcnt vmcnt(0)
540; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
541; GFX9-NEXT:    v_max_f32_e32 v1, 4.0, v1
542; GFX9-NEXT:    v_min_f32_e32 v1, 2.0, v1
543; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
544; GFX9-NEXT:    s_endpgm
545;
546; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
547; GFX11:       ; %bb.0:
548; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
549; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
550; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
551; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
552; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
553; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
554; GFX11-NEXT:    s_waitcnt vmcnt(0)
555; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
556; GFX11-NEXT:    v_maxmin_f32 v1, v1, 4.0, 2.0
557; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
558; GFX11-NEXT:    s_endpgm
559  %tid = call i32 @llvm.amdgcn.workitem.id.x()
560  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
561  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
562  %a = load float, ptr addrspace(1) %gep0
563  %a.add = fadd nnan float %a, 1.0
564
  ; Deliberately inverted bounds: max against 4.0 first, then min against 2.0.
565  %max = call float @llvm.maxnum.f32(float %a.add, float 4.0)
566  %med = call float @llvm.minnum.f32(float %max, float 2.0)
567
568  store float %med, ptr addrspace(1) %outgep
569  ret void
570}
571
572define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
573; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
574; SI-SDAG:       ; %bb.0:
575; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
576; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
577; SI-SDAG-NEXT:    s_mov_b32 s6, 0
578; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
579; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
580; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
581; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
582; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
583; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
584; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
585; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
586; SI-SDAG-NEXT:    v_max_f32_e32 v2, 2.0, v2
587; SI-SDAG-NEXT:    v_min_f32_e32 v3, 4.0, v2
588; SI-SDAG-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
589; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
590; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
591; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
592; SI-SDAG-NEXT:    s_endpgm
593;
594; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
595; SI-GISEL:       ; %bb.0:
596; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
597; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
598; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
599; SI-GISEL-NEXT:    s_mov_b32 s6, 0
600; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
601; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
602; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
603; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
604; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
605; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
606; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
607; SI-GISEL-NEXT:    v_max_f32_e32 v3, 2.0, v2
608; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
609; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
610; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
611; SI-GISEL-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
612; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
613; SI-GISEL-NEXT:    s_endpgm
614;
615; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
616; VI-SDAG:       ; %bb.0:
617; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
618; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
619; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
620; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
621; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
622; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
623; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
624; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
625; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
626; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
627; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
628; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
629; VI-SDAG-NEXT:    v_max_f32_e32 v2, 2.0, v2
630; VI-SDAG-NEXT:    v_min_f32_e32 v3, 4.0, v2
631; VI-SDAG-NEXT:    flat_store_dword v[0:1], v3
632; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
633; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
634; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
635; VI-SDAG-NEXT:    s_endpgm
636;
637; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
638; VI-GISEL:       ; %bb.0:
639; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
640; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
641; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
642; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
643; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
644; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
645; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
646; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
647; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
648; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
649; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
650; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
651; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
652; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
653; VI-GISEL-NEXT:    v_max_f32_e32 v3, 2.0, v2
654; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
655; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
656; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
657; VI-GISEL-NEXT:    flat_store_dword v[0:1], v3
658; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
659; VI-GISEL-NEXT:    s_endpgm
660;
661; GFX9-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
662; GFX9-SDAG:       ; %bb.0:
663; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
664; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
665; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
666; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3]
667; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
668; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
669; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, 2.0, v1
670; GFX9-SDAG-NEXT:    v_min_f32_e32 v2, 4.0, v1
671; GFX9-SDAG-NEXT:    global_store_dword v0, v2, s[0:1]
672; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
673; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
674; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
675; GFX9-SDAG-NEXT:    s_endpgm
676;
677; GFX9-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
678; GFX9-GISEL:       ; %bb.0:
679; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
680; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
681; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
682; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3]
683; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
684; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
685; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, 2.0, v1
686; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
687; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
688; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
689; GFX9-GISEL-NEXT:    global_store_dword v0, v2, s[0:1]
690; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
691; GFX9-GISEL-NEXT:    s_endpgm
692;
693; GFX11-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
694; GFX11-SDAG:       ; %bb.0:
695; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
696; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
697; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
698; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
699; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
700; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
701; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
702; GFX11-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
703; GFX11-SDAG-NEXT:    v_max_f32_e32 v1, 2.0, v1
704; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
705; GFX11-SDAG-NEXT:    v_min_f32_e32 v2, 4.0, v1
706; GFX11-SDAG-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
707; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
708; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
709; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
710; GFX11-SDAG-NEXT:    s_endpgm
711;
712; GFX11-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
713; GFX11-GISEL:       ; %bb.0:
714; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
715; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
716; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
717; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
718; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
719; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
720; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
721; GFX11-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
722; GFX11-GISEL-NEXT:    v_med3_f32 v2, v1, 2.0, 4.0
723; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, 2.0, v1
724; GFX11-GISEL-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
725; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
726; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
727; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
728; GFX11-GISEL-NEXT:    s_endpgm
729  %tid = call i32 @llvm.amdgcn.workitem.id.x()
730  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
731  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
732  %a = load float, ptr addrspace(1) %gep0
733  %a.add = fadd nnan float %a, 1.0
734
735  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
736  %med = call float @llvm.minnum.f32(float %max, float 4.0)
737
738  store volatile float %med, ptr addrspace(1) %outgep
739  store volatile float %max, ptr addrspace(1) %outgep
740  ret void
741}
742
; f64 form of the clamp idiom minnum(maxnum(a + 1.0, 2.0), 4.0); the input is
; produced by an fadd carrying the nnan flag.
; Every run line expects a separate v_max_f64 followed by v_min_f64: no med3
; instruction appears in the expected output for any of the checked targets.
743define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
744; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
745; SI-SDAG:       ; %bb.0:
746; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
747; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
748; SI-SDAG-NEXT:    s_mov_b32 s6, 0
749; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
750; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
751; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
752; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
753; SI-SDAG-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
754; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
755; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
756; SI-SDAG-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
757; SI-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], 2.0
758; SI-SDAG-NEXT:    v_min_f64 v[2:3], v[2:3], 4.0
759; SI-SDAG-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
760; SI-SDAG-NEXT:    s_endpgm
761;
762; SI-GISEL-LABEL: v_test_fmed3_r_i_i_f64:
763; SI-GISEL:       ; %bb.0:
764; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
765; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
766; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
767; SI-GISEL-NEXT:    s_mov_b32 s6, 0
768; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
769; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
770; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
771; SI-GISEL-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
772; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
773; SI-GISEL-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
774; SI-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], 2.0
775; SI-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], 4.0
776; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
777; SI-GISEL-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
778; SI-GISEL-NEXT:    s_endpgm
779;
780; VI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
781; VI-SDAG:       ; %bb.0:
782; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
783; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
784; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
785; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
786; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
787; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
788; VI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
789; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
790; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
791; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
792; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
793; VI-SDAG-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
794; VI-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
795; VI-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
796; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
797; VI-SDAG-NEXT:    s_endpgm
798;
799; VI-GISEL-LABEL: v_test_fmed3_r_i_i_f64:
800; VI-GISEL:       ; %bb.0:
801; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
802; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
803; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
804; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
805; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
806; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v4
807; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
808; VI-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
809; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
810; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
811; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
812; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
813; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
814; VI-GISEL-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
815; VI-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
816; VI-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
817; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
818; VI-GISEL-NEXT:    s_endpgm
819;
820; GFX9-LABEL: v_test_fmed3_r_i_i_f64:
821; GFX9:       ; %bb.0:
822; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
823; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
824; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
825; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
826; GFX9-NEXT:    s_waitcnt vmcnt(0)
827; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
828; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
829; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
830; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
831; GFX9-NEXT:    s_endpgm
832;
833; GFX11-LABEL: v_test_fmed3_r_i_i_f64:
834; GFX11:       ; %bb.0:
835; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
836; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
837; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
838; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
839; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
840; GFX11-NEXT:    global_load_b64 v[0:1], v2, s[2:3]
841; GFX11-NEXT:    s_waitcnt vmcnt(0)
842; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
843; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
844; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
845; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
846; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
847; GFX11-NEXT:    s_endpgm
  ; Each work-item reads %aptr[tid] and writes %out[tid].
848  %tid = call i32 @llvm.amdgcn.workitem.id.x()
849  %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
850  %outgep = getelementptr double, ptr addrspace(1) %out, i32 %tid
851  %a = load double, ptr addrspace(1) %gep0
  ; The nnan flag on the fadd is the only no-NaN information given for the
  ; clamp input.
852  %a.add = fadd nnan double %a, 1.0
853
  ; Clamp to [2.0, 4.0]: max against the low bound, then min against the high
  ; bound.
854  %max = call double @llvm.maxnum.f64(double %a.add, double 2.0)
855  %med = call double @llvm.minnum.f64(double %max, double 4.0)
856
857  store double %med, ptr addrspace(1) %outgep
858  ret void
859}
860
; f32 clamp idiom minnum(maxnum(a, 2.0), 4.0) applied directly to the loaded
; value: there is no fadd and no per-instruction nnan flag in this function.
; The function uses attribute group #2, which is defined outside this part of
; the file; presumably it enables a function-wide no-NaNs mode, as the test
; name suggests (verify against the attribute list at the end of the file).
; Every run line expects a single v_med3_f32 with the constants 2.0 and 4.0.
861define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
862; SI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
863; SI-SDAG:       ; %bb.0:
864; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
865; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
866; SI-SDAG-NEXT:    s_mov_b32 s6, 0
867; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
868; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
869; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
870; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
871; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
872; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
873; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
874; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
875; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
876; SI-SDAG-NEXT:    s_endpgm
877;
878; SI-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
879; SI-GISEL:       ; %bb.0:
880; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
881; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
882; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
883; SI-GISEL-NEXT:    s_mov_b32 s6, 0
884; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
885; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
886; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
887; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
888; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
889; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
890; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
891; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
892; SI-GISEL-NEXT:    s_endpgm
893;
894; VI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
895; VI-SDAG:       ; %bb.0:
896; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
897; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
898; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
899; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
900; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
901; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
902; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
903; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
904; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
905; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
906; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
907; VI-SDAG-NEXT:    v_med3_f32 v2, v3, 2.0, 4.0
908; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
909; VI-SDAG-NEXT:    s_endpgm
910;
911; VI-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
912; VI-GISEL:       ; %bb.0:
913; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
914; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
915; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
916; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
917; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
918; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
919; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
920; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
921; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
922; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
923; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
924; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
925; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
926; VI-GISEL-NEXT:    v_med3_f32 v2, v3, 2.0, 4.0
927; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
928; VI-GISEL-NEXT:    s_endpgm
929;
930; GFX9-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
931; GFX9:       ; %bb.0:
932; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
933; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
934; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
935; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
936; GFX9-NEXT:    s_waitcnt vmcnt(0)
937; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
938; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
939; GFX9-NEXT:    s_endpgm
940;
941; GFX11-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
942; GFX11:       ; %bb.0:
943; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
944; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
945; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
946; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
947; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
948; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
949; GFX11-NEXT:    s_waitcnt vmcnt(0)
950; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
951; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
952; GFX11-NEXT:    s_endpgm
  ; Each work-item reads %aptr[tid] and writes %out[tid].
953  %tid = call i32 @llvm.amdgcn.workitem.id.x()
954  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
955  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
956  %a = load float, ptr addrspace(1) %gep0
957
  ; Clamp the raw loaded value to [2.0, 4.0]; no instruction here carries a
  ; fast-math flag.
958  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
959  %med = call float @llvm.minnum.f32(float %max, float 4.0)
960
961  store float %med, ptr addrspace(1) %outgep
962  ret void
963}
964
; Clamp built from fcmp + select instead of the minnum/maxnum intrinsics:
;   max = select(x ule 2.0, 2.0, x);  med = select(max uge 4.0, 4.0, max)
; where x comes from an fadd carrying the nnan flag.
; The SelectionDAG runs expect the pair to collapse into one v_med3_f32.
; The GlobalISel runs do not form med3: the default-CPU run expects
; v_max_legacy_f32 / v_min_legacy_f32, while the tonga, gfx900 and gfx1100
; runs expect two v_cmp + v_cndmask pairs.
965define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
966; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
967; SI-SDAG:       ; %bb.0:
968; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
969; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
970; SI-SDAG-NEXT:    s_mov_b32 s6, 0
971; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
972; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
973; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
974; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
975; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
976; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
977; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
978; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
979; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
980; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
981; SI-SDAG-NEXT:    s_endpgm
982;
983; SI-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
984; SI-GISEL:       ; %bb.0:
985; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
986; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
987; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
988; SI-GISEL-NEXT:    s_mov_b32 s6, 0
989; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
990; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
991; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
992; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
993; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
994; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
995; SI-GISEL-NEXT:    v_max_legacy_f32_e64 v2, v2, 2.0
996; SI-GISEL-NEXT:    v_min_legacy_f32_e64 v2, v2, 4.0
997; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
998; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
999; SI-GISEL-NEXT:    s_endpgm
1000;
1001; VI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1002; VI-SDAG:       ; %bb.0:
1003; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1004; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
1005; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1006; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1007; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
1008; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1009; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
1010; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1011; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
1012; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1013; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1014; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
1015; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
1016; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
1017; VI-SDAG-NEXT:    s_endpgm
1018;
1019; VI-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1020; VI-GISEL:       ; %bb.0:
1021; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1022; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
1023; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1024; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
1025; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
1026; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1027; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1028; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
1029; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1030; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1031; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1032; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1033; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1034; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
1035; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, 2.0, v2
1036; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 2.0, vcc
1037; VI-GISEL-NEXT:    v_cmp_ngt_f32_e32 vcc, 4.0, v2
1038; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 4.0, vcc
1039; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
1040; VI-GISEL-NEXT:    s_endpgm
1041;
1042; GFX9-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1043; GFX9-SDAG:       ; %bb.0:
1044; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1045; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1046; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1047; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3]
1048; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1049; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
1050; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
1051; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
1052; GFX9-SDAG-NEXT:    s_endpgm
1053;
1054; GFX9-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1055; GFX9-GISEL:       ; %bb.0:
1056; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1057; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1058; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1059; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3]
1060; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1061; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
1062; GFX9-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, 2.0, v1
1063; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 2.0, vcc
1064; GFX9-GISEL-NEXT:    v_cmp_ngt_f32_e32 vcc, 4.0, v1
1065; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
1066; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
1067; GFX9-GISEL-NEXT:    s_endpgm
1068;
1069; GFX11-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1070; GFX11-SDAG:       ; %bb.0:
1071; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1072; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1073; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
1074; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1075; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1076; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
1077; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1078; GFX11-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
1079; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
1080; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
1081; GFX11-SDAG-NEXT:    s_endpgm
1082;
1083; GFX11-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32:
1084; GFX11-GISEL:       ; %bb.0:
1085; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1086; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1087; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
1088; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1089; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1090; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
1091; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1092; GFX11-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
1093; GFX11-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 2.0, v1
1094; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 2.0, vcc_lo
1095; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1096; GFX11-GISEL-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 4.0, v1
1097; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
1098; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
1099; GFX11-GISEL-NEXT:    s_endpgm
  ; Each work-item reads %aptr[tid] and writes %out[tid].
1100  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1101  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1102  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
1103  %a = load float, ptr addrspace(1) %gep0
  ; The nnan flag on the fadd is the only no-NaN information given for the
  ; compare/select chain below.
1104  %a.nnan = fadd nnan float %a, 1.0
1105
  ; Unordered compares (ule / uge): the constant operand is selected when the
  ; compare is true, which includes the unordered case.
1106  ; fmax_legacy
1107  %cmp0 = fcmp ule float %a.nnan, 2.0
1108  %max = select i1 %cmp0, float 2.0, float %a.nnan
1109
1110  ; fmin_legacy
1111  %cmp1 = fcmp uge float %max, 4.0
1112  %med = select i1 %cmp1, float 4.0, float %max
1113
1114  store float %med, ptr addrspace(1) %outgep
1115  ret void
1116}
1117
; Median-of-three written with intrinsics as
;   maxnum(minnum(x, b), minnum(maxnum(x, b), c))
; where x is the negation of %a, spelled as fsub -0.0, %a. All three inputs
; are volatile loads. The function uses attribute group #2, which is defined
; outside this part of the file (presumably a function-wide no-NaNs setting,
; as the test name suggests; verify against the attribute list).
; The SelectionDAG runs expect the negation folded into the first operand of
; v_med3_f32 as a source modifier (-v).
; The GlobalISel runs expect a separate instruction ahead of v_med3_f32:
; v_mul_f32 by -1.0 on the default-CPU and tonga runs, and v_max_f32 with both
; operands negated on the gfx900 and gfx1100 runs.
1118define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1119; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1120; SI-SDAG:       ; %bb.0:
1121; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1122; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
1123; SI-SDAG-NEXT:    s_mov_b32 s10, 0
1124; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1125; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1126; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
1127; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
1128; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1129; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
1130; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
1131; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
1132; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1133; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1134; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1135; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1136; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1137; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1138; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
1139; SI-SDAG-NEXT:    v_med3_f32 v2, -v2, v3, v4
1140; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1141; SI-SDAG-NEXT:    s_endpgm
1142;
1143; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1144; SI-GISEL:       ; %bb.0:
1145; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1146; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1147; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1148; SI-GISEL-NEXT:    s_mov_b32 s10, 0
1149; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
1150; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1151; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
1152; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1153; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1154; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
1155; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1156; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1157; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
1158; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1159; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1160; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
1161; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
1162; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
1163; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1164; SI-GISEL-NEXT:    s_endpgm
1165;
1166; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1167; VI-SDAG:       ; %bb.0:
1168; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1169; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1170; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1171; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1172; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
1173; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1174; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
1175; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
1176; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1177; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
1178; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
1179; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1180; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
1181; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1182; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
1183; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1184; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
1185; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1186; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1187; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
1188; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1189; VI-SDAG-NEXT:    v_med3_f32 v2, -v7, v2, v3
1190; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
1191; VI-SDAG-NEXT:    s_endpgm
1192;
1193; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1194; VI-GISEL:       ; %bb.0:
1195; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1196; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1197; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1198; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
1199; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
1200; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1201; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1202; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
1203; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
1204; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
1205; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1206; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
1207; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
1208; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
1209; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1210; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
1211; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1212; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
1213; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1214; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
1215; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1216; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1217; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1218; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1219; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1220; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
1221; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
1222; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
1223; VI-GISEL-NEXT:    s_endpgm
1224;
1225; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1226; GFX9-SDAG:       ; %bb.0:
1227; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1228; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1229; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1230; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
1231; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1232; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
1233; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1234; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
1235; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1236; GFX9-SDAG-NEXT:    v_med3_f32 v1, -v1, v2, v3
1237; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
1238; GFX9-SDAG-NEXT:    s_endpgm
1239;
1240; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1241; GFX9-GISEL:       ; %bb.0:
1242; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1243; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1244; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1245; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
1246; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1247; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
1248; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1249; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
1250; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1251; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
1252; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
1253; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
1254; GFX9-GISEL-NEXT:    s_endpgm
1255;
1256; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1257; GFX11-SDAG:       ; %bb.0:
1258; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1259; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1260; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1261; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1262; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1263; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1264; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1265; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1266; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1267; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1268; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1269; GFX11-SDAG-NEXT:    v_med3_f32 v1, -v1, v2, v3
1270; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
1271; GFX11-SDAG-NEXT:    s_endpgm
1272;
1273; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
1274; GFX11-GISEL:       ; %bb.0:
1275; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1276; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1277; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1278; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1279; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1280; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1281; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1282; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1283; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1284; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1285; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1286; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
1287; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1288; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
1289; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
1290; GFX11-GISEL-NEXT:    s_endpgm
  ; Each work-item reads element tid of %aptr, %bptr and %cptr and writes
  ; element tid of %out.
1291  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1292  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1293  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1294  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1295  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads: the expected output keeps them as three separate loads,
  ; each followed by its own s_waitcnt vmcnt(0).
1296  %a = load volatile float, ptr addrspace(1) %gep0
1297  %b = load volatile float, ptr addrspace(1) %gep1
1298  %c = load volatile float, ptr addrspace(1) %gep2
  ; Negation of the first med3 source, written as a subtraction from -0.0.
1299  %a.fneg = fsub float -0.0, %a
  ; med3(x, b, c) = max(min(x, b), min(max(x, b), c))
1300  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
1301  %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
1302  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
1303  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1304  store float %med3, ptr addrspace(1) %outgep
1305  ret void
1306}
1307
1308define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1309; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1310; SI-SDAG:       ; %bb.0:
1311; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1312; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
1313; SI-SDAG-NEXT:    s_mov_b32 s10, 0
1314; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1315; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1316; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
1317; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
1318; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1319; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
1320; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
1321; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
1322; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1323; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1324; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1325; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1326; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1327; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1328; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
1329; SI-SDAG-NEXT:    v_med3_f32 v2, v2, -v3, v4
1330; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1331; SI-SDAG-NEXT:    s_endpgm
1332;
1333; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1334; SI-GISEL:       ; %bb.0:
1335; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1336; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1337; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1338; SI-GISEL-NEXT:    s_mov_b32 s10, 0
1339; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
1340; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1341; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
1342; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1343; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1344; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
1345; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1346; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1347; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
1348; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1349; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1350; SI-GISEL-NEXT:    v_mul_f32_e32 v3, -1.0, v3
1351; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
1352; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
1353; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1354; SI-GISEL-NEXT:    s_endpgm
1355;
1356; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1357; VI-SDAG:       ; %bb.0:
1358; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1359; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1360; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1361; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1362; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
1363; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1364; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
1365; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
1366; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1367; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
1368; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
1369; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1370; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
1371; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1372; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
1373; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1374; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
1375; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1376; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1377; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
1378; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1379; VI-SDAG-NEXT:    v_med3_f32 v2, v7, -v2, v3
1380; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
1381; VI-SDAG-NEXT:    s_endpgm
1382;
1383; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1384; VI-GISEL:       ; %bb.0:
1385; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1386; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1387; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1388; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
1389; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
1390; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1391; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1392; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
1393; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
1394; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
1395; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1396; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
1397; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
1398; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
1399; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1400; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
1401; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1402; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
1403; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1404; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
1405; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1406; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1407; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1408; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1409; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1410; VI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
1411; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
1412; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
1413; VI-GISEL-NEXT:    s_endpgm
1414;
1415; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1416; GFX9-SDAG:       ; %bb.0:
1417; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1418; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1419; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1420; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
1421; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1422; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
1423; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1424; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
1425; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1426; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, -v2, v3
1427; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
1428; GFX9-SDAG-NEXT:    s_endpgm
1429;
1430; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1431; GFX9-GISEL:       ; %bb.0:
1432; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1433; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1434; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1435; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
1436; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1437; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
1438; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1439; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
1440; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1441; GFX9-GISEL-NEXT:    v_max_f32_e64 v2, -v2, -v2
1442; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
1443; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
1444; GFX9-GISEL-NEXT:    s_endpgm
1445;
1446; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1447; GFX11-SDAG:       ; %bb.0:
1448; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1449; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1450; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1451; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1452; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1453; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1454; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1455; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1456; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1457; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1458; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1459; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, -v2, v3
1460; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
1461; GFX11-SDAG-NEXT:    s_endpgm
1462;
1463; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
1464; GFX11-GISEL:       ; %bb.0:
1465; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1466; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1467; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1468; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1469; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1470; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1471; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1472; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1473; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1474; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1475; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1476; GFX11-GISEL-NEXT:    v_max_f32_e64 v2, -v2, -v2
1477; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1478; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
1479; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
1480; GFX11-GISEL-NEXT:    s_endpgm
1481  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1482  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1483  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1484  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1485  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
1486  %a = load volatile float, ptr addrspace(1) %gep0
1487  %b = load volatile float, ptr addrspace(1) %gep1
1488  %c = load volatile float, ptr addrspace(1) %gep2
1489  %b.fneg = fsub float -0.0, %b
1490  %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
1491  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
1492  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
1493  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1494  store float %med3, ptr addrspace(1) %outgep
1495  ret void
1496}
1497
; Median-of-three f32 pattern in which only the third input (%c) is negated.
; The IR computes maxnum(minnum(a, b), minnum(maxnum(a, b), -c)) on values
; loaded per work-item. In the SelectionDAG check blocks the negation shows up
; as a "-v" source modifier on the last v_med3_f32 operand; in the GlobalISel
; check blocks a separate v_mul_f32 (by -1.0) or v_max_f32 (-v, -v) instruction
; precedes a v_med3_f32 that carries no modifier.
; Attribute group #2 is defined outside this part of the file; presumably it
; supplies the no-NaNs guarantee the function name refers to (to be confirmed).
; The check lines are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1498define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1499; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1500; SI-SDAG:       ; %bb.0:
1501; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1502; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
1503; SI-SDAG-NEXT:    s_mov_b32 s10, 0
1504; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1505; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1506; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
1507; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
1508; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1509; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
1510; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
1511; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
1512; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1513; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1514; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1515; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1516; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1517; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1518; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
1519; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, -v4
1520; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1521; SI-SDAG-NEXT:    s_endpgm
1522;
1523; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1524; SI-GISEL:       ; %bb.0:
1525; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1526; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1527; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1528; SI-GISEL-NEXT:    s_mov_b32 s10, 0
1529; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
1530; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1531; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
1532; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1533; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1534; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
1535; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1536; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1537; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
1538; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1539; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1540; SI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v4
1541; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
1542; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
1543; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1544; SI-GISEL-NEXT:    s_endpgm
1545;
1546; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1547; VI-SDAG:       ; %bb.0:
1548; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1549; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1550; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1551; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1552; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
1553; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1554; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
1555; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
1556; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1557; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
1558; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
1559; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1560; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
1561; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1562; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
1563; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1564; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
1565; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1566; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1567; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
1568; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1569; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, -v3
1570; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
1571; VI-SDAG-NEXT:    s_endpgm
1572;
1573; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1574; VI-GISEL:       ; %bb.0:
1575; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1576; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1577; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1578; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
1579; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
1580; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1581; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1582; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
1583; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
1584; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
1585; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1586; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
1587; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
1588; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
1589; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1590; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
1591; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1592; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
1593; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1594; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
1595; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1596; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1597; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1598; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1599; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1600; VI-GISEL-NEXT:    v_mul_f32_e32 v3, -1.0, v3
1601; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
1602; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
1603; VI-GISEL-NEXT:    s_endpgm
1604;
1605; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1606; GFX9-SDAG:       ; %bb.0:
1607; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1608; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1609; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1610; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
1611; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1612; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
1613; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1614; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
1615; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1616; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, -v3
1617; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
1618; GFX9-SDAG-NEXT:    s_endpgm
1619;
1620; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1621; GFX9-GISEL:       ; %bb.0:
1622; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1623; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1624; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1625; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
1626; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1627; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
1628; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1629; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
1630; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1631; GFX9-GISEL-NEXT:    v_max_f32_e64 v3, -v3, -v3
1632; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
1633; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
1634; GFX9-GISEL-NEXT:    s_endpgm
1635;
1636; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1637; GFX11-SDAG:       ; %bb.0:
1638; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1639; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1640; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1641; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1642; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1643; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1644; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1645; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1646; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1647; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1648; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1649; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, -v3
1650; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
1651; GFX11-SDAG-NEXT:    s_endpgm
1652;
1653; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
1654; GFX11-GISEL:       ; %bb.0:
1655; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1656; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1657; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1658; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1659; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1660; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1661; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1662; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1663; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1664; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1665; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1666; GFX11-GISEL-NEXT:    v_max_f32_e64 v3, -v3, -v3
1667; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1668; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
1669; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
1670; GFX11-GISEL-NEXT:    s_endpgm
  ; Each work-item addresses one float in every buffer, indexed by its x id.
1671  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1672  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1673  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1674  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1675  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs are read with volatile loads, in a, b, c order.
1676  %a = load volatile float, ptr addrspace(1) %gep0
1677  %b = load volatile float, ptr addrspace(1) %gep1
1678  %c = load volatile float, ptr addrspace(1) %gep2
  ; Negation of c, spelled as (-0.0 - c); it is the only modified input.
1679  %c.fneg = fsub float -0.0, %c
  ; max(min(a, b), min(max(a, b), -c)), with -c feeding the inner min.
1680  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
1681  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
1682  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
1683  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1684  store float %med3, ptr addrspace(1) %outgep
1685  ret void
1686}
1687
; Median-of-three f32 pattern with a different modifier on every input, namely
; -a, |b| and -|c|. The IR computes
; maxnum(minnum(-a, |b|), minnum(maxnum(-a, |b|), -|c|)) on values loaded per
; work-item. The SelectionDAG check blocks expect all three modifiers folded
; into one instruction, v_med3_f32 -v, |v|, -|v|. The GlobalISel check blocks
; expect only the |b| modifier on v_med3_f32; the two negated inputs are
; produced beforehand by v_mul_f32 with -1.0 or by v_max_f32 of the negated
; value with itself, and the |c| modifier sits on that earlier instruction.
; Attribute group #2 is defined outside this part of the file; presumably it
; supplies the no-NaNs guarantee the function name refers to (to be confirmed).
; The check lines are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1688define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1689; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1690; SI-SDAG:       ; %bb.0:
1691; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1692; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
1693; SI-SDAG-NEXT:    s_mov_b32 s10, 0
1694; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1695; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1696; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
1697; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
1698; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1699; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
1700; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
1701; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
1702; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1703; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1704; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1705; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1706; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1707; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1708; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
1709; SI-SDAG-NEXT:    v_med3_f32 v2, -v2, |v3|, -|v4|
1710; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1711; SI-SDAG-NEXT:    s_endpgm
1712;
1713; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1714; SI-GISEL:       ; %bb.0:
1715; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1716; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1717; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1718; SI-GISEL-NEXT:    s_mov_b32 s10, 0
1719; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
1720; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1721; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
1722; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1723; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1724; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
1725; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1726; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1727; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
1728; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1729; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1730; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
1731; SI-GISEL-NEXT:    v_mul_f32_e64 v4, -1.0, |v4|
1732; SI-GISEL-NEXT:    v_med3_f32 v2, v2, |v3|, v4
1733; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
1734; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1735; SI-GISEL-NEXT:    s_endpgm
1736;
1737; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1738; VI-SDAG:       ; %bb.0:
1739; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1740; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1741; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1742; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1743; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
1744; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1745; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
1746; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
1747; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1748; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
1749; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
1750; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1751; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
1752; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1753; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
1754; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1755; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
1756; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1757; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1758; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
1759; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1760; VI-SDAG-NEXT:    v_med3_f32 v2, -v7, |v2|, -|v3|
1761; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
1762; VI-SDAG-NEXT:    s_endpgm
1763;
1764; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1765; VI-GISEL:       ; %bb.0:
1766; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1767; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1768; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1769; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
1770; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
1771; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1772; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1773; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
1774; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
1775; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
1776; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1777; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
1778; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
1779; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
1780; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1781; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
1782; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1783; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
1784; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1785; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
1786; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1787; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1788; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1789; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1790; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1791; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
1792; VI-GISEL-NEXT:    v_mul_f32_e64 v3, -1.0, |v3|
1793; VI-GISEL-NEXT:    v_med3_f32 v2, v4, |v2|, v3
1794; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
1795; VI-GISEL-NEXT:    s_endpgm
1796;
1797; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1798; GFX9-SDAG:       ; %bb.0:
1799; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1800; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1801; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1802; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
1803; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1804; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
1805; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1806; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
1807; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1808; GFX9-SDAG-NEXT:    v_med3_f32 v1, -v1, |v2|, -|v3|
1809; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
1810; GFX9-SDAG-NEXT:    s_endpgm
1811;
1812; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1813; GFX9-GISEL:       ; %bb.0:
1814; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
1815; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1816; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1817; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
1818; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1819; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
1820; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1821; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
1822; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1823; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
1824; GFX9-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
1825; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, |v2|, v3
1826; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
1827; GFX9-GISEL-NEXT:    s_endpgm
1828;
1829; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1830; GFX11-SDAG:       ; %bb.0:
1831; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1832; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1833; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1834; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1835; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1836; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1837; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1838; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1839; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1840; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1841; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1842; GFX11-SDAG-NEXT:    v_med3_f32 v1, -v1, |v2|, -|v3|
1843; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
1844; GFX11-SDAG-NEXT:    s_endpgm
1845;
1846; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
1847; GFX11-GISEL:       ; %bb.0:
1848; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
1849; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1850; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1851; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1852; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1853; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
1854; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1855; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
1856; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1857; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
1858; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
1859; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
1860; GFX11-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
1861; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1862; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, |v2|, v3
1863; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
1864; GFX11-GISEL-NEXT:    s_endpgm
  ; Each work-item addresses one float in every buffer, indexed by its x id.
1865  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1866  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
1867  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
1868  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
1869  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs are read with volatile loads, in a, b, c order.
1870  %a = load volatile float, ptr addrspace(1) %gep0
1871  %b = load volatile float, ptr addrspace(1) %gep1
1872  %c = load volatile float, ptr addrspace(1) %gep2
1873
  ; Per-input modifiers; a is negated, b is made absolute, c is made absolute
  ; and then negated. Negation is spelled as (-0.0 - x).
1874  %a.fneg = fsub float -0.0, %a
1875  %b.fabs = call float @llvm.fabs.f32(float %b)
1876  %c.fabs = call float @llvm.fabs.f32(float %c)
1877  %c.fabs.fneg = fsub float -0.0, %c.fabs
1878
  ; max(min(-a, |b|), min(max(-a, |b|), -|c|)) built from the modified inputs.
1879  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
1880  %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
1881  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
1882  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
1883
1884  store float %med3, ptr addrspace(1) %outgep
1885  ret void
1886}
1887
1888define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
1889; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1890; SI-SDAG:       ; %bb.0:
1891; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1892; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
1893; SI-SDAG-NEXT:    s_mov_b32 s10, 0
1894; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1895; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1896; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
1897; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
1898; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1899; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
1900; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
1901; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
1902; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1903; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1904; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
1905; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1906; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
1907; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1908; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
1909; SI-SDAG-NEXT:    v_med3_f32 v2, -|v2|, -|v3|, -|v4|
1910; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1911; SI-SDAG-NEXT:    s_endpgm
1912;
1913; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1914; SI-GISEL:       ; %bb.0:
1915; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
1916; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
1917; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1918; SI-GISEL-NEXT:    s_mov_b32 s10, 0
1919; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
1920; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1921; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
1922; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
1923; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1924; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
1925; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
1926; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1927; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
1928; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
1929; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1930; SI-GISEL-NEXT:    v_mul_f32_e64 v2, -1.0, |v2|
1931; SI-GISEL-NEXT:    v_mul_f32_e64 v3, -1.0, |v3|
1932; SI-GISEL-NEXT:    v_mul_f32_e64 v4, -1.0, |v4|
1933; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
1934; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
1935; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1936; SI-GISEL-NEXT:    s_endpgm
1937;
1938; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1939; VI-SDAG:       ; %bb.0:
1940; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1941; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1942; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1943; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1944; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
1945; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1946; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
1947; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
1948; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1949; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
1950; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
1951; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1952; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
1953; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1954; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
1955; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1956; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
1957; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1958; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1959; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
1960; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1961; VI-SDAG-NEXT:    v_med3_f32 v2, -|v7|, -|v2|, -|v3|
1962; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
1963; VI-SDAG-NEXT:    s_endpgm
1964;
1965; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
1966; VI-GISEL:       ; %bb.0:
1967; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
1968; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
1969; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1970; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
1971; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
1972; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1973; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1974; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
1975; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
1976; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
1977; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1978; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
1979; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
1980; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
1981; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
1982; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
1983; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1984; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
1985; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1986; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
1987; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
1988; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1989; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1990; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
1991; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1992; VI-GISEL-NEXT:    v_mul_f32_e64 v4, -1.0, |v7|
1993; VI-GISEL-NEXT:    v_mul_f32_e64 v2, -1.0, |v2|
1994; VI-GISEL-NEXT:    v_mul_f32_e64 v3, -1.0, |v3|
1995; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
1996; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
1997; VI-GISEL-NEXT:    s_endpgm
1998;
1999; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2000; GFX9-SDAG:       ; %bb.0:
2001; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2002; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2003; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2004; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
2005; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2006; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
2007; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2008; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
2009; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2010; GFX9-SDAG-NEXT:    v_med3_f32 v1, -|v1|, -|v2|, -|v3|
2011; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
2012; GFX9-SDAG-NEXT:    s_endpgm
2013;
2014; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2015; GFX9-GISEL:       ; %bb.0:
2016; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2017; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2018; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2019; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
2020; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2021; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
2022; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2023; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
2024; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2025; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
2026; GFX9-GISEL-NEXT:    v_max_f32_e64 v2, -|v2|, -|v2|
2027; GFX9-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
2028; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
2029; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
2030; GFX9-GISEL-NEXT:    s_endpgm
2031;
2032; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2033; GFX11-SDAG:       ; %bb.0:
2034; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2035; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2036; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2037; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2038; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2039; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2040; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2041; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2042; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2043; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2044; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
2045; GFX11-SDAG-NEXT:    v_med3_f32 v1, -|v1|, -|v2|, -|v3|
2046; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
2047; GFX11-SDAG-NEXT:    s_endpgm
2048;
2049; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
2050; GFX11-GISEL:       ; %bb.0:
2051; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2052; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2053; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2054; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2055; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2056; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2057; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2058; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2059; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2060; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2061; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
2062; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
2063; GFX11-GISEL-NEXT:    v_max_f32_e64 v2, -|v2|, -|v2|
2064; GFX11-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
2065; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2066; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
2067; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
2068; GFX11-GISEL-NEXT:    s_endpgm
2069  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2070  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2071  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2072  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2073  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
2074  %a = load volatile float, ptr addrspace(1) %gep0
2075  %b = load volatile float, ptr addrspace(1) %gep1
2076  %c = load volatile float, ptr addrspace(1) %gep2
2077
2078  %a.fabs = call float @llvm.fabs.f32(float %a)
2079  %a.fabs.fneg = fsub float -0.0, %a.fabs
2080  %b.fabs = call float @llvm.fabs.f32(float %b)
2081  %b.fabs.fneg = fsub float -0.0, %b.fabs
2082  %c.fabs = call float @llvm.fabs.f32(float %c)
2083  %c.fabs.fneg = fsub float -0.0, %c.fabs
2084
2085  %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
2086  %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
2087  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
2088  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2089
2090  store float %med3, ptr addrspace(1) %outgep
2091  ret void
2092}
2093
; Median-of-three f32 selection where each operand is produced by an
; `fadd nnan`, while the minnum/maxnum calls themselves carry no fast-math
; flags. All check prefixes in this function expect the three constant adds
; followed by a single v_med3_f32.
2094define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2095; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
2096; SI-SDAG:       ; %bb.0:
2097; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2098; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
2099; SI-SDAG-NEXT:    s_mov_b32 s10, 0
2100; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2101; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
2102; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
2103; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
2104; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2105; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
2106; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
2107; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
2108; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2109; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2110; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2111; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2112; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2113; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2114; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
2115; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
2116; SI-SDAG-NEXT:    v_add_f32_e32 v3, 2.0, v3
2117; SI-SDAG-NEXT:    v_add_f32_e32 v4, 4.0, v4
2118; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
2119; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2120; SI-SDAG-NEXT:    s_endpgm
2121;
2122; SI-GISEL-LABEL: v_nnan_inputs_med3_f32_pat0:
2123; SI-GISEL:       ; %bb.0:
2124; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2125; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2126; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
2127; SI-GISEL-NEXT:    s_mov_b32 s10, 0
2128; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
2129; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2130; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
2131; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2132; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2133; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
2134; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2135; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2136; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
2137; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2138; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2139; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
2140; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
2141; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
2142; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
2143; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
2144; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2145; SI-GISEL-NEXT:    s_endpgm
2146;
2147; VI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
2148; VI-SDAG:       ; %bb.0:
2149; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2150; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2151; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2152; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
2153; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
2154; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2155; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
2156; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
2157; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2158; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
2159; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
2160; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2161; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
2162; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2163; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
2164; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2165; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
2166; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2167; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2168; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
2169; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2170; VI-SDAG-NEXT:    v_add_f32_e32 v4, 1.0, v7
2171; VI-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
2172; VI-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
2173; VI-SDAG-NEXT:    v_med3_f32 v2, v4, v2, v3
2174; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
2175; VI-SDAG-NEXT:    s_endpgm
2176;
2177; VI-GISEL-LABEL: v_nnan_inputs_med3_f32_pat0:
2178; VI-GISEL:       ; %bb.0:
2179; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2180; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2181; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2182; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
2183; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
2184; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2185; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2186; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
2187; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
2188; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
2189; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2190; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
2191; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
2192; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
2193; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2194; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
2195; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2196; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
2197; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2198; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
2199; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2200; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2201; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2202; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2203; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2204; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
2205; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
2206; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
2207; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
2208; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
2209; VI-GISEL-NEXT:    s_endpgm
2210;
2211; GFX9-LABEL: v_nnan_inputs_med3_f32_pat0:
2212; GFX9:       ; %bb.0:
2213; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2214; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2215; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2216; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
2217; GFX9-NEXT:    s_waitcnt vmcnt(0)
2218; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
2219; GFX9-NEXT:    s_waitcnt vmcnt(0)
2220; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
2221; GFX9-NEXT:    s_waitcnt vmcnt(0)
2222; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
2223; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
2224; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
2225; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
2226; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
2227; GFX9-NEXT:    s_endpgm
2228;
2229; GFX11-LABEL: v_nnan_inputs_med3_f32_pat0:
2230; GFX11:       ; %bb.0:
2231; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2232; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2233; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2234; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2235; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2236; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2237; GFX11-NEXT:    s_waitcnt vmcnt(0)
2238; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2239; GFX11-NEXT:    s_waitcnt vmcnt(0)
2240; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2241; GFX11-NEXT:    s_waitcnt vmcnt(0)
2242; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
2243; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
2244; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2245; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
2246; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
2247; GFX11-NEXT:    s_endpgm
  ; Each work-item uses its x id to index the three input arrays and the
  ; output array, then reads one float from each input with a volatile load.
2248  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2249  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2250  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2251  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2252  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
2253  %a = load volatile float, ptr addrspace(1) %gep0
2254  %b = load volatile float, ptr addrspace(1) %gep1
2255  %c = load volatile float, ptr addrspace(1) %gep2
2256
  ; The nnan flag sits on the operand-producing adds, not on the min/max
  ; calls below. Presumably this is what lets the backend treat the med3
  ; inputs as NaN-free (inferred from the test name, to be confirmed).
2257  %a.nnan = fadd nnan float %a, 1.0
2258  %b.nnan = fadd nnan float %b, 2.0
2259  %c.nnan = fadd nnan float %c, 4.0
2260
  ; max(min(a, b), min(max(a, b), c)) with no fast-math flags on the calls.
2261  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
2262  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
2263  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
2264  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2265  store float %med3, ptr addrspace(1) %outgep
2266  ret void
2267}
2268
; Median-of-three f32 selection on the raw loaded values. The two minnum
; calls and the inner maxnum call are marked nnan, while the outermost
; maxnum is not. All check prefixes in this function expect a single
; v_med3_f32 that consumes the loaded values directly.
2269define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2270; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
2271; SI-SDAG:       ; %bb.0:
2272; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2273; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
2274; SI-SDAG-NEXT:    s_mov_b32 s10, 0
2275; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2276; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
2277; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
2278; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
2279; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2280; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
2281; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
2282; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
2283; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2284; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2285; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2286; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2287; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2288; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2289; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
2290; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
2291; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2292; SI-SDAG-NEXT:    s_endpgm
2293;
2294; SI-GISEL-LABEL: v_nnan_input_calls_med3_f32_pat0:
2295; SI-GISEL:       ; %bb.0:
2296; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2297; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2298; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
2299; SI-GISEL-NEXT:    s_mov_b32 s10, 0
2300; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
2301; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2302; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
2303; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2304; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2305; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
2306; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2307; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2308; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
2309; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2310; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2311; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
2312; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
2313; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2314; SI-GISEL-NEXT:    s_endpgm
2315;
2316; VI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
2317; VI-SDAG:       ; %bb.0:
2318; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2319; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2320; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2321; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
2322; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
2323; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2324; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
2325; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
2326; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2327; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
2328; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
2329; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2330; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
2331; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2332; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
2333; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2334; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
2335; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2336; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2337; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
2338; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2339; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
2340; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
2341; VI-SDAG-NEXT:    s_endpgm
2342;
2343; VI-GISEL-LABEL: v_nnan_input_calls_med3_f32_pat0:
2344; VI-GISEL:       ; %bb.0:
2345; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2346; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2347; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2348; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
2349; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
2350; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2351; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2352; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
2353; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
2354; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
2355; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2356; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
2357; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
2358; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
2359; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2360; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
2361; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2362; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
2363; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2364; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
2365; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2366; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2367; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2368; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2369; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2370; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
2371; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
2372; VI-GISEL-NEXT:    s_endpgm
2373;
2374; GFX9-LABEL: v_nnan_input_calls_med3_f32_pat0:
2375; GFX9:       ; %bb.0:
2376; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2377; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2378; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2379; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
2380; GFX9-NEXT:    s_waitcnt vmcnt(0)
2381; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
2382; GFX9-NEXT:    s_waitcnt vmcnt(0)
2383; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
2384; GFX9-NEXT:    s_waitcnt vmcnt(0)
2385; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
2386; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
2387; GFX9-NEXT:    s_endpgm
2388;
2389; GFX11-LABEL: v_nnan_input_calls_med3_f32_pat0:
2390; GFX11:       ; %bb.0:
2391; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2392; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2393; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2394; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2395; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2396; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2397; GFX11-NEXT:    s_waitcnt vmcnt(0)
2398; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2399; GFX11-NEXT:    s_waitcnt vmcnt(0)
2400; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2401; GFX11-NEXT:    s_waitcnt vmcnt(0)
2402; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
2403; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
2404; GFX11-NEXT:    s_endpgm
  ; Each work-item uses its x id to index the three input arrays and the
  ; output array, then reads one float from each input with a volatile load.
2405  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2406  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2407  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2408  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2409  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
2410  %a = load volatile float, ptr addrspace(1) %gep0
2411  %b = load volatile float, ptr addrspace(1) %gep1
2412  %c = load volatile float, ptr addrspace(1) %gep2
  ; max(min(a, b), min(max(a, b), c)). Only the three calls that feed the
  ; final maxnum carry nnan, and the final maxnum itself is unflagged.
2413  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
2414  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
2415  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
2416  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2417  store float %med3, ptr addrspace(1) %outgep
2418  ret void
2419}
2420
; Median-of-three f32 selection on the raw loaded values with nnan on all
; four minnum/maxnum calls, including the outermost maxnum. All check
; prefixes in this function expect a single v_med3_f32 that consumes the
; loaded values directly.
2421define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2422; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
2423; SI-SDAG:       ; %bb.0:
2424; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2425; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
2426; SI-SDAG-NEXT:    s_mov_b32 s10, 0
2427; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2428; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
2429; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
2430; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
2431; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2432; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
2433; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
2434; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
2435; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2436; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2437; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2438; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2439; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2440; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2441; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
2442; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
2443; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2444; SI-SDAG-NEXT:    s_endpgm
2445;
2446; SI-GISEL-LABEL: v_nnan_call_med3_f32_pat0:
2447; SI-GISEL:       ; %bb.0:
2448; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2449; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2450; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
2451; SI-GISEL-NEXT:    s_mov_b32 s10, 0
2452; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
2453; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2454; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
2455; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2456; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2457; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
2458; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2459; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2460; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
2461; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2462; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2463; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
2464; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
2465; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2466; SI-GISEL-NEXT:    s_endpgm
2467;
2468; VI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
2469; VI-SDAG:       ; %bb.0:
2470; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2471; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2472; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2473; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
2474; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
2475; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2476; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
2477; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
2478; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2479; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
2480; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
2481; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2482; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
2483; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2484; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
2485; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2486; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
2487; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2488; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2489; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
2490; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2491; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
2492; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
2493; VI-SDAG-NEXT:    s_endpgm
2494;
2495; VI-GISEL-LABEL: v_nnan_call_med3_f32_pat0:
2496; VI-GISEL:       ; %bb.0:
2497; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2498; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2499; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2500; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
2501; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
2502; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2503; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2504; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
2505; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
2506; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
2507; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2508; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
2509; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
2510; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
2511; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2512; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
2513; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2514; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
2515; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2516; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
2517; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2518; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2519; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2520; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2521; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2522; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
2523; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
2524; VI-GISEL-NEXT:    s_endpgm
2525;
2526; GFX9-LABEL: v_nnan_call_med3_f32_pat0:
2527; GFX9:       ; %bb.0:
2528; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2529; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2530; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2531; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
2532; GFX9-NEXT:    s_waitcnt vmcnt(0)
2533; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
2534; GFX9-NEXT:    s_waitcnt vmcnt(0)
2535; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
2536; GFX9-NEXT:    s_waitcnt vmcnt(0)
2537; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
2538; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
2539; GFX9-NEXT:    s_endpgm
2540;
2541; GFX11-LABEL: v_nnan_call_med3_f32_pat0:
2542; GFX11:       ; %bb.0:
2543; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2544; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2545; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2546; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2547; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2548; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2549; GFX11-NEXT:    s_waitcnt vmcnt(0)
2550; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2551; GFX11-NEXT:    s_waitcnt vmcnt(0)
2552; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2553; GFX11-NEXT:    s_waitcnt vmcnt(0)
2554; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
2555; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
2556; GFX11-NEXT:    s_endpgm
  ; Each work-item uses its x id to index the three input arrays and the
  ; output array, then reads one float from each input with a volatile load.
2557  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2558  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2559  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2560  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2561  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
2562  %a = load volatile float, ptr addrspace(1) %gep0
2563  %b = load volatile float, ptr addrspace(1) %gep1
2564  %c = load volatile float, ptr addrspace(1) %gep2
  ; max(min(a, b), min(max(a, b), c)) with nnan on every call in the chain.
2565  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
2566  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
2567  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
2568  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2569  store float %med3, ptr addrspace(1) %outgep
2570  ret void
2571}
2572
2573define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
2574; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
2575; SI-SDAG:       ; %bb.0:
2576; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2577; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
2578; SI-SDAG-NEXT:    s_mov_b32 s10, 0
2579; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2580; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
2581; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
2582; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
2583; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2584; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
2585; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
2586; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
2587; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2588; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2589; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2590; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2591; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2592; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2593; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
2594; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
2595; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2596; SI-SDAG-NEXT:    s_endpgm
2597;
2598; SI-GISEL-LABEL: v_fast_call_med3_f32_pat0:
2599; SI-GISEL:       ; %bb.0:
2600; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2601; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2602; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
2603; SI-GISEL-NEXT:    s_mov_b32 s10, 0
2604; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
2605; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2606; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
2607; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2608; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2609; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
2610; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2611; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2612; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
2613; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2614; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2615; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
2616; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
2617; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2618; SI-GISEL-NEXT:    s_endpgm
2619;
2620; VI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
2621; VI-SDAG:       ; %bb.0:
2622; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2623; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2624; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2625; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
2626; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
2627; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2628; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
2629; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
2630; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2631; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
2632; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
2633; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2634; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
2635; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2636; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
2637; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2638; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
2639; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2640; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2641; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
2642; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2643; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
2644; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
2645; VI-SDAG-NEXT:    s_endpgm
2646;
2647; VI-GISEL-LABEL: v_fast_call_med3_f32_pat0:
2648; VI-GISEL:       ; %bb.0:
2649; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2650; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2651; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2652; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
2653; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
2654; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2655; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2656; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
2657; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
2658; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
2659; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2660; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
2661; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
2662; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
2663; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2664; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
2665; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2666; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
2667; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2668; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
2669; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2670; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2671; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2672; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2673; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2674; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
2675; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
2676; VI-GISEL-NEXT:    s_endpgm
2677;
2678; GFX9-LABEL: v_fast_call_med3_f32_pat0:
2679; GFX9:       ; %bb.0:
2680; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2681; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2682; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2683; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
2684; GFX9-NEXT:    s_waitcnt vmcnt(0)
2685; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
2686; GFX9-NEXT:    s_waitcnt vmcnt(0)
2687; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
2688; GFX9-NEXT:    s_waitcnt vmcnt(0)
2689; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
2690; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
2691; GFX9-NEXT:    s_endpgm
2692;
2693; GFX11-LABEL: v_fast_call_med3_f32_pat0:
2694; GFX11:       ; %bb.0:
2695; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2696; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2697; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2698; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2699; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2700; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2701; GFX11-NEXT:    s_waitcnt vmcnt(0)
2702; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2703; GFX11-NEXT:    s_waitcnt vmcnt(0)
2704; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2705; GFX11-NEXT:    s_waitcnt vmcnt(0)
2706; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
2707; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
2708; GFX11-NEXT:    s_endpgm
2709  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2710  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2711  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2712  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2713  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
2714  %a = load volatile float, ptr addrspace(1) %gep0
2715  %b = load volatile float, ptr addrspace(1) %gep1
2716  %c = load volatile float, ptr addrspace(1) %gep2
2717  %tmp0 = call fast float @llvm.minnum.f32(float %a, float %b)
2718  %tmp1 = call fast float @llvm.maxnum.f32(float %a, float %b)
2719  %tmp2 = call fast float @llvm.minnum.f32(float %tmp1, float %c)
2720  %med3 = call fast float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2721  store float %med3, ptr addrspace(1) %outgep
2722  ret void
2723}
2724
2725; 16 operand-order combinations of the med3 min/max pattern
2726
2727; 0: max(min(x, y), min(max(x, y), z))
2728; 1: max(min(x, y), min(max(y, x), z))
2729; 2: max(min(x, y), min(z, max(x, y)))
2730; 3: max(min(x, y), min(z, max(y, x)))
2731; 4: max(min(y, x), min(max(x, y), z))
2732; 5: max(min(y, x), min(max(y, x), z))
2733; 6: max(min(y, x), min(z, max(x, y)))
2734; 7: max(min(y, x), min(z, max(y, x)))
2735; 8-15: the same eight patterns with the operands of the outermost max commuted
2736
; Median-of-three, combination 0 from the list above:
;   max(min(a, b), min(max(a, b), c))
; The minnum/maxnum calls below carry no fast-math flags of their own, so any
; no-NaNs assumption has to come from attribute group #2, which is defined
; outside this part of the file (NOTE(review): presumably a function-wide
; no-NaNs attribute, as the test name suggests - confirm against the attribute
; definitions). Every check block expects the four calls to be selected as a
; single v_med3_f32 whose operands are the three loaded values in load order.
2737define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
2738; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
2739; SI-SDAG:       ; %bb.0:
2740; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2741; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
2742; SI-SDAG-NEXT:    s_mov_b32 s10, 0
2743; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2744; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
2745; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
2746; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
2747; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2748; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
2749; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
2750; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
2751; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2752; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2753; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2754; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2755; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2756; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2757; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
2758; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
2759; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2760; SI-SDAG-NEXT:    s_endpgm
2761;
2762; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0:
2763; SI-GISEL:       ; %bb.0:
2764; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2765; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2766; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
2767; SI-GISEL-NEXT:    s_mov_b32 s10, 0
2768; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
2769; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2770; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
2771; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2772; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2773; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
2774; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2775; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2776; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
2777; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2778; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2779; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
2780; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
2781; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2782; SI-GISEL-NEXT:    s_endpgm
2783;
2784; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
2785; VI-SDAG:       ; %bb.0:
2786; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2787; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2788; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2789; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
2790; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
2791; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2792; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
2793; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
2794; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2795; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
2796; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
2797; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2798; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
2799; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2800; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
2801; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2802; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
2803; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2804; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2805; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
2806; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2807; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
2808; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
2809; VI-SDAG-NEXT:    s_endpgm
2810;
2811; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0:
2812; VI-GISEL:       ; %bb.0:
2813; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2814; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2815; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2816; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
2817; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
2818; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2819; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2820; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
2821; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
2822; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
2823; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2824; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
2825; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
2826; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
2827; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2828; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
2829; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2830; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
2831; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2832; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
2833; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2834; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2835; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2836; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2837; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2838; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
2839; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
2840; VI-GISEL-NEXT:    s_endpgm
2841;
2842; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0:
2843; GFX9:       ; %bb.0:
2844; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2845; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2846; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2847; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
2848; GFX9-NEXT:    s_waitcnt vmcnt(0)
2849; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
2850; GFX9-NEXT:    s_waitcnt vmcnt(0)
2851; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
2852; GFX9-NEXT:    s_waitcnt vmcnt(0)
2853; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
2854; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
2855; GFX9-NEXT:    s_endpgm
2856;
2857; GFX11-LABEL: v_test_global_nnans_med3_f32_pat0:
2858; GFX11:       ; %bb.0:
2859; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
2860; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
2861; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2862; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2863; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2864; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
2865; GFX11-NEXT:    s_waitcnt vmcnt(0)
2866; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
2867; GFX11-NEXT:    s_waitcnt vmcnt(0)
2868; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
2869; GFX11-NEXT:    s_waitcnt vmcnt(0)
2870; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
2871; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
2872; GFX11-NEXT:    s_endpgm
  ; The workitem id indexes all three input arrays and the output array.
2873  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2874  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
2875  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
2876  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
2877  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads; the checks expect three separate loads, each followed by
  ; its own s_waitcnt vmcnt(0).
2878  %a = load volatile float, ptr addrspace(1) %gep0
2879  %b = load volatile float, ptr addrspace(1) %gep1
2880  %c = load volatile float, ptr addrspace(1) %gep2
  ; tmp0 = min(a, b); tmp1 = max(a, b); tmp2 = min(tmp1, c);
  ; med3 = max(tmp0, tmp2)
2881  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
2882  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
2883  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
2884  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
2885  store float %med3, ptr addrspace(1) %outgep
2886  ret void
2887}
2888
; Median-of-three, combination 1 from the list above:
;   max(min(a, b), min(max(b, a), c))
; The only difference from combination 0 is that the operands of the inner
; maxnum are commuted. The minnum/maxnum calls carry no fast-math flags of
; their own; attribute group #2 is defined outside this part of the file
; (NOTE(review): presumably a function-wide no-NaNs attribute, as the test name
; suggests - confirm). Every check block still expects a single v_med3_f32
; whose operands are the three loaded values in load order.
2889define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
2890; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
2891; SI-SDAG:       ; %bb.0:
2892; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2893; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
2894; SI-SDAG-NEXT:    s_mov_b32 s10, 0
2895; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2896; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
2897; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
2898; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
2899; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2900; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
2901; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
2902; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
2903; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2904; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2905; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
2906; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2907; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
2908; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2909; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
2910; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
2911; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2912; SI-SDAG-NEXT:    s_endpgm
2913;
2914; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1:
2915; SI-GISEL:       ; %bb.0:
2916; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
2917; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2918; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
2919; SI-GISEL-NEXT:    s_mov_b32 s10, 0
2920; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
2921; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2922; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
2923; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
2924; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2925; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
2926; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
2927; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2928; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
2929; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
2930; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2931; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
2932; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
2933; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2934; SI-GISEL-NEXT:    s_endpgm
2935;
2936; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
2937; VI-SDAG:       ; %bb.0:
2938; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2939; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2940; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
2941; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
2942; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
2943; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2944; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
2945; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
2946; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2947; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
2948; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
2949; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2950; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
2951; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2952; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
2953; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2954; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
2955; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
2956; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
2957; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
2958; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2959; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
2960; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
2961; VI-SDAG-NEXT:    s_endpgm
2962;
2963; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1:
2964; VI-GISEL:       ; %bb.0:
2965; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
2966; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
2967; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
2968; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
2969; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
2970; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2971; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2972; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
2973; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
2974; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
2975; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2976; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
2977; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
2978; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
2979; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2980; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
2981; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2982; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
2983; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2984; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
2985; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
2986; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
2987; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
2988; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2989; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2990; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
2991; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
2992; VI-GISEL-NEXT:    s_endpgm
2993;
2994; GFX9-LABEL: v_test_global_nnans_med3_f32_pat1:
2995; GFX9:       ; %bb.0:
2996; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
2997; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
2998; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2999; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
3000; GFX9-NEXT:    s_waitcnt vmcnt(0)
3001; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
3002; GFX9-NEXT:    s_waitcnt vmcnt(0)
3003; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
3004; GFX9-NEXT:    s_waitcnt vmcnt(0)
3005; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
3006; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
3007; GFX9-NEXT:    s_endpgm
3008;
3009; GFX11-LABEL: v_test_global_nnans_med3_f32_pat1:
3010; GFX11:       ; %bb.0:
3011; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3012; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3013; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3014; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3015; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3016; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3017; GFX11-NEXT:    s_waitcnt vmcnt(0)
3018; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3019; GFX11-NEXT:    s_waitcnt vmcnt(0)
3020; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3021; GFX11-NEXT:    s_waitcnt vmcnt(0)
3022; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
3023; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
3024; GFX11-NEXT:    s_endpgm
  ; The workitem id indexes all three input arrays and the output array.
3025  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3026  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3027  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3028  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3029  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads; the checks expect three separate loads, each followed by
  ; its own s_waitcnt vmcnt(0).
3030  %a = load volatile float, ptr addrspace(1) %gep0
3031  %b = load volatile float, ptr addrspace(1) %gep1
3032  %c = load volatile float, ptr addrspace(1) %gep2
  ; tmp0 = min(a, b); tmp1 = max(b, a) with operands commuted;
  ; tmp2 = min(tmp1, c); med3 = max(tmp0, tmp2)
3033  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
3034  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3035  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
3036  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3037  store float %med3, ptr addrspace(1) %outgep
3038  ret void
3039}
3040
; Median-of-three with a negated first operand:
;   min(max(-a, b), max(min(-a, b), c))
; Note that min and max are swapped here relative to the listing of
; combination 1 above: the outermost call is a minnum, not a maxnum.
; Attribute group #2 is defined outside this part of the file
; (NOTE(review): presumably a function-wide no-NaNs attribute, as the test name
; suggests - confirm).
;
; The expected selection differs by instruction selector:
;   - the SelectionDAG runs fold the negation into a source modifier on the
;     first v_med3_f32 operand (printed as -vN);
;   - the GlobalISel runs keep a separate VALU instruction ahead of an
;     unmodified v_med3_f32, namely a v_mul_f32 by -1.0 on the SI and VI runs
;     and a v_max_f32 of -v1 with itself on the GFX9 and GFX11 runs.
3041define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3042; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3043; SI-SDAG:       ; %bb.0:
3044; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3045; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3046; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3047; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3048; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3049; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
3050; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
3051; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3052; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
3053; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
3054; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
3055; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3056; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3057; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3058; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3059; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3060; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3061; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
3062; SI-SDAG-NEXT:    v_med3_f32 v2, -v2, v3, v4
3063; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3064; SI-SDAG-NEXT:    s_endpgm
3065;
3066; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3067; SI-GISEL:       ; %bb.0:
3068; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3069; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3070; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
3071; SI-GISEL-NEXT:    s_mov_b32 s10, 0
3072; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
3073; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3074; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
3075; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3076; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3077; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
3078; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3079; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3080; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
3081; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3082; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3083; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
3084; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
3085; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
3086; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3087; SI-GISEL-NEXT:    s_endpgm
3088;
3089; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3090; VI-SDAG:       ; %bb.0:
3091; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3092; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3093; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3094; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
3095; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
3096; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3097; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
3098; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
3099; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3100; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
3101; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
3102; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3103; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
3104; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3105; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
3106; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3107; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
3108; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3109; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3110; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
3111; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3112; VI-SDAG-NEXT:    v_med3_f32 v2, -v7, v2, v3
3113; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
3114; VI-SDAG-NEXT:    s_endpgm
3115;
3116; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3117; VI-GISEL:       ; %bb.0:
3118; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3119; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3120; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3121; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
3122; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
3123; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3124; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3125; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
3126; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
3127; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
3128; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3129; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
3130; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
3131; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
3132; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3133; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
3134; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3135; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
3136; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3137; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
3138; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3139; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3140; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3141; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3142; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3143; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
3144; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
3145; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
3146; VI-GISEL-NEXT:    s_endpgm
3147;
3148; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3149; GFX9-SDAG:       ; %bb.0:
3150; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3151; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3152; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3153; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
3154; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
3155; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
3156; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
3157; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
3158; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
3159; GFX9-SDAG-NEXT:    v_med3_f32 v1, -v1, v2, v3
3160; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
3161; GFX9-SDAG-NEXT:    s_endpgm
3162;
3163; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3164; GFX9-GISEL:       ; %bb.0:
3165; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3166; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3167; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3168; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
3169; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
3170; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
3171; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
3172; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
3173; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
3174; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
3175; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
3176; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
3177; GFX9-GISEL-NEXT:    s_endpgm
3178;
3179; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3180; GFX11-SDAG:       ; %bb.0:
3181; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3182; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3183; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3184; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3185; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3186; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3187; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
3188; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3189; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
3190; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3191; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
3192; GFX11-SDAG-NEXT:    v_med3_f32 v1, -v1, v2, v3
3193; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
3194; GFX11-SDAG-NEXT:    s_endpgm
3195;
3196; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
3197; GFX11-GISEL:       ; %bb.0:
3198; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3199; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3200; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3201; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3202; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3203; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3204; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
3205; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3206; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
3207; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3208; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
3209; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
3210; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3211; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
3212; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
3213; GFX11-GISEL-NEXT:    s_endpgm
  ; The workitem id indexes all three input arrays and the output array.
3214  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3215  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3216  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3217  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3218  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads; the checks expect three separate loads, each followed by
  ; its own s_waitcnt vmcnt(0).
3219  %a = load volatile float, ptr addrspace(1) %gep0
3220  %b = load volatile float, ptr addrspace(1) %gep1
3221  %c = load volatile float, ptr addrspace(1) %gep2
  ; The negation is spelled as a subtraction from -0.0 rather than with the
  ; fneg instruction.
3222  %a.fneg = fsub float -0.0, %a
  ; tmp0 = max(-a, b); tmp1 = min(-a, b); tmp2 = max(tmp1, c);
  ; med3 = min(tmp0, tmp2)
3223  %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
3224  %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b)
3225  %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
3226  %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
3227  store float %med3, ptr addrspace(1) %outgep
3228  ret void
3229}
3230
3231define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3232; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
3233; SI-SDAG:       ; %bb.0:
3234; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3235; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3236; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3237; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3238; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3239; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
3240; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
3241; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3242; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
3243; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
3244; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
3245; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3246; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3247; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3248; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3249; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3250; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3251; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
3252; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
3253; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3254; SI-SDAG-NEXT:    s_endpgm
3255;
3256; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2:
3257; SI-GISEL:       ; %bb.0:
3258; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3259; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3260; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
3261; SI-GISEL-NEXT:    s_mov_b32 s10, 0
3262; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
3263; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3264; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
3265; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3266; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3267; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
3268; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3269; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3270; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
3271; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3272; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3273; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
3274; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
3275; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3276; SI-GISEL-NEXT:    s_endpgm
3277;
3278; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
3279; VI-SDAG:       ; %bb.0:
3280; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3281; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3282; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3283; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
3284; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
3285; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3286; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
3287; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
3288; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3289; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
3290; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
3291; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3292; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
3293; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3294; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
3295; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3296; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
3297; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3298; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3299; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
3300; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3301; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
3302; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
3303; VI-SDAG-NEXT:    s_endpgm
3304;
3305; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2:
3306; VI-GISEL:       ; %bb.0:
3307; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3308; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3309; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3310; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
3311; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
3312; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3313; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3314; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
3315; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
3316; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
3317; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3318; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
3319; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
3320; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
3321; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3322; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
3323; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3324; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
3325; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3326; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
3327; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3328; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3329; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3330; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3331; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3332; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
3333; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
3334; VI-GISEL-NEXT:    s_endpgm
3335;
3336; GFX9-LABEL: v_test_global_nnans_med3_f32_pat2:
3337; GFX9:       ; %bb.0:
3338; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3339; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3340; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3341; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
3342; GFX9-NEXT:    s_waitcnt vmcnt(0)
3343; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
3344; GFX9-NEXT:    s_waitcnt vmcnt(0)
3345; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
3346; GFX9-NEXT:    s_waitcnt vmcnt(0)
3347; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
3348; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
3349; GFX9-NEXT:    s_endpgm
3350;
3351; GFX11-LABEL: v_test_global_nnans_med3_f32_pat2:
3352; GFX11:       ; %bb.0:
3353; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3354; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3355; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3356; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3357; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3358; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3359; GFX11-NEXT:    s_waitcnt vmcnt(0)
3360; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3361; GFX11-NEXT:    s_waitcnt vmcnt(0)
3362; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3363; GFX11-NEXT:    s_waitcnt vmcnt(0)
3364; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
3365; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
3366; GFX11-NEXT:    s_endpgm
3367  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3368  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3369  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3370  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3371  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3372  %a = load volatile float, ptr addrspace(1) %gep0
3373  %b = load volatile float, ptr addrspace(1) %gep1
3374  %c = load volatile float, ptr addrspace(1) %gep2
3375  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
3376  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
3377  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3378  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3379  store float %med3, ptr addrspace(1) %outgep
3380  ret void
3381}
3382
; Median-of-three operand-order variant "pat3":
;   med3 = maxnum(minnum(a, b), minnum(c, maxnum(b, a)))
; Relative to the preceding _pat2 test, only the inner maxnum has its operands
; commuted to (b, a). Every check block below expects the whole min/max tree to
; become one v_med3_f32 whose sources are the three loaded registers in the
; order they were loaded.
; NOTE(review): attribute group #2 is defined outside this excerpt; the "nnans"
; in the function name suggests it enables no-NaNs FP math -- confirm.
3383define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3384; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
3385; SI-SDAG:       ; %bb.0:
3386; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3387; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3388; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3389; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3390; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3391; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
3392; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
3393; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3394; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
3395; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
3396; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
3397; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3398; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3399; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3400; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3401; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3402; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3403; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
3404; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
3405; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3406; SI-SDAG-NEXT:    s_endpgm
3407;
3408; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3:
3409; SI-GISEL:       ; %bb.0:
3410; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3411; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3412; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
3413; SI-GISEL-NEXT:    s_mov_b32 s10, 0
3414; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
3415; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3416; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
3417; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3418; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3419; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
3420; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3421; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3422; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
3423; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3424; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3425; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
3426; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
3427; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3428; SI-GISEL-NEXT:    s_endpgm
3429;
3430; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
3431; VI-SDAG:       ; %bb.0:
3432; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3433; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3434; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3435; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
3436; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
3437; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3438; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
3439; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
3440; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3441; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
3442; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
3443; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3444; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
3445; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3446; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
3447; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3448; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
3449; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3450; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3451; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
3452; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3453; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
3454; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
3455; VI-SDAG-NEXT:    s_endpgm
3456;
3457; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3:
3458; VI-GISEL:       ; %bb.0:
3459; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3460; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3461; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3462; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
3463; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
3464; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3465; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3466; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
3467; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
3468; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
3469; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3470; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
3471; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
3472; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
3473; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3474; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
3475; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3476; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
3477; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3478; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
3479; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3480; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3481; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3482; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3483; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3484; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
3485; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
3486; VI-GISEL-NEXT:    s_endpgm
3487;
3488; GFX9-LABEL: v_test_global_nnans_med3_f32_pat3:
3489; GFX9:       ; %bb.0:
3490; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3491; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3492; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3493; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
3494; GFX9-NEXT:    s_waitcnt vmcnt(0)
3495; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
3496; GFX9-NEXT:    s_waitcnt vmcnt(0)
3497; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
3498; GFX9-NEXT:    s_waitcnt vmcnt(0)
3499; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
3500; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
3501; GFX9-NEXT:    s_endpgm
3502;
3503; GFX11-LABEL: v_test_global_nnans_med3_f32_pat3:
3504; GFX11:       ; %bb.0:
3505; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3506; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3507; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3508; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3509; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3510; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3511; GFX11-NEXT:    s_waitcnt vmcnt(0)
3512; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3513; GFX11-NEXT:    s_waitcnt vmcnt(0)
3514; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3515; GFX11-NEXT:    s_waitcnt vmcnt(0)
3516; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
3517; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
3518; GFX11-NEXT:    s_endpgm
  ; Index each of the four pointers by the workitem id so every lane reads and
  ; writes its own float element.
3519  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3520  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3521  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3522  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3523  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs are read with volatile loads, in the order a, b, c.
3524  %a = load volatile float, ptr addrspace(1) %gep0
3525  %b = load volatile float, ptr addrspace(1) %gep1
3526  %c = load volatile float, ptr addrspace(1) %gep2
  ; Pattern under test: the outer minnum uses (a, b), the inner maxnum uses the
  ; commuted (b, a), and c is the first operand of the second minnum.
3527  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
3528  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3529  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3530  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3531  store float %med3, ptr addrspace(1) %outgep
3532  ret void
3533}
3534
; Median-of-three operand-order variant "pat4":
;   med3 = maxnum(minnum(b, a), minnum(c, maxnum(b, a)))
; Both the minnum and the maxnum of the (a, b) pair take their operands as
; (b, a). Every check block below still expects a single v_med3_f32, but with
; the first two source registers swapped relative to load order (second-loaded
; value first, then first-loaded, then third-loaded).
; NOTE(review): attribute group #2 is defined outside this excerpt; the "nnans"
; in the function name suggests it enables no-NaNs FP math -- confirm.
3535define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3536; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
3537; SI-SDAG:       ; %bb.0:
3538; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3539; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3540; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3541; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3542; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3543; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
3544; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
3545; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3546; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
3547; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
3548; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
3549; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3550; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3551; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3552; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3553; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3554; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3555; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
3556; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
3557; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3558; SI-SDAG-NEXT:    s_endpgm
3559;
3560; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
3561; SI-GISEL:       ; %bb.0:
3562; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3563; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3564; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
3565; SI-GISEL-NEXT:    s_mov_b32 s10, 0
3566; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
3567; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3568; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
3569; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3570; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3571; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
3572; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3573; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3574; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
3575; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3576; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3577; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
3578; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
3579; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3580; SI-GISEL-NEXT:    s_endpgm
3581;
3582; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
3583; VI-SDAG:       ; %bb.0:
3584; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3585; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3586; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3587; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
3588; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
3589; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3590; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
3591; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
3592; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3593; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
3594; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
3595; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3596; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
3597; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3598; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
3599; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3600; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
3601; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3602; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3603; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
3604; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3605; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
3606; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
3607; VI-SDAG-NEXT:    s_endpgm
3608;
3609; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
3610; VI-GISEL:       ; %bb.0:
3611; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3612; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3613; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3614; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
3615; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
3616; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3617; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3618; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
3619; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
3620; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
3621; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3622; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
3623; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
3624; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
3625; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3626; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
3627; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3628; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
3629; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3630; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
3631; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3632; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3633; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3634; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3635; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3636; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
3637; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
3638; VI-GISEL-NEXT:    s_endpgm
3639;
3640; GFX9-LABEL: v_test_global_nnans_med3_f32_pat4:
3641; GFX9:       ; %bb.0:
3642; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3643; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3644; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3645; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
3646; GFX9-NEXT:    s_waitcnt vmcnt(0)
3647; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
3648; GFX9-NEXT:    s_waitcnt vmcnt(0)
3649; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
3650; GFX9-NEXT:    s_waitcnt vmcnt(0)
3651; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
3652; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
3653; GFX9-NEXT:    s_endpgm
3654;
3655; GFX11-LABEL: v_test_global_nnans_med3_f32_pat4:
3656; GFX11:       ; %bb.0:
3657; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3658; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3659; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3660; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3661; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3662; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3663; GFX11-NEXT:    s_waitcnt vmcnt(0)
3664; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3665; GFX11-NEXT:    s_waitcnt vmcnt(0)
3666; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3667; GFX11-NEXT:    s_waitcnt vmcnt(0)
3668; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
3669; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
3670; GFX11-NEXT:    s_endpgm
  ; Index each of the four pointers by the workitem id so every lane reads and
  ; writes its own float element.
3671  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3672  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3673  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3674  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3675  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs are read with volatile loads, in the order a, b, c.
3676  %a = load volatile float, ptr addrspace(1) %gep0
3677  %b = load volatile float, ptr addrspace(1) %gep1
3678  %c = load volatile float, ptr addrspace(1) %gep2
  ; Pattern under test: both the minnum and the maxnum of the pair use (b, a),
  ; and c is the first operand of the second minnum.
3679  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
3680  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3681  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3682  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3683  store float %med3, ptr addrspace(1) %outgep
3684  ret void
3685}
3686
; Median-of-three operand-order variant "pat5":
;   med3 = maxnum(minnum(b, a), minnum(maxnum(b, a), c))
; Same as the preceding _pat4 test except that the second minnum takes the
; maxnum result first and c second. Every check block below expects a single
; v_med3_f32 with the first two source registers swapped relative to load order
; (second-loaded value first, then first-loaded, then third-loaded).
; NOTE(review): attribute group #2 is defined outside this excerpt; the "nnans"
; in the function name suggests it enables no-NaNs FP math -- confirm.
3687define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3688; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
3689; SI-SDAG:       ; %bb.0:
3690; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3691; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3692; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3693; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3694; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3695; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
3696; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
3697; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3698; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
3699; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
3700; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
3701; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3702; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3703; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3704; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3705; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3706; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3707; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
3708; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
3709; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3710; SI-SDAG-NEXT:    s_endpgm
3711;
3712; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
3713; SI-GISEL:       ; %bb.0:
3714; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3715; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3716; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
3717; SI-GISEL-NEXT:    s_mov_b32 s10, 0
3718; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
3719; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3720; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
3721; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3722; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3723; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
3724; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3725; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3726; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
3727; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3728; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3729; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
3730; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
3731; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3732; SI-GISEL-NEXT:    s_endpgm
3733;
3734; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
3735; VI-SDAG:       ; %bb.0:
3736; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3737; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3738; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3739; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
3740; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
3741; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3742; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
3743; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
3744; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3745; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
3746; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
3747; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3748; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
3749; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3750; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
3751; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3752; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
3753; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3754; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3755; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
3756; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3757; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
3758; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
3759; VI-SDAG-NEXT:    s_endpgm
3760;
3761; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
3762; VI-GISEL:       ; %bb.0:
3763; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3764; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3765; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3766; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
3767; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
3768; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3769; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3770; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
3771; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
3772; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
3773; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3774; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
3775; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
3776; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
3777; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3778; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
3779; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3780; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
3781; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3782; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
3783; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3784; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3785; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3786; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3787; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3788; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
3789; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
3790; VI-GISEL-NEXT:    s_endpgm
3791;
3792; GFX9-LABEL: v_test_global_nnans_med3_f32_pat5:
3793; GFX9:       ; %bb.0:
3794; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3795; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3796; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3797; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
3798; GFX9-NEXT:    s_waitcnt vmcnt(0)
3799; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
3800; GFX9-NEXT:    s_waitcnt vmcnt(0)
3801; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
3802; GFX9-NEXT:    s_waitcnt vmcnt(0)
3803; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
3804; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
3805; GFX9-NEXT:    s_endpgm
3806;
3807; GFX11-LABEL: v_test_global_nnans_med3_f32_pat5:
3808; GFX11:       ; %bb.0:
3809; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3810; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3811; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3812; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3813; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3814; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3815; GFX11-NEXT:    s_waitcnt vmcnt(0)
3816; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3817; GFX11-NEXT:    s_waitcnt vmcnt(0)
3818; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3819; GFX11-NEXT:    s_waitcnt vmcnt(0)
3820; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
3821; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
3822; GFX11-NEXT:    s_endpgm
  ; Index each of the four pointers by the workitem id so every lane reads and
  ; writes its own float element.
3823  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3824  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3825  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3826  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3827  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs are read with volatile loads, in the order a, b, c.
3828  %a = load volatile float, ptr addrspace(1) %gep0
3829  %b = load volatile float, ptr addrspace(1) %gep1
3830  %c = load volatile float, ptr addrspace(1) %gep2
  ; Pattern under test: both the minnum and the maxnum of the pair use (b, a),
  ; and the second minnum takes the maxnum result first with c second.
3831  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
3832  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
3833  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
3834  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3835  store float %med3, ptr addrspace(1) %outgep
3836  ret void
3837}
3838
3839define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3840; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
3841; SI-SDAG:       ; %bb.0:
3842; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3843; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3844; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3845; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3846; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3847; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
3848; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
3849; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3850; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
3851; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
3852; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
3853; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3854; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3855; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
3856; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3857; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
3858; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3859; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
3860; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
3861; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3862; SI-SDAG-NEXT:    s_endpgm
3863;
3864; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
3865; SI-GISEL:       ; %bb.0:
3866; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3867; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3868; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
3869; SI-GISEL-NEXT:    s_mov_b32 s10, 0
3870; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
3871; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3872; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
3873; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
3874; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3875; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
3876; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
3877; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3878; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
3879; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
3880; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3881; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
3882; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
3883; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
3884; SI-GISEL-NEXT:    s_endpgm
3885;
3886; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
3887; VI-SDAG:       ; %bb.0:
3888; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3889; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3890; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
3891; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
3892; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
3893; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3894; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
3895; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
3896; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3897; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
3898; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
3899; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3900; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
3901; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3902; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
3903; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3904; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
3905; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
3906; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
3907; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
3908; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3909; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
3910; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
3911; VI-SDAG-NEXT:    s_endpgm
3912;
3913; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
3914; VI-GISEL:       ; %bb.0:
3915; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
3916; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
3917; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
3918; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
3919; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
3920; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3921; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3922; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
3923; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
3924; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
3925; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
3926; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
3927; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
3928; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
3929; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3930; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
3931; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3932; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
3933; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3934; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
3935; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
3936; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
3937; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
3938; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3939; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3940; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
3941; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
3942; VI-GISEL-NEXT:    s_endpgm
3943;
3944; GFX9-LABEL: v_test_global_nnans_med3_f32_pat6:
3945; GFX9:       ; %bb.0:
3946; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
3947; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3948; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3949; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
3950; GFX9-NEXT:    s_waitcnt vmcnt(0)
3951; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
3952; GFX9-NEXT:    s_waitcnt vmcnt(0)
3953; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
3954; GFX9-NEXT:    s_waitcnt vmcnt(0)
3955; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
3956; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
3957; GFX9-NEXT:    s_endpgm
3958;
3959; GFX11-LABEL: v_test_global_nnans_med3_f32_pat6:
3960; GFX11:       ; %bb.0:
3961; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
3962; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
3963; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3964; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3965; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3966; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
3967; GFX11-NEXT:    s_waitcnt vmcnt(0)
3968; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
3969; GFX11-NEXT:    s_waitcnt vmcnt(0)
3970; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
3971; GFX11-NEXT:    s_waitcnt vmcnt(0)
3972; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
3973; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
3974; GFX11-NEXT:    s_endpgm
3975  %tid = call i32 @llvm.amdgcn.workitem.id.x()
3976  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
3977  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
3978  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
3979  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
3980  %a = load volatile float, ptr addrspace(1) %gep0
3981  %b = load volatile float, ptr addrspace(1) %gep1
3982  %c = load volatile float, ptr addrspace(1) %gep2
3983  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
3984  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
3985  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
3986  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
3987  store float %med3, ptr addrspace(1) %outgep
3988  ret void
3989}
3990
; Median-of-three idiom, commuted variant 7:
;   med3 = maxnum(minnum(b, a), minnum(c, maxnum(b, a)))
; a, b and c are the tid-th floats of %aptr, %bptr and %cptr; the result is
; stored to the tid-th float of %out. Every check prefix below expects the
; four min/max calls to collapse into one v_med3_f32 with operands (b, a, c).
; NOTE(review): attribute group #2 is defined outside this view; presumably it
; supplies the no-NaNs assumption the function name refers to - confirm.
3991define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
3992; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
3993; SI-SDAG:       ; %bb.0:
3994; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
3995; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
3996; SI-SDAG-NEXT:    s_mov_b32 s10, 0
3997; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
3998; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
3999; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4000; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4001; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4002; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4003; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4004; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4005; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4006; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4007; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4008; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4009; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4010; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4011; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4012; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
4013; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4014; SI-SDAG-NEXT:    s_endpgm
4015;
4016; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
4017; SI-GISEL:       ; %bb.0:
4018; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4019; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4020; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4021; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4022; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4023; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4024; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4025; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4026; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4027; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4028; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4029; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4030; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4031; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4032; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4033; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
4034; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4035; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4036; SI-GISEL-NEXT:    s_endpgm
4037;
4038; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
4039; VI-SDAG:       ; %bb.0:
4040; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4041; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4042; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4043; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4044; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4045; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4046; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4047; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4048; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4049; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4050; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4051; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4052; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4053; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4054; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4055; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4056; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4057; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4058; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4059; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4060; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4061; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
4062; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4063; VI-SDAG-NEXT:    s_endpgm
4064;
4065; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
4066; VI-GISEL:       ; %bb.0:
4067; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4068; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4069; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4070; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4071; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4072; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4073; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4074; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4075; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4076; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4077; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4078; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4079; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4080; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4081; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4082; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4083; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4084; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4085; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4086; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4087; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4088; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
4089; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
4090; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4091; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4092; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
4093; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
4094; VI-GISEL-NEXT:    s_endpgm
4095;
4096; GFX9-LABEL: v_test_global_nnans_med3_f32_pat7:
4097; GFX9:       ; %bb.0:
4098; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4099; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4100; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4101; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
4102; GFX9-NEXT:    s_waitcnt vmcnt(0)
4103; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
4104; GFX9-NEXT:    s_waitcnt vmcnt(0)
4105; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
4106; GFX9-NEXT:    s_waitcnt vmcnt(0)
4107; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
4108; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
4109; GFX9-NEXT:    s_endpgm
4110;
4111; GFX11-LABEL: v_test_global_nnans_med3_f32_pat7:
4112; GFX11:       ; %bb.0:
4113; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
4114; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
4115; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4116; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4117; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4118; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
4119; GFX11-NEXT:    s_waitcnt vmcnt(0)
4120; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
4121; GFX11-NEXT:    s_waitcnt vmcnt(0)
4122; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
4123; GFX11-NEXT:    s_waitcnt vmcnt(0)
4124; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
4125; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
4126; GFX11-NEXT:    s_endpgm
; Index all four buffers by the workitem id in x.
4127  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4128  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4129  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4130  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4131  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
4132  %a = load volatile float, ptr addrspace(1) %gep0
4133  %b = load volatile float, ptr addrspace(1) %gep1
4134  %c = load volatile float, ptr addrspace(1) %gep2
; med3 = maxnum(minnum(b, a), minnum(c, maxnum(b, a)))
4135  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
4136  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4137  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4138  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
4139  store float %med3, ptr addrspace(1) %outgep
4140  ret void
4141}
4142
; Median-of-three idiom, commuted variant 8:
;   med3 = maxnum(minnum(maxnum(a, b), c), minnum(a, b))
; a, b and c are the tid-th floats of %aptr, %bptr and %cptr; the result is
; stored to the tid-th float of %out. Every check prefix below expects the
; four min/max calls to collapse into one v_med3_f32 with operands (a, b, c).
; NOTE(review): attribute group #2 is defined outside this view; presumably it
; supplies the no-NaNs assumption the function name refers to - confirm.
4143define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4144; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
4145; SI-SDAG:       ; %bb.0:
4146; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4147; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
4148; SI-SDAG-NEXT:    s_mov_b32 s10, 0
4149; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4150; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
4151; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4152; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4153; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4154; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4155; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4156; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4157; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4158; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4159; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4160; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4161; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4162; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4163; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4164; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
4165; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4166; SI-SDAG-NEXT:    s_endpgm
4167;
4168; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
4169; SI-GISEL:       ; %bb.0:
4170; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4171; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4172; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4173; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4174; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4175; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4176; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4177; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4178; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4179; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4180; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4181; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4182; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4183; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4184; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4185; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
4186; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4187; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4188; SI-GISEL-NEXT:    s_endpgm
4189;
4190; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
4191; VI-SDAG:       ; %bb.0:
4192; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4193; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4194; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4195; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4196; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4197; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4198; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4199; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4200; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4201; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4202; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4203; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4204; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4205; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4206; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4207; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4208; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4209; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4210; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4211; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4212; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4213; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
4214; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4215; VI-SDAG-NEXT:    s_endpgm
4216;
4217; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
4218; VI-GISEL:       ; %bb.0:
4219; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4220; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4221; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4222; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4223; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4224; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4225; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4226; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4227; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4228; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4229; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4230; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4231; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4232; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4233; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4234; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4235; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4236; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4237; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4238; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4239; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4240; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
4241; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
4242; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4243; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4244; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
4245; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
4246; VI-GISEL-NEXT:    s_endpgm
4247;
4248; GFX9-LABEL: v_test_global_nnans_med3_f32_pat8:
4249; GFX9:       ; %bb.0:
4250; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4251; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4252; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4253; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
4254; GFX9-NEXT:    s_waitcnt vmcnt(0)
4255; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
4256; GFX9-NEXT:    s_waitcnt vmcnt(0)
4257; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
4258; GFX9-NEXT:    s_waitcnt vmcnt(0)
4259; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
4260; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
4261; GFX9-NEXT:    s_endpgm
4262;
4263; GFX11-LABEL: v_test_global_nnans_med3_f32_pat8:
4264; GFX11:       ; %bb.0:
4265; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
4266; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
4267; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4268; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4269; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4270; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
4271; GFX11-NEXT:    s_waitcnt vmcnt(0)
4272; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
4273; GFX11-NEXT:    s_waitcnt vmcnt(0)
4274; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
4275; GFX11-NEXT:    s_waitcnt vmcnt(0)
4276; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
4277; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
4278; GFX11-NEXT:    s_endpgm
; Index all four buffers by the workitem id in x.
4279  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4280  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4281  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4282  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4283  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
4284  %a = load volatile float, ptr addrspace(1) %gep0
4285  %b = load volatile float, ptr addrspace(1) %gep1
4286  %c = load volatile float, ptr addrspace(1) %gep2
; med3 = maxnum(minnum(maxnum(a, b), c), minnum(a, b))
4287  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4288  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
4289  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
4290  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4291  store float %med3, ptr addrspace(1) %outgep
4292  ret void
4293}
4294
; Median-of-three idiom, commuted variant 9:
;   med3 = maxnum(minnum(maxnum(b, a), c), minnum(a, b))
; a, b and c are the tid-th floats of %aptr, %bptr and %cptr; the result is
; stored to the tid-th float of %out. Every check prefix below expects the
; four min/max calls to collapse into one v_med3_f32 with operands (b, a, c).
; NOTE(review): attribute group #2 is defined outside this view; presumably it
; supplies the no-NaNs assumption the function name refers to - confirm.
4295define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4296; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
4297; SI-SDAG:       ; %bb.0:
4298; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4299; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
4300; SI-SDAG-NEXT:    s_mov_b32 s10, 0
4301; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4302; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
4303; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4304; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4305; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4306; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4307; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4308; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4309; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4310; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4311; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4312; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4313; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4314; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4315; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4316; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
4317; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4318; SI-SDAG-NEXT:    s_endpgm
4319;
4320; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
4321; SI-GISEL:       ; %bb.0:
4322; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4323; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4324; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4325; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4326; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4327; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4328; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4329; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4330; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4331; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4332; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4333; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4334; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4335; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4336; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4337; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
4338; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4339; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4340; SI-GISEL-NEXT:    s_endpgm
4341;
4342; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
4343; VI-SDAG:       ; %bb.0:
4344; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4345; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4346; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4347; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4348; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4349; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4350; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4351; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4352; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4353; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4354; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4355; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4356; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4357; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4358; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4359; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4360; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4361; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4362; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4363; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4364; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4365; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
4366; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4367; VI-SDAG-NEXT:    s_endpgm
4368;
4369; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
4370; VI-GISEL:       ; %bb.0:
4371; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4372; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4373; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4374; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4375; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4376; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4377; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4378; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4379; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4380; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4381; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4382; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4383; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4384; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4385; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4386; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4387; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4388; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4389; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4390; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4391; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4392; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
4393; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
4394; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4395; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4396; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
4397; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
4398; VI-GISEL-NEXT:    s_endpgm
4399;
4400; GFX9-LABEL: v_test_global_nnans_med3_f32_pat9:
4401; GFX9:       ; %bb.0:
4402; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4403; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4404; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4405; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
4406; GFX9-NEXT:    s_waitcnt vmcnt(0)
4407; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
4408; GFX9-NEXT:    s_waitcnt vmcnt(0)
4409; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
4410; GFX9-NEXT:    s_waitcnt vmcnt(0)
4411; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
4412; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
4413; GFX9-NEXT:    s_endpgm
4414;
4415; GFX11-LABEL: v_test_global_nnans_med3_f32_pat9:
4416; GFX11:       ; %bb.0:
4417; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
4418; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
4419; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4420; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4421; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4422; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
4423; GFX11-NEXT:    s_waitcnt vmcnt(0)
4424; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
4425; GFX11-NEXT:    s_waitcnt vmcnt(0)
4426; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
4427; GFX11-NEXT:    s_waitcnt vmcnt(0)
4428; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
4429; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
4430; GFX11-NEXT:    s_endpgm
; Index all four buffers by the workitem id in x.
4431  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4432  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4433  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4434  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4435  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
4436  %a = load volatile float, ptr addrspace(1) %gep0
4437  %b = load volatile float, ptr addrspace(1) %gep1
4438  %c = load volatile float, ptr addrspace(1) %gep2
; med3 = maxnum(minnum(maxnum(b, a), c), minnum(a, b))
4439  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4440  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4441  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
4442  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4443  store float %med3, ptr addrspace(1) %outgep
4444  ret void
4445}
4446
; Median-of-three idiom, commuted variant 10:
;   med3 = maxnum(minnum(c, maxnum(a, b)), minnum(a, b))
; a, b and c are the tid-th floats of %aptr, %bptr and %cptr; the result is
; stored to the tid-th float of %out. Every check prefix below expects the
; four min/max calls to collapse into one v_med3_f32 with operands (a, b, c).
; NOTE(review): attribute group #2 is defined outside this view; presumably it
; supplies the no-NaNs assumption the function name refers to - confirm.
4447define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4448; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
4449; SI-SDAG:       ; %bb.0:
4450; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4451; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
4452; SI-SDAG-NEXT:    s_mov_b32 s10, 0
4453; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4454; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
4455; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4456; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4457; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4458; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4459; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4460; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4461; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4462; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4463; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4464; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4465; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4466; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4467; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4468; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
4469; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4470; SI-SDAG-NEXT:    s_endpgm
4471;
4472; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10:
4473; SI-GISEL:       ; %bb.0:
4474; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4475; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4476; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4477; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4478; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4479; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4480; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4481; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4482; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4483; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4484; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4485; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4486; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4487; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4488; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4489; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
4490; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4491; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4492; SI-GISEL-NEXT:    s_endpgm
4493;
4494; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
4495; VI-SDAG:       ; %bb.0:
4496; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4497; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4498; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4499; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4500; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4501; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4502; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4503; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4504; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4505; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4506; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4507; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4508; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4509; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4510; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4511; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4512; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4513; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4514; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4515; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4516; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4517; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
4518; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4519; VI-SDAG-NEXT:    s_endpgm
4520;
4521; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10:
4522; VI-GISEL:       ; %bb.0:
4523; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4524; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4525; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4526; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4527; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4528; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4529; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4530; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4531; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4532; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4533; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4534; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4535; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4536; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4537; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4538; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4539; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4540; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4541; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4542; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4543; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4544; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
4545; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
4546; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4547; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4548; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
4549; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
4550; VI-GISEL-NEXT:    s_endpgm
4551;
4552; GFX9-LABEL: v_test_global_nnans_med3_f32_pat10:
4553; GFX9:       ; %bb.0:
4554; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4555; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4556; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4557; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
4558; GFX9-NEXT:    s_waitcnt vmcnt(0)
4559; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
4560; GFX9-NEXT:    s_waitcnt vmcnt(0)
4561; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
4562; GFX9-NEXT:    s_waitcnt vmcnt(0)
4563; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
4564; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
4565; GFX9-NEXT:    s_endpgm
4566;
4567; GFX11-LABEL: v_test_global_nnans_med3_f32_pat10:
4568; GFX11:       ; %bb.0:
4569; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
4570; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
4571; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4572; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4573; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4574; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
4575; GFX11-NEXT:    s_waitcnt vmcnt(0)
4576; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
4577; GFX11-NEXT:    s_waitcnt vmcnt(0)
4578; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
4579; GFX11-NEXT:    s_waitcnt vmcnt(0)
4580; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
4581; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
4582; GFX11-NEXT:    s_endpgm
; Index all four buffers by the workitem id in x.
4583  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4584  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4585  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4586  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4587  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
; The three inputs are read with volatile loads.
4588  %a = load volatile float, ptr addrspace(1) %gep0
4589  %b = load volatile float, ptr addrspace(1) %gep1
4590  %c = load volatile float, ptr addrspace(1) %gep2
; med3 = maxnum(minnum(c, maxnum(a, b)), minnum(a, b))
4591  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4592  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
4593  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4594  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4595  store float %med3, ptr addrspace(1) %outgep
4596  ret void
4597}
4598
; Median-of-three selection test, operand permutation 11. The IR computes
;   %med3 = maxnum(minnum(%c, maxnum(%b, %a)), minnum(%a, %b))
; where %a, %b and %c are volatile float loads from %aptr, %bptr and %cptr at
; index workitem.id.x, and stores %med3 to %out at the same index. The
; assertions for every run line expect exactly one v_med3_f32 and no separate
; min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the function name suggests it carries a no-NaNs attribute -- confirm.
4599define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4600; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
4601; SI-SDAG:       ; %bb.0:
4602; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4603; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
4604; SI-SDAG-NEXT:    s_mov_b32 s10, 0
4605; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4606; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
4607; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4608; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4609; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4610; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4611; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4612; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4613; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4614; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4615; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4616; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4617; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4618; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4619; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4620; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
4621; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4622; SI-SDAG-NEXT:    s_endpgm
4623;
4624; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11:
4625; SI-GISEL:       ; %bb.0:
4626; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4627; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4628; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4629; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4630; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4631; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4632; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4633; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4634; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4635; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4636; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4637; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4638; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4639; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4640; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4641; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
4642; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4643; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4644; SI-GISEL-NEXT:    s_endpgm
4645;
4646; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
4647; VI-SDAG:       ; %bb.0:
4648; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4649; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4650; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4651; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4652; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4653; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4654; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4655; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4656; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4657; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4658; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4659; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4660; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4661; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4662; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4663; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4664; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4665; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4666; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4667; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4668; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4669; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
4670; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4671; VI-SDAG-NEXT:    s_endpgm
4672;
4673; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11:
4674; VI-GISEL:       ; %bb.0:
4675; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4676; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4677; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4678; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4679; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4680; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4681; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4682; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4683; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4684; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4685; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4686; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4687; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4688; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4689; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4690; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4691; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4692; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4693; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4694; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4695; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4696; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
4697; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
4698; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4699; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4700; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
4701; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
4702; VI-GISEL-NEXT:    s_endpgm
4703;
4704; GFX9-LABEL: v_test_global_nnans_med3_f32_pat11:
4705; GFX9:       ; %bb.0:
4706; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4707; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4708; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4709; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
4710; GFX9-NEXT:    s_waitcnt vmcnt(0)
4711; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
4712; GFX9-NEXT:    s_waitcnt vmcnt(0)
4713; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
4714; GFX9-NEXT:    s_waitcnt vmcnt(0)
4715; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
4716; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
4717; GFX9-NEXT:    s_endpgm
4718;
4719; GFX11-LABEL: v_test_global_nnans_med3_f32_pat11:
4720; GFX11:       ; %bb.0:
4721; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
4722; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
4723; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4724; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4725; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4726; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
4727; GFX11-NEXT:    s_waitcnt vmcnt(0)
4728; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
4729; GFX11-NEXT:    s_waitcnt vmcnt(0)
4730; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
4731; GFX11-NEXT:    s_waitcnt vmcnt(0)
4732; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
4733; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
4734; GFX11-NEXT:    s_endpgm
4735  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4736  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4737  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4738  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4739  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4740  %a = load volatile float, ptr addrspace(1) %gep0
4741  %b = load volatile float, ptr addrspace(1) %gep1
4742  %c = load volatile float, ptr addrspace(1) %gep2
; Permutation under test: minnum takes (%a, %b), maxnum takes the commuted
; (%b, %a), the clamping minnum has %c first, and the outer maxnum has the
; clamped value first.
4743  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
4744  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4745  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4746  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4747  store float %med3, ptr addrspace(1) %outgep
4748  ret void
4749}
4750
; Median-of-three selection test, operand permutation 12. The IR computes
;   %med3 = maxnum(minnum(%c, maxnum(%b, %a)), minnum(%b, %a))
; where %a, %b and %c are volatile float loads from %aptr, %bptr and %cptr at
; index workitem.id.x, and stores %med3 to %out at the same index. The
; assertions for every run line expect exactly one v_med3_f32 and no separate
; min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the function name suggests it carries a no-NaNs attribute -- confirm.
4751define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4752; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
4753; SI-SDAG:       ; %bb.0:
4754; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4755; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
4756; SI-SDAG-NEXT:    s_mov_b32 s10, 0
4757; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4758; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
4759; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4760; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4761; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4762; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4763; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4764; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4765; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4766; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4767; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4768; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4769; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4770; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4771; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4772; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
4773; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4774; SI-SDAG-NEXT:    s_endpgm
4775;
4776; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12:
4777; SI-GISEL:       ; %bb.0:
4778; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4779; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4780; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4781; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4782; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4783; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4784; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4785; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4786; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4787; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4788; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4789; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4790; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4791; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4792; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4793; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
4794; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4795; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4796; SI-GISEL-NEXT:    s_endpgm
4797;
4798; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
4799; VI-SDAG:       ; %bb.0:
4800; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4801; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4802; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4803; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4804; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4805; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4806; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4807; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4808; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4809; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4810; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4811; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4812; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4813; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4814; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4815; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4816; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4817; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4818; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4819; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4820; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4821; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
4822; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4823; VI-SDAG-NEXT:    s_endpgm
4824;
4825; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12:
4826; VI-GISEL:       ; %bb.0:
4827; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4828; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4829; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4830; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4831; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4832; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4833; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4834; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4835; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4836; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4837; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4838; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4839; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4840; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4841; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4842; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4843; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4844; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4845; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4846; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4847; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4848; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
4849; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
4850; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4851; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4852; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
4853; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
4854; VI-GISEL-NEXT:    s_endpgm
4855;
4856; GFX9-LABEL: v_test_global_nnans_med3_f32_pat12:
4857; GFX9:       ; %bb.0:
4858; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4859; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4860; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4861; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
4862; GFX9-NEXT:    s_waitcnt vmcnt(0)
4863; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
4864; GFX9-NEXT:    s_waitcnt vmcnt(0)
4865; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
4866; GFX9-NEXT:    s_waitcnt vmcnt(0)
4867; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
4868; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
4869; GFX9-NEXT:    s_endpgm
4870;
4871; GFX11-LABEL: v_test_global_nnans_med3_f32_pat12:
4872; GFX11:       ; %bb.0:
4873; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
4874; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
4875; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4876; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4877; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4878; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
4879; GFX11-NEXT:    s_waitcnt vmcnt(0)
4880; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
4881; GFX11-NEXT:    s_waitcnt vmcnt(0)
4882; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
4883; GFX11-NEXT:    s_waitcnt vmcnt(0)
4884; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
4885; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
4886; GFX11-NEXT:    s_endpgm
4887  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4888  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
4889  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
4890  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
4891  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
4892  %a = load volatile float, ptr addrspace(1) %gep0
4893  %b = load volatile float, ptr addrspace(1) %gep1
4894  %c = load volatile float, ptr addrspace(1) %gep2
; Permutation under test: both the inner minnum and the inner maxnum take
; (%b, %a), the clamping minnum has %c first, and the outer maxnum has the
; clamped value first.
4895  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
4896  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
4897  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
4898  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
4899  store float %med3, ptr addrspace(1) %outgep
4900  ret void
4901}
4902
; Median-of-three selection test, operand permutation 13. The IR computes
;   %med3 = maxnum(minnum(maxnum(%b, %a), %c), minnum(%b, %a))
; where %a, %b and %c are volatile float loads from %aptr, %bptr and %cptr at
; index workitem.id.x, and stores %med3 to %out at the same index. The
; assertions for every run line expect exactly one v_med3_f32 and no separate
; min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the function name suggests it carries a no-NaNs attribute -- confirm.
4903define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
4904; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
4905; SI-SDAG:       ; %bb.0:
4906; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4907; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
4908; SI-SDAG-NEXT:    s_mov_b32 s10, 0
4909; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4910; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
4911; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
4912; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
4913; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4914; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
4915; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
4916; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
4917; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4918; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4919; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
4920; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4921; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
4922; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4923; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
4924; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
4925; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4926; SI-SDAG-NEXT:    s_endpgm
4927;
4928; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13:
4929; SI-GISEL:       ; %bb.0:
4930; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4931; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
4932; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
4933; SI-GISEL-NEXT:    s_mov_b32 s10, 0
4934; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4935; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4936; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
4937; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
4938; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4939; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
4940; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
4941; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4942; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
4943; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
4944; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4945; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
4946; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
4947; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
4948; SI-GISEL-NEXT:    s_endpgm
4949;
4950; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
4951; VI-SDAG:       ; %bb.0:
4952; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4953; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4954; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
4955; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
4956; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
4957; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4958; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
4959; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
4960; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4961; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
4962; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
4963; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4964; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
4965; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4966; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
4967; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4968; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
4969; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
4970; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
4971; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
4972; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4973; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
4974; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
4975; VI-SDAG-NEXT:    s_endpgm
4976;
4977; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13:
4978; VI-GISEL:       ; %bb.0:
4979; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4980; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
4981; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
4982; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
4983; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
4984; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4985; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4986; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
4987; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
4988; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
4989; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
4990; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
4991; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
4992; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4993; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
4994; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
4995; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4996; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
4997; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
4998; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
4999; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5000; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
5001; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
5002; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5003; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5004; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
5005; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5006; VI-GISEL-NEXT:    s_endpgm
5007;
5008; GFX9-LABEL: v_test_global_nnans_med3_f32_pat13:
5009; GFX9:       ; %bb.0:
5010; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5011; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5012; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5013; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
5014; GFX9-NEXT:    s_waitcnt vmcnt(0)
5015; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
5016; GFX9-NEXT:    s_waitcnt vmcnt(0)
5017; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
5018; GFX9-NEXT:    s_waitcnt vmcnt(0)
5019; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
5020; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
5021; GFX9-NEXT:    s_endpgm
5022;
5023; GFX11-LABEL: v_test_global_nnans_med3_f32_pat13:
5024; GFX11:       ; %bb.0:
5025; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5026; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5027; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5028; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5029; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5030; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5031; GFX11-NEXT:    s_waitcnt vmcnt(0)
5032; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5033; GFX11-NEXT:    s_waitcnt vmcnt(0)
5034; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5035; GFX11-NEXT:    s_waitcnt vmcnt(0)
5036; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
5037; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
5038; GFX11-NEXT:    s_endpgm
5039  %tid = call i32 @llvm.amdgcn.workitem.id.x()
5040  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5041  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5042  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5043  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
5044  %a = load volatile float, ptr addrspace(1) %gep0
5045  %b = load volatile float, ptr addrspace(1) %gep1
5046  %c = load volatile float, ptr addrspace(1) %gep2
; Permutation under test: both the inner minnum and the inner maxnum take
; (%b, %a), the clamping minnum has the maxnum result first and %c second,
; and the outer maxnum has the clamped value first.
5047  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
5048  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
5049  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
5050  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
5051  store float %med3, ptr addrspace(1) %outgep
5052  ret void
5053}
5054
; Median-of-three selection test, operand permutation 14. The IR computes
;   %med3 = maxnum(minnum(%c, maxnum(%a, %b)), minnum(%b, %a))
; where %a, %b and %c are volatile float loads from %aptr, %bptr and %cptr at
; index workitem.id.x, and stores %med3 to %out at the same index. The
; assertions for every run line expect exactly one v_med3_f32 and no separate
; min/max instructions.
; NOTE(review): attribute group #2 is defined outside this chunk; the "nnans"
; in the function name suggests it carries a no-NaNs attribute -- confirm.
5055define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
5056; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
5057; SI-SDAG:       ; %bb.0:
5058; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5059; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
5060; SI-SDAG-NEXT:    s_mov_b32 s10, 0
5061; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5062; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
5063; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
5064; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
5065; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5066; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
5067; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
5068; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
5069; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5070; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5071; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5072; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5073; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5074; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5075; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
5076; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
5077; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5078; SI-SDAG-NEXT:    s_endpgm
5079;
5080; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14:
5081; SI-GISEL:       ; %bb.0:
5082; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5083; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5084; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
5085; SI-GISEL-NEXT:    s_mov_b32 s10, 0
5086; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
5087; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5088; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
5089; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5090; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5091; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
5092; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5093; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5094; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
5095; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5096; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5097; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
5098; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
5099; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5100; SI-GISEL-NEXT:    s_endpgm
5101;
5102; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
5103; VI-SDAG:       ; %bb.0:
5104; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5105; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5106; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5107; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
5108; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
5109; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5110; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
5111; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
5112; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5113; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
5114; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
5115; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5116; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
5117; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5118; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
5119; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5120; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
5121; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5122; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
5123; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
5124; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5125; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
5126; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
5127; VI-SDAG-NEXT:    s_endpgm
5128;
5129; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14:
5130; VI-GISEL:       ; %bb.0:
5131; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5132; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5133; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5134; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
5135; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
5136; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5137; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5138; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
5139; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
5140; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
5141; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5142; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
5143; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
5144; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
5145; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5146; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
5147; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5148; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
5149; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5150; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
5151; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5152; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
5153; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
5154; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5155; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5156; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
5157; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5158; VI-GISEL-NEXT:    s_endpgm
5159;
5160; GFX9-LABEL: v_test_global_nnans_med3_f32_pat14:
5161; GFX9:       ; %bb.0:
5162; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5163; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5164; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5165; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
5166; GFX9-NEXT:    s_waitcnt vmcnt(0)
5167; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
5168; GFX9-NEXT:    s_waitcnt vmcnt(0)
5169; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
5170; GFX9-NEXT:    s_waitcnt vmcnt(0)
5171; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
5172; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
5173; GFX9-NEXT:    s_endpgm
5174;
5175; GFX11-LABEL: v_test_global_nnans_med3_f32_pat14:
5176; GFX11:       ; %bb.0:
5177; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5178; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5179; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5180; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5181; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5182; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5183; GFX11-NEXT:    s_waitcnt vmcnt(0)
5184; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5185; GFX11-NEXT:    s_waitcnt vmcnt(0)
5186; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5187; GFX11-NEXT:    s_waitcnt vmcnt(0)
5188; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
5189; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
5190; GFX11-NEXT:    s_endpgm
5191  %tid = call i32 @llvm.amdgcn.workitem.id.x()
5192  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5193  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5194  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5195  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
5196  %a = load volatile float, ptr addrspace(1) %gep0
5197  %b = load volatile float, ptr addrspace(1) %gep1
5198  %c = load volatile float, ptr addrspace(1) %gep2
; Permutation under test: minnum takes the commuted (%b, %a), maxnum takes
; (%a, %b), the clamping minnum has %c first, and the outer maxnum has the
; clamped value first.
5199  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
5200  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
5201  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
5202  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
5203  store float %med3, ptr addrspace(1) %outgep
5204  ret void
5205}
5206
5207define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
5208; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
5209; SI-SDAG:       ; %bb.0:
5210; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5211; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
5212; SI-SDAG-NEXT:    s_mov_b32 s10, 0
5213; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5214; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
5215; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
5216; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
5217; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5218; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
5219; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
5220; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
5221; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5222; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5223; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5224; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5225; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5226; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5227; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
5228; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
5229; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5230; SI-SDAG-NEXT:    s_endpgm
5231;
5232; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15:
5233; SI-GISEL:       ; %bb.0:
5234; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5235; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5236; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
5237; SI-GISEL-NEXT:    s_mov_b32 s10, 0
5238; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
5239; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5240; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
5241; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5242; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5243; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
5244; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5245; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5246; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
5247; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5248; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5249; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
5250; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
5251; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5252; SI-GISEL-NEXT:    s_endpgm
5253;
5254; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
5255; VI-SDAG:       ; %bb.0:
5256; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5257; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5258; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5259; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
5260; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
5261; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5262; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
5263; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
5264; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5265; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
5266; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
5267; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5268; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
5269; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5270; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
5271; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5272; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
5273; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5274; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
5275; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
5276; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5277; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
5278; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
5279; VI-SDAG-NEXT:    s_endpgm
5280;
5281; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15:
5282; VI-GISEL:       ; %bb.0:
5283; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5284; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5285; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5286; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
5287; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
5288; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5289; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5290; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
5291; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
5292; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
5293; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5294; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
5295; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
5296; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
5297; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5298; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
5299; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5300; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
5301; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5302; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
5303; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5304; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
5305; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
5306; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5307; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5308; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
5309; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5310; VI-GISEL-NEXT:    s_endpgm
5311;
5312; GFX9-LABEL: v_test_global_nnans_med3_f32_pat15:
5313; GFX9:       ; %bb.0:
5314; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5315; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5316; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5317; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
5318; GFX9-NEXT:    s_waitcnt vmcnt(0)
5319; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
5320; GFX9-NEXT:    s_waitcnt vmcnt(0)
5321; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
5322; GFX9-NEXT:    s_waitcnt vmcnt(0)
5323; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
5324; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
5325; GFX9-NEXT:    s_endpgm
5326;
5327; GFX11-LABEL: v_test_global_nnans_med3_f32_pat15:
5328; GFX11:       ; %bb.0:
5329; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5330; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5331; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5332; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5333; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5334; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5335; GFX11-NEXT:    s_waitcnt vmcnt(0)
5336; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5337; GFX11-NEXT:    s_waitcnt vmcnt(0)
5338; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5339; GFX11-NEXT:    s_waitcnt vmcnt(0)
5340; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
5341; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
5342; GFX11-NEXT:    s_endpgm
5343  %tid = call i32 @llvm.amdgcn.workitem.id.x()
5344  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5345  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5346  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5347  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
5348  %a = load volatile float, ptr addrspace(1) %gep0
5349  %b = load volatile float, ptr addrspace(1) %gep1
5350  %c = load volatile float, ptr addrspace(1) %gep2
5351  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
5352  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
5353  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
5354  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
5355  store float %med3, ptr addrspace(1) %outgep
5356  ret void
5357}
5358
5359; Also handle `min` at the root:
5360; min(max(x, y), max(min(x, y), z))
5361
5362define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
5363; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
5364; SI-SDAG:       ; %bb.0:
5365; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5366; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
5367; SI-SDAG-NEXT:    s_mov_b32 s10, 0
5368; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5369; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
5370; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
5371; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
5372; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5373; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
5374; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
5375; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
5376; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5377; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5378; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5379; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5380; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5381; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5382; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
5383; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
5384; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5385; SI-SDAG-NEXT:    s_endpgm
5386;
5387; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16:
5388; SI-GISEL:       ; %bb.0:
5389; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5390; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5391; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
5392; SI-GISEL-NEXT:    s_mov_b32 s10, 0
5393; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
5394; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5395; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
5396; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5397; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5398; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
5399; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5400; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5401; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
5402; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5403; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5404; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
5405; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
5406; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5407; SI-GISEL-NEXT:    s_endpgm
5408;
5409; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
5410; VI-SDAG:       ; %bb.0:
5411; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5412; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5413; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5414; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
5415; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
5416; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5417; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
5418; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
5419; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5420; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
5421; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
5422; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5423; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
5424; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5425; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
5426; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5427; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
5428; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5429; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
5430; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
5431; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5432; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
5433; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
5434; VI-SDAG-NEXT:    s_endpgm
5435;
5436; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16:
5437; VI-GISEL:       ; %bb.0:
5438; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5439; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5440; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5441; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
5442; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
5443; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5444; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5445; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
5446; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
5447; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
5448; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5449; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
5450; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
5451; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
5452; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5453; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
5454; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5455; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
5456; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5457; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
5458; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5459; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
5460; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
5461; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5462; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5463; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
5464; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5465; VI-GISEL-NEXT:    s_endpgm
5466;
5467; GFX9-LABEL: v_test_global_nnans_med3_f32_pat16:
5468; GFX9:       ; %bb.0:
5469; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5470; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5471; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5472; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
5473; GFX9-NEXT:    s_waitcnt vmcnt(0)
5474; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
5475; GFX9-NEXT:    s_waitcnt vmcnt(0)
5476; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
5477; GFX9-NEXT:    s_waitcnt vmcnt(0)
5478; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
5479; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
5480; GFX9-NEXT:    s_endpgm
5481;
5482; GFX11-LABEL: v_test_global_nnans_med3_f32_pat16:
5483; GFX11:       ; %bb.0:
5484; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5485; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5486; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5487; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5488; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5489; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5490; GFX11-NEXT:    s_waitcnt vmcnt(0)
5491; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5492; GFX11-NEXT:    s_waitcnt vmcnt(0)
5493; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5494; GFX11-NEXT:    s_waitcnt vmcnt(0)
5495; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
5496; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
5497; GFX11-NEXT:    s_endpgm
  ; Per-work-item addressing: each of the four pointer arguments is indexed by
  ; the workitem id in x.
5498  %tid = call i32 @llvm.amdgcn.workitem.id.x()
5499  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5500  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5501  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5502  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs a, b and c are read with volatile loads.
5503  %a = load volatile float, ptr addrspace(1) %gep0
5504  %b = load volatile float, ptr addrspace(1) %gep1
5505  %c = load volatile float, ptr addrspace(1) %gep2
  ; Build min(max(a, b), max(min(a, b), c)), i.e. the median pattern with
  ; minnum at the root. Every check block in this function expects the whole
  ; tree to be emitted as a single v_med3_f32, with no separate min/max
  ; instructions remaining.
5506  %tmp0 = call float @llvm.maxnum.f32(float %a, float %b)
5507  %tmp1 = call float @llvm.minnum.f32(float %a, float %b)
5508  %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
5509  %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
5510  store float %med3, ptr addrspace(1) %outgep
5511  ret void
5512}
5513
5514; ---------------------------------------------------------------------
5515; Negative patterns
5516; ---------------------------------------------------------------------
5517
5518define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
5519; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5520; SI-SDAG:       ; %bb.0:
5521; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5522; SI-SDAG-NEXT:    s_mov_b32 s10, 0
5523; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
5524; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5525; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
5526; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
5527; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
5528; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5529; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
5530; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
5531; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
5532; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
5533; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5534; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5535; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5536; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5537; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5538; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5539; SI-SDAG-NEXT:    s_mov_b32 s10, -1
5540; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5541; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5542; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v4
5543; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
5544; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
5545; SI-SDAG-NEXT:    buffer_store_dword v5, off, s[8:11], 0
5546; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5547; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
5548; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
5549; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5550; SI-SDAG-NEXT:    s_endpgm
5551;
5552; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5553; SI-GISEL:       ; %bb.0:
5554; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5555; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5556; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
5557; SI-GISEL-NEXT:    s_mov_b32 s10, 0
5558; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
5559; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5560; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
5561; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5562; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5563; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
5564; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5565; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5566; SI-GISEL-NEXT:    s_mov_b32 s2, -1
5567; SI-GISEL-NEXT:    s_mov_b32 s3, s11
5568; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
5569; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5570; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5571; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5572; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5573; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
5574; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
5575; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
5576; SI-GISEL-NEXT:    buffer_store_dword v5, off, s[0:3], 0
5577; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5578; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
5579; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
5580; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
5581; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5582; SI-GISEL-NEXT:    s_endpgm
5583;
5584; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5585; VI-SDAG:       ; %bb.0:
5586; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5587; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5588; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5589; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
5590; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
5591; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5592; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
5593; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
5594; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5595; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
5596; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
5597; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5598; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
5599; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5600; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
5601; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5602; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
5603; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5604; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
5605; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
5606; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5607; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v7
5608; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5609; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5610; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
5611; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
5612; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
5613; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
5614; VI-SDAG-NEXT:    flat_store_dword v[0:1], v5
5615; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5616; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
5617; VI-SDAG-NEXT:    s_endpgm
5618;
5619; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5620; VI-GISEL:       ; %bb.0:
5621; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5622; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5623; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5624; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
5625; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
5626; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5627; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5628; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
5629; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
5630; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
5631; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5632; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
5633; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
5634; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
5635; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5636; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
5637; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5638; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
5639; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5640; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
5641; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5642; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
5643; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
5644; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5645; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5646; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
5647; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5648; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5649; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
5650; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
5651; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
5652; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
5653; VI-GISEL-NEXT:    flat_store_dword v[0:1], v5
5654; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5655; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5656; VI-GISEL-NEXT:    s_endpgm
5657;
5658; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5659; GFX9:       ; %bb.0:
5660; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5661; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5662; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5663; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
5664; GFX9-NEXT:    s_waitcnt vmcnt(0)
5665; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
5666; GFX9-NEXT:    s_waitcnt vmcnt(0)
5667; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
5668; GFX9-NEXT:    s_waitcnt vmcnt(0)
5669; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
5670; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
5671; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
5672; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
5673; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
5674; GFX9-NEXT:    global_store_dword v[0:1], v4, off
5675; GFX9-NEXT:    s_waitcnt vmcnt(0)
5676; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
5677; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
5678; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
5679; GFX9-NEXT:    s_endpgm
5680;
5681; GFX11-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
5682; GFX11:       ; %bb.0:
5683; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5684; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5685; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5686; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5687; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5688; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5689; GFX11-NEXT:    s_waitcnt vmcnt(0)
5690; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5691; GFX11-NEXT:    s_waitcnt vmcnt(0)
5692; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5693; GFX11-NEXT:    s_waitcnt vmcnt(0)
5694; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
5695; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5696; GFX11-NEXT:    v_min_f32_e32 v4, v1, v2
5697; GFX11-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
5698; GFX11-NEXT:    v_minmax_f32 v1, v1, v2, v4
5699; GFX11-NEXT:    global_store_b32 v[0:1], v4, off dlc
5700; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5701; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
5702; GFX11-NEXT:    s_endpgm
  ; Per-work-item addressing: each of the four pointer arguments is indexed by
  ; the workitem id in x.
5703  %tid = call i32 @llvm.amdgcn.workitem.id.x()
5704  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5705  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5706  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5707  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The three inputs a, b and c are read with volatile loads.
5708  %a = load volatile float, ptr addrspace(1) %gep0
5709  %b = load volatile float, ptr addrspace(1) %gep1
5710  %c = load volatile float, ptr addrspace(1) %gep2
  ; max(min(a, b), min(max(a, b), c)) has the shape of a median-of-three, but
  ; %tmp0 (the inner min) has a second user: the volatile store that follows.
  ; None of the check blocks in this function contain v_med3_f32. They expect
  ; separate v_min_f32 / v_max_f32 instructions, except that the GFX11 output
  ; fuses the final min and max into one v_minmax_f32.
  ; NOTE(review): the v_mul_f32 by 1.0 and v_max_f32 x, x instructions in the
  ; expected output presumably come from how attribute group #1 (defined
  ; outside this function) treats the inputs - confirm.
5711  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  ; NOTE(review): the extra use stores to an undef address. Newer LLVM tests
  ; tend to use poison here; switching would require regenerating the checks.
5712  store volatile float %tmp0, ptr addrspace(1) undef
5713  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
5714  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
5715  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
5716  store float %med3, ptr addrspace(1) %outgep
5717  ret void
5718}
5719
5720define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
5721; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5722; SI-SDAG:       ; %bb.0:
5723; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5724; SI-SDAG-NEXT:    s_mov_b32 s10, 0
5725; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
5726; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5727; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
5728; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
5729; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
5730; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5731; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
5732; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
5733; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
5734; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
5735; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5736; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5737; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5738; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5739; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5740; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5741; SI-SDAG-NEXT:    s_mov_b32 s10, -1
5742; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5743; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5744; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v4
5745; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
5746; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
5747; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[8:11], 0
5748; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
5749; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
5750; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
5751; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5752; SI-SDAG-NEXT:    s_endpgm
5753;
5754; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5755; SI-GISEL:       ; %bb.0:
5756; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5757; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5758; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
5759; SI-GISEL-NEXT:    s_mov_b32 s10, 0
5760; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
5761; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5762; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
5763; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5764; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5765; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
5766; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5767; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5768; SI-GISEL-NEXT:    s_mov_b32 s2, -1
5769; SI-GISEL-NEXT:    s_mov_b32 s3, s11
5770; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
5771; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5772; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5773; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5774; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5775; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
5776; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
5777; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
5778; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[0:3], 0
5779; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
5780; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
5781; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
5782; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
5783; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5784; SI-GISEL-NEXT:    s_endpgm
5785;
5786; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5787; VI-SDAG:       ; %bb.0:
5788; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5789; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5790; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5791; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
5792; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
5793; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5794; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
5795; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
5796; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5797; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
5798; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
5799; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5800; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
5801; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5802; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
5803; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5804; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
5805; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5806; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
5807; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
5808; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5809; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v7
5810; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5811; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5812; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
5813; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
5814; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
5815; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5816; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
5817; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
5818; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
5819; VI-SDAG-NEXT:    s_endpgm
5820;
5821; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5822; VI-GISEL:       ; %bb.0:
5823; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5824; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
5825; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5826; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
5827; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
5828; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5829; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5830; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
5831; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
5832; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
5833; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5834; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
5835; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
5836; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
5837; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
5838; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
5839; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5840; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
5841; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5842; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
5843; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5844; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
5845; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
5846; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
5847; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5848; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
5849; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5850; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5851; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
5852; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
5853; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5854; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5855; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
5856; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
5857; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
5858; VI-GISEL-NEXT:    s_endpgm
5859;
5860; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5861; GFX9:       ; %bb.0:
5862; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5863; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5864; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5865; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
5866; GFX9-NEXT:    s_waitcnt vmcnt(0)
5867; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
5868; GFX9-NEXT:    s_waitcnt vmcnt(0)
5869; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
5870; GFX9-NEXT:    s_waitcnt vmcnt(0)
5871; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
5872; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
5873; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
5874; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
5875; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
5876; GFX9-NEXT:    global_store_dword v[0:1], v1, off
5877; GFX9-NEXT:    s_waitcnt vmcnt(0)
5878; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
5879; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
5880; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
5881; GFX9-NEXT:    s_endpgm
5882;
5883; GFX11-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5884; GFX11-SDAG:       ; %bb.0:
5885; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5886; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5887; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5888; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5889; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5890; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5891; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
5892; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5893; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
5894; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5895; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
5896; GFX11-SDAG-NEXT:    v_max_f32_e32 v1, v1, v1
5897; GFX11-SDAG-NEXT:    v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v2, v2, v2
5898; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5899; GFX11-SDAG-NEXT:    v_max_f32_e32 v4, v1, v2
5900; GFX11-SDAG-NEXT:    v_min_f32_e32 v3, v4, v3
5901; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5902; GFX11-SDAG-NEXT:    v_minmax_f32 v1, v1, v2, v3
5903; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v4, off dlc
5904; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
5905; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
5906; GFX11-SDAG-NEXT:    s_endpgm
5907;
5908; GFX11-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
5909; GFX11-GISEL:       ; %bb.0:
5910; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
5911; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
5912; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5913; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5914; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5915; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
5916; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
5917; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
5918; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
5919; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
5920; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
5921; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
5922; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5923; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
5924; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
5925; GFX11-GISEL-NEXT:    v_minmax_f32 v2, v1, v2, v4
5926; GFX11-GISEL-NEXT:    global_store_b32 v[0:1], v1, off dlc
5927; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
5928; GFX11-GISEL-NEXT:    global_store_b32 v0, v2, s[0:1]
5929; GFX11-GISEL-NEXT:    s_endpgm
5930  %tid = call i32 @llvm.amdgcn.workitem.id.x()
5931  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
5932  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
5933  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
5934  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
5935  %a = load volatile float, ptr addrspace(1) %gep0
5936  %b = load volatile float, ptr addrspace(1) %gep1
5937  %c = load volatile float, ptr addrspace(1) %gep2
5938  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
5939  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
5940  store volatile float %tmp1, ptr addrspace(1) undef
5941  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
5942  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
5943  store float %med3, ptr addrspace(1) %outgep
5944  ret void
5945}
5946
; med3 pattern 0, max(min(a, b), min(max(a, b), c)), built from minnum/maxnum
; calls on three volatile-loaded floats. The inner min(max(a, b), c) has a
; second use: it is also written out by a volatile store. None of the
; minnum/maxnum calls carry fast-math flags.
; The expected output keeps a separate min/max/min/max chain on the SI, VI and
; GFX9 runs (no v_med3_f32); the GFX11 runs expect v_maxmin_f32 followed by
; v_minmax_f32, with the v_maxmin_f32 result feeding the extra store.
; Attribute group #1 is defined outside this part of the file; presumably the
; "safe" in the name refers to NaN-safe lowering -- confirm against #1.
5947define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
5948; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
5949; SI-SDAG:       ; %bb.0:
5950; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5951; SI-SDAG-NEXT:    s_mov_b32 s10, 0
5952; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
5953; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5954; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
5955; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
5956; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
5957; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
5958; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
5959; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
5960; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
5961; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
5962; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5963; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5964; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
5965; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5966; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
5967; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
5968; SI-SDAG-NEXT:    s_mov_b32 s10, -1
5969; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
5970; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
5971; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v4
5972; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
5973; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
5974; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
5975; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[8:11], 0
5976; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
5977; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
5978; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
5979; SI-SDAG-NEXT:    s_endpgm
5980;
5981; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
5982; SI-GISEL:       ; %bb.0:
5983; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5984; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
5985; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
5986; SI-GISEL-NEXT:    s_mov_b32 s10, 0
5987; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
5988; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
5989; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
5990; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
5991; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5992; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
5993; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
5994; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
5995; SI-GISEL-NEXT:    s_mov_b32 s2, -1
5996; SI-GISEL-NEXT:    s_mov_b32 s3, s11
5997; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
5998; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
5999; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6000; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6001; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6002; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
6003; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
6004; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
6005; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
6006; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[0:3], 0
6007; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
6008; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
6009; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
6010; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6011; SI-GISEL-NEXT:    s_endpgm
6012;
6013; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6014; VI-SDAG:       ; %bb.0:
6015; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6016; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6017; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6018; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
6019; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
6020; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6021; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
6022; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
6023; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6024; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
6025; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
6026; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6027; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
6028; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6029; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
6030; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6031; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
6032; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6033; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
6034; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
6035; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6036; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v7
6037; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6038; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6039; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
6040; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
6041; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
6042; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6043; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6044; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
6045; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6046; VI-SDAG-NEXT:    s_endpgm
6047;
6048; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6049; VI-GISEL:       ; %bb.0:
6050; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6051; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6052; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6053; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
6054; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
6055; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6056; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6057; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
6058; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
6059; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
6060; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6061; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
6062; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
6063; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
6064; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6065; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
6066; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6067; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
6068; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6069; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
6070; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6071; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
6072; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
6073; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6074; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6075; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
6076; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6077; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6078; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
6079; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
6080; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
6081; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6082; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6083; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
6084; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6085; VI-GISEL-NEXT:    s_endpgm
6086;
6087; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6088; GFX9:       ; %bb.0:
6089; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
6090; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6091; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6092; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
6093; GFX9-NEXT:    s_waitcnt vmcnt(0)
6094; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
6095; GFX9-NEXT:    s_waitcnt vmcnt(0)
6096; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
6097; GFX9-NEXT:    s_waitcnt vmcnt(0)
6098; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
6099; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
6100; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
6101; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
6102; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
6103; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
6104; GFX9-NEXT:    global_store_dword v[0:1], v1, off
6105; GFX9-NEXT:    s_waitcnt vmcnt(0)
6106; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
6107; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
6108; GFX9-NEXT:    s_endpgm
6109;
6110; GFX11-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
6111; GFX11:       ; %bb.0:
6112; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
6113; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
6114; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6115; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6116; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
6117; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
6118; GFX11-NEXT:    s_waitcnt vmcnt(0)
6119; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
6120; GFX11-NEXT:    s_waitcnt vmcnt(0)
6121; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
6122; GFX11-NEXT:    s_waitcnt vmcnt(0)
6123; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
6124; GFX11-NEXT:    v_max_f32_e32 v3, v3, v3
6125; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6126; GFX11-NEXT:    v_maxmin_f32 v3, v1, v2, v3
6127; GFX11-NEXT:    v_minmax_f32 v1, v1, v2, v3
6128; GFX11-NEXT:    global_store_b32 v[0:1], v3, off dlc
6129; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6130; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
6131; GFX11-NEXT:    s_endpgm
  ; Per-workitem element addresses: each pointer argument indexed by %tid.
6132  %tid = call i32 @llvm.amdgcn.workitem.id.x()
6133  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6134  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6135  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6136  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads of the three operands.
6137  %a = load volatile float, ptr addrspace(1) %gep0
6138  %b = load volatile float, ptr addrspace(1) %gep1
6139  %c = load volatile float, ptr addrspace(1) %gep2
  ; tmp0 = min(a, b); tmp1 = max(a, b); tmp2 = min(tmp1, c).
6140  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
6141  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
6142  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  ; Second use of %tmp2 (the "multi_use2" case): a volatile store through an
  ; undef pointer, so %tmp2 must exist as a value of its own.
6143  store volatile float %tmp2, ptr addrspace(1) undef
  ; med3 = max(tmp0, tmp2), written to this workitem's slot in %out.
6144  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6145  store float %med3, ptr addrspace(1) %outgep
6146  ret void
6147}
6148
; Baseline med3 pattern 0: max(min(a, b), min(max(a, b), c)) on three
; volatile-loaded floats. Every intermediate value has exactly one use and
; none of the minnum/maxnum calls carry fast-math flags.
; The expected output keeps the min/max chain on the SI, VI and GFX9 runs (no
; v_med3_f32). On GFX11 the SelectionDAG runs expect v_maxmin_f32 followed by
; v_minmax_f32, while the GlobalISel runs expect v_min_f32, a v_dual_max_f32
; and a single v_minmax_f32.
; Attribute group #1 is defined outside this part of the file; presumably the
; "safe" in the name refers to NaN-safe lowering -- confirm against #1.
6149define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6150; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
6151; SI-SDAG:       ; %bb.0:
6152; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6153; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
6154; SI-SDAG-NEXT:    s_mov_b32 s10, 0
6155; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6156; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
6157; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
6158; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6159; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
6160; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
6161; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6162; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6163; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6164; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6165; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[6:7]
6166; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
6167; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[12:15], 0 addr64 glc
6168; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6169; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6170; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6171; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
6172; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
6173; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v4
6174; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
6175; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
6176; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6177; SI-SDAG-NEXT:    s_endpgm
6178;
6179; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0:
6180; SI-GISEL:       ; %bb.0:
6181; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6182; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6183; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
6184; SI-GISEL-NEXT:    s_mov_b32 s10, 0
6185; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
6186; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6187; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
6188; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6189; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6190; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
6191; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6192; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6193; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
6194; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6195; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6196; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6197; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6198; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
6199; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
6200; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
6201; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
6202; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
6203; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
6204; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6205; SI-GISEL-NEXT:    s_endpgm
6206;
6207; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
6208; VI-SDAG:       ; %bb.0:
6209; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6210; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
6211; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6212; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
6213; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
6214; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6215; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
6216; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
6217; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6218; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
6219; VI-SDAG-NEXT:    flat_load_dword v6, v[0:1] glc
6220; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6221; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
6222; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6223; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s6, v4
6224; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
6225; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1] glc
6226; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6227; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
6228; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
6229; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6230; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v6
6231; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6232; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
6233; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
6234; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6235; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
6236; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
6237; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6238; VI-SDAG-NEXT:    s_endpgm
6239;
6240; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0:
6241; VI-GISEL:       ; %bb.0:
6242; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6243; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6244; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6245; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
6246; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
6247; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6248; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6249; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
6250; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
6251; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
6252; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6253; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
6254; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
6255; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
6256; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6257; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
6258; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6259; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
6260; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
6261; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
6262; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6263; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
6264; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
6265; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6266; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6267; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
6268; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
6269; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
6270; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
6271; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
6272; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
6273; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
6274; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6275; VI-GISEL-NEXT:    s_endpgm
6276;
6277; GFX9-LABEL: v_test_safe_med3_f32_pat0:
6278; GFX9:       ; %bb.0:
6279; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
6280; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6281; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6282; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
6283; GFX9-NEXT:    s_waitcnt vmcnt(0)
6284; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
6285; GFX9-NEXT:    s_waitcnt vmcnt(0)
6286; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
6287; GFX9-NEXT:    s_waitcnt vmcnt(0)
6288; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
6289; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
6290; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
6291; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
6292; GFX9-NEXT:    v_max_f32_e32 v2, v3, v3
6293; GFX9-NEXT:    v_min_f32_e32 v1, v1, v2
6294; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
6295; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
6296; GFX9-NEXT:    s_endpgm
6297;
6298; GFX11-SDAG-LABEL: v_test_safe_med3_f32_pat0:
6299; GFX11-SDAG:       ; %bb.0:
6300; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
6301; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
6302; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6303; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6304; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6305; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
6306; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
6307; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
6308; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
6309; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
6310; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
6311; GFX11-SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
6312; GFX11-SDAG-NEXT:    v_max_f32_e32 v3, v3, v3
6313; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6314; GFX11-SDAG-NEXT:    v_maxmin_f32 v3, v1, v2, v3
6315; GFX11-SDAG-NEXT:    v_minmax_f32 v1, v1, v2, v3
6316; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
6317; GFX11-SDAG-NEXT:    s_endpgm
6318;
6319; GFX11-GISEL-LABEL: v_test_safe_med3_f32_pat0:
6320; GFX11-GISEL:       ; %bb.0:
6321; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
6322; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
6323; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6324; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6325; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6326; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
6327; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
6328; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
6329; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
6330; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
6331; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
6332; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
6333; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6334; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
6335; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
6336; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
6337; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
6338; GFX11-GISEL-NEXT:    s_endpgm
  ; Per-workitem element addresses: each pointer argument indexed by %tid.
6339  %tid = call i32 @llvm.amdgcn.workitem.id.x()
6340  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6341  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6342  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6343  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads of the three operands.
6344  %a = load volatile float, ptr addrspace(1) %gep0
6345  %b = load volatile float, ptr addrspace(1) %gep1
6346  %c = load volatile float, ptr addrspace(1) %gep2
  ; med3 = max(min(a, b), min(max(a, b), c)); no value here has a second use.
6347  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
6348  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
6349  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
6350  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6351  store float %med3, ptr addrspace(1) %outgep
6352  ret void
6353}
6354
; med3 pattern 0 whose operands are produced by fadd instructions instead of
; being used straight from the loads. The fadds feeding %b.nnan and %c.nnan
; carry the nnan flag; the fadd feeding %a.nnan does not, despite the value's
; name (presumably this is the "missing0" of the test name). The
; minnum/maxnum calls themselves carry no fast-math flags.
; The expected output for every run still contains a single v_med3_f32 taking
; the three fadd results as operands.
6355define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6356; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6357; SI-SDAG:       ; %bb.0:
6358; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6359; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
6360; SI-SDAG-NEXT:    s_mov_b32 s10, 0
6361; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6362; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
6363; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
6364; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
6365; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6366; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
6367; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
6368; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
6369; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6370; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6371; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6372; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6373; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6374; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6375; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
6376; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
6377; SI-SDAG-NEXT:    v_add_f32_e32 v3, 2.0, v3
6378; SI-SDAG-NEXT:    v_add_f32_e32 v4, 4.0, v4
6379; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
6380; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6381; SI-SDAG-NEXT:    s_endpgm
6382;
6383; SI-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6384; SI-GISEL:       ; %bb.0:
6385; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6386; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6387; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
6388; SI-GISEL-NEXT:    s_mov_b32 s10, 0
6389; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
6390; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6391; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
6392; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6393; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6394; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
6395; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6396; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6397; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
6398; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6399; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6400; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
6401; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
6402; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
6403; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
6404; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
6405; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6406; SI-GISEL-NEXT:    s_endpgm
6407;
6408; VI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6409; VI-SDAG:       ; %bb.0:
6410; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6411; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6412; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6413; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
6414; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
6415; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6416; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
6417; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
6418; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6419; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
6420; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
6421; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6422; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
6423; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6424; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
6425; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6426; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
6427; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6428; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
6429; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
6430; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6431; VI-SDAG-NEXT:    v_add_f32_e32 v4, 1.0, v7
6432; VI-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
6433; VI-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
6434; VI-SDAG-NEXT:    v_med3_f32 v2, v4, v2, v3
6435; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6436; VI-SDAG-NEXT:    s_endpgm
6437;
6438; VI-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6439; VI-GISEL:       ; %bb.0:
6440; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6441; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6442; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6443; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
6444; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
6445; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6446; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6447; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
6448; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
6449; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
6450; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6451; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
6452; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
6453; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
6454; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6455; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
6456; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6457; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
6458; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6459; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
6460; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6461; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
6462; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
6463; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6464; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6465; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
6466; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
6467; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
6468; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
6469; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6470; VI-GISEL-NEXT:    s_endpgm
6471;
6472; GFX9-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6473; GFX9:       ; %bb.0:
6474; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
6475; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6476; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6477; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
6478; GFX9-NEXT:    s_waitcnt vmcnt(0)
6479; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
6480; GFX9-NEXT:    s_waitcnt vmcnt(0)
6481; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
6482; GFX9-NEXT:    s_waitcnt vmcnt(0)
6483; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
6484; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
6485; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
6486; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
6487; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
6488; GFX9-NEXT:    s_endpgm
6489;
6490; GFX11-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
6491; GFX11:       ; %bb.0:
6492; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
6493; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
6494; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6495; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6496; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
6497; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
6498; GFX11-NEXT:    s_waitcnt vmcnt(0)
6499; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
6500; GFX11-NEXT:    s_waitcnt vmcnt(0)
6501; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
6502; GFX11-NEXT:    s_waitcnt vmcnt(0)
6503; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
6504; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
6505; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6506; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
6507; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
6508; GFX11-NEXT:    s_endpgm
  ; Per-workitem element addresses: each pointer argument indexed by %tid.
6509  %tid = call i32 @llvm.amdgcn.workitem.id.x()
6510  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6511  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6512  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6513  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; Volatile loads of the three raw inputs.
6514  %a = load volatile float, ptr addrspace(1) %gep0
6515  %b = load volatile float, ptr addrspace(1) %gep1
6516  %c = load volatile float, ptr addrspace(1) %gep2
6517
  ; Only the second and third fadd carry nnan; the first one is a plain fadd
  ; even though its result is named %a.nnan.
6518  %a.nnan = fadd float %a, 1.0
6519  %b.nnan = fadd nnan float %b, 2.0
6520  %c.nnan = fadd nnan float %c, 4.0
6521
  ; med3 = max(min(a', b'), min(max(a', b'), c')) over the fadd results.
6522  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
6523  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
6524  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
6525  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6526  store float %med3, ptr addrspace(1) %outgep
6527  ret void
6528}
6529
; Median-of-three min/max pattern in which only two of the three inputs are
; produced by an nnan-flagged instruction: %a.nnan and %c.nnan come from
; `fadd nnan`, while %b.nnan (despite its name) comes from a plain `fadd`.
; The minnum/maxnum calls themselves carry no fast-math flags.
; The assertions below expect, for every run line, the three adds followed by
; a single v_med3_f32.
; NOTE(review): "missing1" presumably refers to operand index 1 (%b) being the
; one without nnan - confirm against the sibling tests.
6530define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6531; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6532; SI-SDAG:       ; %bb.0:
6533; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6534; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
6535; SI-SDAG-NEXT:    s_mov_b32 s10, 0
6536; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6537; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
6538; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
6539; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
6540; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6541; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
6542; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
6543; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
6544; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6545; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6546; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6547; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6548; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6549; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6550; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
6551; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
6552; SI-SDAG-NEXT:    v_add_f32_e32 v3, 2.0, v3
6553; SI-SDAG-NEXT:    v_add_f32_e32 v4, 4.0, v4
6554; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
6555; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6556; SI-SDAG-NEXT:    s_endpgm
6557;
6558; SI-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6559; SI-GISEL:       ; %bb.0:
6560; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6561; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6562; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
6563; SI-GISEL-NEXT:    s_mov_b32 s10, 0
6564; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
6565; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6566; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
6567; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6568; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6569; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
6570; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6571; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6572; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
6573; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6574; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6575; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
6576; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
6577; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
6578; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
6579; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
6580; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6581; SI-GISEL-NEXT:    s_endpgm
6582;
6583; VI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6584; VI-SDAG:       ; %bb.0:
6585; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6586; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6587; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6588; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
6589; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
6590; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6591; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
6592; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
6593; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6594; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
6595; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
6596; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6597; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
6598; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6599; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
6600; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6601; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
6602; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6603; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
6604; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
6605; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6606; VI-SDAG-NEXT:    v_add_f32_e32 v4, 1.0, v7
6607; VI-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
6608; VI-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
6609; VI-SDAG-NEXT:    v_med3_f32 v2, v4, v2, v3
6610; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6611; VI-SDAG-NEXT:    s_endpgm
6612;
6613; VI-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6614; VI-GISEL:       ; %bb.0:
6615; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6616; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6617; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6618; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
6619; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
6620; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6621; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6622; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
6623; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
6624; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
6625; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6626; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
6627; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
6628; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
6629; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6630; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
6631; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6632; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
6633; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6634; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
6635; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6636; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
6637; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
6638; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6639; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6640; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
6641; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
6642; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
6643; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
6644; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6645; VI-GISEL-NEXT:    s_endpgm
6646;
6647; GFX9-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6648; GFX9:       ; %bb.0:
6649; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
6650; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6651; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6652; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
6653; GFX9-NEXT:    s_waitcnt vmcnt(0)
6654; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
6655; GFX9-NEXT:    s_waitcnt vmcnt(0)
6656; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
6657; GFX9-NEXT:    s_waitcnt vmcnt(0)
6658; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
6659; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
6660; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
6661; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
6662; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
6663; GFX9-NEXT:    s_endpgm
6664;
6665; GFX11-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
6666; GFX11:       ; %bb.0:
6667; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
6668; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
6669; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6670; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6671; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
6672; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
6673; GFX11-NEXT:    s_waitcnt vmcnt(0)
6674; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
6675; GFX11-NEXT:    s_waitcnt vmcnt(0)
6676; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
6677; GFX11-NEXT:    s_waitcnt vmcnt(0)
6678; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
6679; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
6680; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6681; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
6682; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
6683; GFX11-NEXT:    s_endpgm
  ; Each work-item indexes all four buffers by its workitem id; the three
  ; inputs are read with volatile loads.
6684  %tid = call i32 @llvm.amdgcn.workitem.id.x()
6685  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6686  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6687  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6688  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
6689  %a = load volatile float, ptr addrspace(1) %gep0
6690  %b = load volatile float, ptr addrspace(1) %gep1
6691  %c = load volatile float, ptr addrspace(1) %gep2
6692
  ; Only the adds feeding %a.nnan and %c.nnan carry nnan; the add feeding
  ; %b.nnan is unflagged.
6693  %a.nnan = fadd nnan float %a, 1.0
6694  %b.nnan = fadd float %b, 2.0
6695  %c.nnan = fadd nnan float %c, 4.0
6696
  ; max(min(a, b), min(max(a, b), c)), i.e. the median of a, b and c when no
  ; operand is NaN.
6697  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
6698  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
6699  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
6700  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6701  store float %med3, ptr addrspace(1) %outgep
6702  ret void
6703}
6704
; Median-of-three min/max pattern in which only two of the three inputs are
; produced by an nnan-flagged instruction: %a.nnan and %b.nnan come from
; `fadd nnan`, while %c.nnan (despite its name) comes from a plain `fadd`.
; The minnum/maxnum calls themselves carry no fast-math flags.
; The assertions below expect, for every run line, the three adds followed by
; a single v_med3_f32.
; NOTE(review): "missing2" presumably refers to operand index 2 (%c) being the
; one without nnan - confirm against the sibling tests.
6705define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6706; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6707; SI-SDAG:       ; %bb.0:
6708; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6709; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
6710; SI-SDAG-NEXT:    s_mov_b32 s10, 0
6711; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6712; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
6713; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
6714; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
6715; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6716; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
6717; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
6718; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
6719; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6720; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6721; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6722; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6723; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6724; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6725; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
6726; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
6727; SI-SDAG-NEXT:    v_add_f32_e32 v3, 2.0, v3
6728; SI-SDAG-NEXT:    v_add_f32_e32 v4, 4.0, v4
6729; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
6730; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6731; SI-SDAG-NEXT:    s_endpgm
6732;
6733; SI-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6734; SI-GISEL:       ; %bb.0:
6735; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6736; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6737; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
6738; SI-GISEL-NEXT:    s_mov_b32 s10, 0
6739; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
6740; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6741; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
6742; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6743; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6744; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
6745; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6746; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6747; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
6748; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6749; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6750; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
6751; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
6752; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
6753; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
6754; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
6755; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6756; SI-GISEL-NEXT:    s_endpgm
6757;
6758; VI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6759; VI-SDAG:       ; %bb.0:
6760; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6761; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6762; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6763; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
6764; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
6765; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6766; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
6767; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
6768; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6769; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
6770; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
6771; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6772; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
6773; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6774; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
6775; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6776; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
6777; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6778; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
6779; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
6780; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6781; VI-SDAG-NEXT:    v_add_f32_e32 v4, 1.0, v7
6782; VI-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
6783; VI-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
6784; VI-SDAG-NEXT:    v_med3_f32 v2, v4, v2, v3
6785; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6786; VI-SDAG-NEXT:    s_endpgm
6787;
6788; VI-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6789; VI-GISEL:       ; %bb.0:
6790; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6791; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6792; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6793; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
6794; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
6795; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6796; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6797; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
6798; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
6799; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
6800; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6801; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
6802; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
6803; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
6804; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6805; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
6806; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6807; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
6808; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6809; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
6810; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6811; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
6812; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
6813; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6814; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6815; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
6816; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
6817; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
6818; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
6819; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6820; VI-GISEL-NEXT:    s_endpgm
6821;
6822; GFX9-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6823; GFX9:       ; %bb.0:
6824; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
6825; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6826; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
6827; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
6828; GFX9-NEXT:    s_waitcnt vmcnt(0)
6829; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
6830; GFX9-NEXT:    s_waitcnt vmcnt(0)
6831; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
6832; GFX9-NEXT:    s_waitcnt vmcnt(0)
6833; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
6834; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
6835; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
6836; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
6837; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
6838; GFX9-NEXT:    s_endpgm
6839;
6840; GFX11-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
6841; GFX11:       ; %bb.0:
6842; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
6843; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
6844; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6845; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6846; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
6847; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
6848; GFX11-NEXT:    s_waitcnt vmcnt(0)
6849; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
6850; GFX11-NEXT:    s_waitcnt vmcnt(0)
6851; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
6852; GFX11-NEXT:    s_waitcnt vmcnt(0)
6853; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
6854; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
6855; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6856; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
6857; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
6858; GFX11-NEXT:    s_endpgm
  ; Each work-item indexes all four buffers by its workitem id; the three
  ; inputs are read with volatile loads.
6859  %tid = call i32 @llvm.amdgcn.workitem.id.x()
6860  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
6861  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
6862  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
6863  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
6864  %a = load volatile float, ptr addrspace(1) %gep0
6865  %b = load volatile float, ptr addrspace(1) %gep1
6866  %c = load volatile float, ptr addrspace(1) %gep2
6867
  ; Only the adds feeding %a.nnan and %b.nnan carry nnan; the add feeding
  ; %c.nnan is unflagged.
6868  %a.nnan = fadd nnan float %a, 1.0
6869  %b.nnan = fadd nnan float %b, 2.0
6870  %c.nnan = fadd float %c, 4.0
6871
  ; max(min(a, b), min(max(a, b), c)), i.e. the median of a, b and c when no
  ; operand is NaN.
6872  %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
6873  %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
6874  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
6875  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
6876  store float %med3, ptr addrspace(1) %outgep
6877  ret void
6878}
6879
; Median-of-three min/max pattern where the nnan flag sits on the
; minnum/maxnum calls themselves rather than on the instructions producing
; their inputs, and where the first operand is negated via
; `fsub float -0.0, %a` before being used by both the first min and the max.
; Per the assertions below, the SelectionDAG runs expect the negation folded
; into a source modifier on v_med3_f32 (-v2, -v7 or -v1). The GlobalISel runs
; expect a separate instruction first (v_mul_f32 by -1.0 on the SI and VI
; run lines, v_max_f32 with both sources negated on the GFX9 and GFX11 run
; lines), followed by a v_med3_f32 without modifiers.
6880define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
6881; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6882; SI-SDAG:       ; %bb.0:
6883; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6884; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
6885; SI-SDAG-NEXT:    s_mov_b32 s10, 0
6886; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6887; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
6888; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
6889; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
6890; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6891; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
6892; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
6893; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
6894; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6895; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6896; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
6897; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6898; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
6899; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6900; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
6901; SI-SDAG-NEXT:    v_med3_f32 v2, -v2, v3, v4
6902; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6903; SI-SDAG-NEXT:    s_endpgm
6904;
6905; SI-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6906; SI-GISEL:       ; %bb.0:
6907; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
6908; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6909; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
6910; SI-GISEL-NEXT:    s_mov_b32 s10, 0
6911; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
6912; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6913; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
6914; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
6915; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6916; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
6917; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
6918; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6919; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
6920; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
6921; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6922; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
6923; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
6924; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
6925; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
6926; SI-GISEL-NEXT:    s_endpgm
6927;
6928; VI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6929; VI-SDAG:       ; %bb.0:
6930; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6931; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6932; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6933; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
6934; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
6935; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6936; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
6937; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
6938; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6939; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
6940; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
6941; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6942; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
6943; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6944; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
6945; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6946; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
6947; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
6948; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
6949; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
6950; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6951; VI-SDAG-NEXT:    v_med3_f32 v2, -v7, v2, v3
6952; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
6953; VI-SDAG-NEXT:    s_endpgm
6954;
6955; VI-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6956; VI-GISEL:       ; %bb.0:
6957; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
6958; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
6959; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
6960; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
6961; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
6962; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6963; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6964; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
6965; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
6966; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
6967; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
6968; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
6969; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
6970; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
6971; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
6972; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
6973; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6974; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
6975; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6976; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
6977; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
6978; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
6979; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
6980; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
6981; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6982; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
6983; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
6984; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
6985; VI-GISEL-NEXT:    s_endpgm
6986;
6987; GFX9-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
6988; GFX9-SDAG:       ; %bb.0:
6989; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
6990; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
6991; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
6992; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
6993; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
6994; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
6995; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
6996; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
6997; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
6998; GFX9-SDAG-NEXT:    v_med3_f32 v1, -v1, v2, v3
6999; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
7000; GFX9-SDAG-NEXT:    s_endpgm
7001;
7002; GFX9-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
7003; GFX9-GISEL:       ; %bb.0:
7004; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
7005; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7006; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7007; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
7008; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
7009; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
7010; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
7011; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
7012; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
7013; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
7014; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
7015; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
7016; GFX9-GISEL-NEXT:    s_endpgm
7017;
7018; GFX11-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
7019; GFX11-SDAG:       ; %bb.0:
7020; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7021; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7022; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7023; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7024; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7025; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
7026; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7027; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
7028; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7029; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
7030; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7031; GFX11-SDAG-NEXT:    v_med3_f32 v1, -v1, v2, v3
7032; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
7033; GFX11-SDAG-NEXT:    s_endpgm
7034;
7035; GFX11-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
7036; GFX11-GISEL:       ; %bb.0:
7037; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7038; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7039; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7040; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7041; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7042; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
7043; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7044; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
7045; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7046; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
7047; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7048; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
7049; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7050; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
7051; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
7052; GFX11-GISEL-NEXT:    s_endpgm
  ; Each work-item indexes all four buffers by its workitem id; the three
  ; inputs are read with volatile loads.
7053  %tid = call i32 @llvm.amdgcn.workitem.id.x()
7054  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7055  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
7056  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
7057  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
7058  %a = load volatile float, ptr addrspace(1) %gep0
7059  %b = load volatile float, ptr addrspace(1) %gep1
7060  %c = load volatile float, ptr addrspace(1) %gep2
  ; Negation of %a written as a subtraction from -0.0; the same negated value
  ; feeds both %tmp0 and %tmp1.
7061  %a.fneg = fsub float -0.0, %a
  ; max(min(-a, b), min(max(-a, b), c)) with nnan on every min/max call.
7062  %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
7063  %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
7064  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
7065  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
7066  store float %med3, ptr addrspace(1) %outgep
7067  ret void
7068}
7069
7070define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
7071; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7072; SI-SDAG:       ; %bb.0:
7073; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7074; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
7075; SI-SDAG-NEXT:    s_mov_b32 s10, 0
7076; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7077; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
7078; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
7079; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7080; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
7081; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
7082; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7083; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7084; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
7085; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7086; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[6:7]
7087; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[12:15], 0 addr64 glc
7088; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7089; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
7090; SI-SDAG-NEXT:    v_min_f32_e64 v5, -v2, v3
7091; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
7092; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
7093; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
7094; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7095; SI-SDAG-NEXT:    s_endpgm
7096;
7097; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7098; SI-GISEL:       ; %bb.0:
7099; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7100; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7101; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
7102; SI-GISEL-NEXT:    s_mov_b32 s10, 0
7103; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
7104; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7105; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
7106; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7107; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7108; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
7109; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
7110; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7111; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
7112; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
7113; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7114; SI-GISEL-NEXT:    v_mul_f32_e32 v5, -1.0, v2
7115; SI-GISEL-NEXT:    v_min_f32_e32 v5, v5, v3
7116; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
7117; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
7118; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
7119; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
7120; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7121; SI-GISEL-NEXT:    s_endpgm
7122;
7123; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7124; VI-SDAG:       ; %bb.0:
7125; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
7126; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
7127; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7128; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
7129; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
7130; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7131; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
7132; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
7133; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
7134; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
7135; VI-SDAG-NEXT:    flat_load_dword v6, v[0:1] glc
7136; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7137; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
7138; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7139; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s6, v4
7140; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
7141; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1] glc
7142; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7143; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
7144; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
7145; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7146; VI-SDAG-NEXT:    v_min_f32_e64 v4, -v6, v2
7147; VI-SDAG-NEXT:    v_max_f32_e32 v2, v6, v2
7148; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
7149; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
7150; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
7151; VI-SDAG-NEXT:    s_endpgm
7152;
7153; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7154; VI-GISEL:       ; %bb.0:
7155; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
7156; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
7157; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7158; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
7159; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
7160; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
7161; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7162; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
7163; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
7164; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
7165; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
7166; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
7167; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
7168; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
7169; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7170; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
7171; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7172; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
7173; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
7174; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
7175; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7176; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
7177; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
7178; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
7179; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7180; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
7181; VI-GISEL-NEXT:    v_max_f32_e32 v5, v7, v2
7182; VI-GISEL-NEXT:    v_min_f32_e32 v2, v4, v2
7183; VI-GISEL-NEXT:    v_min_f32_e32 v3, v5, v3
7184; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
7185; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
7186; VI-GISEL-NEXT:    s_endpgm
7187;
7188; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7189; GFX9-SDAG:       ; %bb.0:
7190; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
7191; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7192; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7193; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
7194; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
7195; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
7196; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
7197; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
7198; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
7199; GFX9-SDAG-NEXT:    v_min_f32_e64 v4, -v1, v2
7200; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v2
7201; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v3
7202; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v4, v1
7203; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
7204; GFX9-SDAG-NEXT:    s_endpgm
7205;
7206; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7207; GFX9-GISEL:       ; %bb.0:
7208; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
7209; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7210; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7211; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
7212; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
7213; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
7214; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
7215; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
7216; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
7217; GFX9-GISEL-NEXT:    v_max_f32_e64 v4, -v1, -v1
7218; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
7219; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v4, v2
7220; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
7221; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
7222; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
7223; GFX9-GISEL-NEXT:    s_endpgm
7224;
7225; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7226; GFX11-SDAG:       ; %bb.0:
7227; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7228; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7229; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7230; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7231; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7232; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
7233; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7234; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
7235; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7236; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
7237; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7238; GFX11-SDAG-NEXT:    v_maxmin_f32 v3, v1, v2, v3
7239; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7240; GFX11-SDAG-NEXT:    v_minmax_f32 v1, -v1, v2, v3
7241; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
7242; GFX11-SDAG-NEXT:    s_endpgm
7243;
7244; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
7245; GFX11-GISEL:       ; %bb.0:
7246; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7247; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7248; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7249; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7250; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7251; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
7252; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7253; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
7254; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7255; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
7256; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7257; GFX11-GISEL-NEXT:    v_max_f32_e64 v4, -v1, -v1
7258; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
7259; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7260; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v4, v2
7261; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
7262; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
7263; GFX11-GISEL-NEXT:    s_endpgm
7264  %tid = call i32 @llvm.amdgcn.workitem.id.x()
7265  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7266  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
7267  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
7268  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
7269  %a = load volatile float, ptr addrspace(1) %gep0
7270  %b = load volatile float, ptr addrspace(1) %gep1
7271  %c = load volatile float, ptr addrspace(1) %gep2
7272  %a.fneg = fsub float -0.0, %a
7273  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
7274  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
7275  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
7276  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
7277  store float %med3, ptr addrspace(1) %outgep
7278  ret void
7279}
7280
7281; A simple min and max is not sufficient to form a med3: min(max(a, b), c) is
; only the inner half of the four-operation median pattern
; max(min(a, b), min(max(a, b), c)) used by the preceding test. The checks
; below therefore expect no v_med3_f32 on any run line, only v_max_f32 followed
; by v_min_f32 (or the single fused v_maxmin_f32 on the gfx1100 runs).
; NOTE(review): attribute group #2 is defined outside this excerpt; the
; function name suggests it enables no-NaNs FP math - confirm.
7282define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
7283; SI-SDAG-LABEL: v_test_global_nnans_min_max_f32:
7284; SI-SDAG:       ; %bb.0:
7285; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7286; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
7287; SI-SDAG-NEXT:    s_mov_b32 s10, 0
7288; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7289; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
7290; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
7291; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
7292; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7293; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
7294; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
7295; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
7296; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7297; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7298; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
7299; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7300; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
7301; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7302; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
7303; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
7304; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
7305; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7306; SI-SDAG-NEXT:    s_endpgm
7307;
7308; SI-GISEL-LABEL: v_test_global_nnans_min_max_f32:
7309; SI-GISEL:       ; %bb.0:
7310; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7311; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7312; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
7313; SI-GISEL-NEXT:    s_mov_b32 s10, 0
7314; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
7315; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7316; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
7317; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
7318; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7319; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
7320; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
7321; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7322; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
7323; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
7324; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7325; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
7326; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
7327; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
7328; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7329; SI-GISEL-NEXT:    s_endpgm
7330;
7331; VI-SDAG-LABEL: v_test_global_nnans_min_max_f32:
7332; VI-SDAG:       ; %bb.0:
7333; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
7334; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
7335; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7336; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
7337; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
7338; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7339; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
7340; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
7341; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
7342; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
7343; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
7344; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
7345; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
7346; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7347; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
7348; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7349; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
7350; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7351; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
7352; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
7353; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7354; VI-SDAG-NEXT:    v_max_f32_e32 v2, v7, v2
7355; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
7356; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
7357; VI-SDAG-NEXT:    s_endpgm
7358;
7359; VI-GISEL-LABEL: v_test_global_nnans_min_max_f32:
7360; VI-GISEL:       ; %bb.0:
7361; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
7362; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
7363; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7364; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
7365; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
7366; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
7367; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7368; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
7369; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
7370; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
7371; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
7372; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
7373; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
7374; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
7375; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
7376; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
7377; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7378; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
7379; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7380; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
7381; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7382; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
7383; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
7384; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
7385; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7386; VI-GISEL-NEXT:    v_max_f32_e32 v2, v7, v2
7387; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
7388; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
7389; VI-GISEL-NEXT:    s_endpgm
7390;
7391; GFX9-LABEL: v_test_global_nnans_min_max_f32:
7392; GFX9:       ; %bb.0:
7393; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
7394; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7395; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7396; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
7397; GFX9-NEXT:    s_waitcnt vmcnt(0)
7398; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
7399; GFX9-NEXT:    s_waitcnt vmcnt(0)
7400; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
7401; GFX9-NEXT:    s_waitcnt vmcnt(0)
7402; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
7403; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
7404; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
7405; GFX9-NEXT:    s_endpgm
7406;
7407; GFX11-LABEL: v_test_global_nnans_min_max_f32:
7408; GFX11:       ; %bb.0:
7409; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7410; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7411; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7412; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7413; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
7414; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
7415; GFX11-NEXT:    s_waitcnt vmcnt(0)
7416; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
7417; GFX11-NEXT:    s_waitcnt vmcnt(0)
7418; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
7419; GFX11-NEXT:    s_waitcnt vmcnt(0)
7420; GFX11-NEXT:    v_maxmin_f32 v1, v1, v2, v3
7421; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
7422; GFX11-NEXT:    s_endpgm
  ; Every pointer argument is indexed by the workitem id, so each lane reads
  ; and writes its own float element.
7423  %tid = call i32 @llvm.amdgcn.workitem.id.x()
7424  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7425  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
7426  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
7427  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  ; The inputs are loaded volatile; in the expected output each load is
  ; followed by its own s_waitcnt, so the three loads stay separate.
7428  %a = load volatile float, ptr addrspace(1) %gep0
7429  %b = load volatile float, ptr addrspace(1) %gep1
7430  %c = load volatile float, ptr addrspace(1) %gep2
  ; min(max(a, b), c): there is no outer max with min(a, b), which is why no
  ; med3 instruction is expected for this function.
7431  %max = call float @llvm.maxnum.f32(float %a, float %b)
7432  %minmax = call float @llvm.minnum.f32(float %max, float %c)
7433  store float %minmax, ptr addrspace(1) %outgep
7434  ret void
7435}
7436
; Clamp of a half value between two constants, min(max(a + 1.0, 2.0), 4.0),
; where the clamped operand comes from an fadd carrying the nnan flag.
; Expected selection according to the checks below - the SelectionDAG run for
; the default (SI) target promotes to f32 and emits v_med3_f32; the GlobalISel
; run for that target and both tonga runs keep separate max and min
; instructions; the gfx900 and gfx1100 runs emit v_med3_f16.
; NOTE(review): attribute group #1 is defined outside this excerpt - confirm
; which FP options it sets before relying on it.
7437define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
7438; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7439; SI-SDAG:       ; %bb.0:
7440; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
7441; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
7442; SI-SDAG-NEXT:    s_mov_b32 s6, 0
7443; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7444; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
7445; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7446; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
7447; SI-SDAG-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
7448; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
7449; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7450; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
7451; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
7452; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
7453; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
7454; SI-SDAG-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7455; SI-SDAG-NEXT:    s_endpgm
7456;
7457; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7458; SI-GISEL:       ; %bb.0:
7459; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
7460; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7461; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
7462; SI-GISEL-NEXT:    s_mov_b32 s6, 0
7463; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
7464; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7465; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
7466; SI-GISEL-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
7467; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v3, 1.0
7468; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v4, 2.0
7469; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7470; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
7471; SI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
7472; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7473; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
7474; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
7475; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7476; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
7477; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v3, 4.0
7478; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
7479; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7480; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
7481; SI-GISEL-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7482; SI-GISEL-NEXT:    s_endpgm
7483;
7484; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7485; VI-SDAG:       ; %bb.0:
7486; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7487; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
7488; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7489; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
7490; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
7491; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7492; VI-SDAG-NEXT:    flat_load_ushort v3, v[0:1]
7493; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
7494; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
7495; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7496; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7497; VI-SDAG-NEXT:    v_add_f16_e32 v2, 1.0, v3
7498; VI-SDAG-NEXT:    v_max_f16_e32 v2, 2.0, v2
7499; VI-SDAG-NEXT:    v_min_f16_e32 v2, 4.0, v2
7500; VI-SDAG-NEXT:    flat_store_short v[0:1], v2
7501; VI-SDAG-NEXT:    s_endpgm
7502;
7503; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7504; VI-GISEL:       ; %bb.0:
7505; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7506; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
7507; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7508; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
7509; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
7510; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
7511; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7512; VI-GISEL-NEXT:    flat_load_ushort v3, v[0:1]
7513; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
7514; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
7515; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
7516; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7517; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7518; VI-GISEL-NEXT:    v_add_f16_e32 v2, 1.0, v3
7519; VI-GISEL-NEXT:    v_max_f16_e32 v2, 2.0, v2
7520; VI-GISEL-NEXT:    v_min_f16_e32 v2, 4.0, v2
7521; VI-GISEL-NEXT:    flat_store_short v[0:1], v2
7522; VI-GISEL-NEXT:    s_endpgm
7523;
7524; GFX9-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7525; GFX9:       ; %bb.0:
7526; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7527; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7528; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7529; GFX9-NEXT:    global_load_ushort v1, v0, s[2:3]
7530; GFX9-NEXT:    s_waitcnt vmcnt(0)
7531; GFX9-NEXT:    v_add_f16_e32 v1, 1.0, v1
7532; GFX9-NEXT:    v_med3_f16 v1, v1, 2.0, 4.0
7533; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
7534; GFX9-NEXT:    s_endpgm
7535;
7536; GFX11-SDAG-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7537; GFX11-SDAG-FAKE16:       ; %bb.0:
7538; GFX11-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7539; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7540; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
7541; GFX11-SDAG-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7542; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
7543; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
7544; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7545; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
7546; GFX11-SDAG-FAKE16-NEXT:    v_med3_f16 v1, v1, 2.0, 4.0
7547; GFX11-SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
7548; GFX11-SDAG-FAKE16-NEXT:    s_endpgm
7549;
7550; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7551; GFX11-GISEL-FAKE16:       ; %bb.0:
7552; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7553; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7554; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
7555; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7556; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
7557; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
7558; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7559; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
7560; GFX11-GISEL-FAKE16-NEXT:    v_med3_f16 v1, v1, 2.0, 4.0
7561; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
7562; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
7563;
7564; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7565; GFX11-SDAG-TRUE16:       ; %bb.0:
7566; GFX11-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7567; GFX11-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7568; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
7569; GFX11-SDAG-TRUE16-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
7570; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
7571; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v0, v1, s[2:3]
7572; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7573; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
7574; GFX11-SDAG-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, 2.0, 4.0
7575; GFX11-SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
7576; GFX11-SDAG-TRUE16-NEXT:    s_endpgm
7577;
7578; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
7579; GFX11-GISEL-TRUE16:       ; %bb.0:
7580; GFX11-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7581; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7582; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
7583; GFX11-GISEL-TRUE16-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
7584; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
7585; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v0, v1, s[2:3]
7586; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7587; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
7588; GFX11-GISEL-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, 2.0, 4.0
7589; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
7590; GFX11-GISEL-TRUE16-NEXT:    s_endpgm
  ; Input and output are half arrays indexed by the workitem id.
7591  %tid = call i32 @llvm.amdgcn.workitem.id.x()
7592  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
7593  %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid
7594  %a = load half, ptr addrspace(1) %gep0
  ; The nnan flag on this fadd is the only no-NaN information visible on the
  ; value being clamped; the min/max calls below carry no fast-math flags.
7595  %a.add = fadd nnan half %a, 1.0
  ; Clamp to the constant range [2.0, 4.0] as max followed by min.
7596  %max = call half @llvm.maxnum.f16(half %a.add, half 2.0)
7597  %med = call half @llvm.minnum.f16(half %max, half 4.0)
7598
7599  store half %med, ptr addrspace(1) %outgep
7600  ret void
7601}
7602
7603define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
7604; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
7605; SI-SDAG:       ; %bb.0:
7606; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7607; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
7608; SI-SDAG-NEXT:    s_mov_b32 s10, 0
7609; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7610; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
7611; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
7612; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
7613; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7614; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
7615; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
7616; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
7617; SI-SDAG-NEXT:    buffer_load_ushort v2, v[0:1], s[8:11], 0 addr64 glc
7618; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7619; SI-SDAG-NEXT:    buffer_load_ushort v3, v[0:1], s[12:15], 0 addr64 glc
7620; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7621; SI-SDAG-NEXT:    buffer_load_ushort v4, v[0:1], s[16:19], 0 addr64 glc
7622; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7623; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
7624; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
7625; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v3, v3
7626; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v4, v4
7627; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
7628; SI-SDAG-NEXT:    v_add_f32_e32 v3, 2.0, v3
7629; SI-SDAG-NEXT:    v_add_f32_e32 v4, 4.0, v4
7630; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
7631; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
7632; SI-SDAG-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7633; SI-SDAG-NEXT:    s_endpgm
7634;
7635; SI-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0:
7636; SI-GISEL:       ; %bb.0:
7637; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7638; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7639; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
7640; SI-GISEL-NEXT:    s_mov_b32 s10, 0
7641; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
7642; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
7643; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v3, 2.0
7644; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7645; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
7646; SI-GISEL-NEXT:    buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 glc
7647; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7648; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v5, 4.0
7649; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
7650; SI-GISEL-NEXT:    buffer_load_ushort v6, v[0:1], s[8:11], 0 addr64 glc
7651; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7652; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
7653; SI-GISEL-NEXT:    buffer_load_ushort v7, v[0:1], s[8:11], 0 addr64 glc
7654; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7655; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v4, v4
7656; SI-GISEL-NEXT:    v_add_f32_e32 v2, v4, v2
7657; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v4, v6
7658; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7659; SI-GISEL-NEXT:    v_add_f32_e32 v3, v4, v3
7660; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v4, v7
7661; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
7662; SI-GISEL-NEXT:    v_add_f32_e32 v4, v4, v5
7663; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
7664; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v4
7665; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v3, v3
7666; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
7667; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
7668; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v3, v4
7669; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v5
7670; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7671; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
7672; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
7673; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7674; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v3, v4
7675; SI-GISEL-NEXT:    v_cvt_f32_f16_e32 v2, v2
7676; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
7677; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
7678; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
7679; SI-GISEL-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
7680; SI-GISEL-NEXT:    s_endpgm
7681;
7682; VI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
7683; VI-SDAG:       ; %bb.0:
7684; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
7685; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 1, v0
7686; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7687; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
7688; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
7689; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7690; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
7691; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
7692; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
7693; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
7694; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
7695; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
7696; VI-SDAG-NEXT:    flat_load_ushort v7, v[0:1] glc
7697; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7698; VI-SDAG-NEXT:    flat_load_ushort v2, v[2:3] glc
7699; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7700; VI-SDAG-NEXT:    flat_load_ushort v3, v[4:5] glc
7701; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7702; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
7703; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
7704; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7705; VI-SDAG-NEXT:    v_add_f16_e32 v4, 1.0, v7
7706; VI-SDAG-NEXT:    v_add_f16_e32 v2, 2.0, v2
7707; VI-SDAG-NEXT:    v_add_f16_e32 v3, 4.0, v3
7708; VI-SDAG-NEXT:    v_min_f16_e32 v5, v4, v2
7709; VI-SDAG-NEXT:    v_max_f16_e32 v2, v4, v2
7710; VI-SDAG-NEXT:    v_min_f16_e32 v2, v2, v3
7711; VI-SDAG-NEXT:    v_max_f16_e32 v2, v5, v2
7712; VI-SDAG-NEXT:    flat_store_short v[0:1], v2
7713; VI-SDAG-NEXT:    s_endpgm
7714;
7715; VI-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0:
7716; VI-GISEL:       ; %bb.0:
7717; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
7718; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 1, v0
7719; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7720; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
7721; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
7722; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
7723; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7724; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
7725; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
7726; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
7727; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
7728; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
7729; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
7730; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
7731; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
7732; VI-GISEL-NEXT:    flat_load_ushort v7, v[0:1] glc
7733; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7734; VI-GISEL-NEXT:    flat_load_ushort v2, v[2:3] glc
7735; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7736; VI-GISEL-NEXT:    flat_load_ushort v3, v[4:5] glc
7737; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7738; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
7739; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
7740; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
7741; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7742; VI-GISEL-NEXT:    v_add_f16_e32 v4, 1.0, v7
7743; VI-GISEL-NEXT:    v_add_f16_e32 v2, 2.0, v2
7744; VI-GISEL-NEXT:    v_add_f16_e32 v3, 4.0, v3
7745; VI-GISEL-NEXT:    v_min_f16_e32 v5, v4, v2
7746; VI-GISEL-NEXT:    v_max_f16_e32 v2, v4, v2
7747; VI-GISEL-NEXT:    v_min_f16_e32 v2, v2, v3
7748; VI-GISEL-NEXT:    v_max_f16_e32 v2, v5, v2
7749; VI-GISEL-NEXT:    flat_store_short v[0:1], v2
7750; VI-GISEL-NEXT:    s_endpgm
7751;
7752; GFX9-LABEL: v_nnan_inputs_med3_f16_pat0:
7753; GFX9:       ; %bb.0:
7754; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
7755; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7756; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7757; GFX9-NEXT:    global_load_ushort v1, v0, s[10:11] glc
7758; GFX9-NEXT:    s_waitcnt vmcnt(0)
7759; GFX9-NEXT:    global_load_ushort v2, v0, s[12:13] glc
7760; GFX9-NEXT:    s_waitcnt vmcnt(0)
7761; GFX9-NEXT:    global_load_ushort v3, v0, s[14:15] glc
7762; GFX9-NEXT:    s_waitcnt vmcnt(0)
7763; GFX9-NEXT:    v_add_f16_e32 v1, 1.0, v1
7764; GFX9-NEXT:    v_add_f16_e32 v2, 2.0, v2
7765; GFX9-NEXT:    v_add_f16_e32 v3, 4.0, v3
7766; GFX9-NEXT:    v_med3_f16 v1, v1, v2, v3
7767; GFX9-NEXT:    global_store_short v0, v1, s[8:9]
7768; GFX9-NEXT:    s_endpgm
7769;
7770; GFX11-SDAG-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0:
7771; GFX11-SDAG-FAKE16:       ; %bb.0:
7772; GFX11-SDAG-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7773; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7774; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7775; GFX11-SDAG-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7776; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
7777; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
7778; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7779; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
7780; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7781; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
7782; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7783; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
7784; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v2, 2.0, v2
7785; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v3, 4.0, v3
7786; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7787; GFX11-SDAG-FAKE16-NEXT:    v_med3_f16 v1, v1, v2, v3
7788; GFX11-SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
7789; GFX11-SDAG-FAKE16-NEXT:    s_endpgm
7790;
7791; GFX11-GISEL-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0:
7792; GFX11-GISEL-FAKE16:       ; %bb.0:
7793; GFX11-GISEL-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7794; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7795; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7796; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
7797; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
7798; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
7799; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7800; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
7801; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7802; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
7803; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
7804; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
7805; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v2, 2.0, v2
7806; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v3, 4.0, v3
7807; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7808; GFX11-GISEL-FAKE16-NEXT:    v_med3_f16 v1, v1, v2, v3
7809; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
7810; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
7811;
7812; GFX11-SDAG-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0:
7813; GFX11-SDAG-TRUE16:       ; %bb.0:
7814; GFX11-SDAG-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7815; GFX11-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7816; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7817; GFX11-SDAG-TRUE16-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
7818; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
7819; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v0, v2, s[2:3] glc dlc
7820; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7821; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v1, v2, s[4:5] glc dlc
7822; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7823; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v3, v2, s[6:7] glc dlc
7824; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7825; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
7826; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
7827; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v3.l
7828; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7829; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
7830; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v1.l, 4.0, v1.l
7831; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7832; GFX11-SDAG-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, v0.h, v1.l
7833; GFX11-SDAG-TRUE16-NEXT:    global_store_b16 v2, v0, s[0:1]
7834; GFX11-SDAG-TRUE16-NEXT:    s_endpgm
7835;
7836; GFX11-GISEL-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0:
7837; GFX11-GISEL-TRUE16:       ; %bb.0:
7838; GFX11-GISEL-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
7839; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7840; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7841; GFX11-GISEL-TRUE16-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
7842; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
7843; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v0, v2, s[2:3] glc dlc
7844; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7845; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v1, v2, s[4:5] glc dlc
7846; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7847; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v3, v2, s[6:7] glc dlc
7848; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
7849; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
7850; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.h, 2.0, v1.l
7851; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v1.l, 4.0, v3.l
7852; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7853; GFX11-GISEL-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, v0.h, v1.l
7854; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v2, v0, s[0:1]
7855; GFX11-GISEL-TRUE16-NEXT:    s_endpgm
7856  %tid = call i32 @llvm.amdgcn.workitem.id.x()
7857  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
7858  %gep1 = getelementptr half, ptr addrspace(1) %bptr, i32 %tid
7859  %gep2 = getelementptr half, ptr addrspace(1) %cptr, i32 %tid
7860  %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid
7861  %a = load volatile half, ptr addrspace(1) %gep0
7862  %b = load volatile half, ptr addrspace(1) %gep1
7863  %c = load volatile half, ptr addrspace(1) %gep2
7864
7865  %a.nnan = fadd nnan half %a, 1.0
7866  %b.nnan = fadd nnan half %b, 2.0
7867  %c.nnan = fadd nnan half %c, 4.0
7868
7869  %tmp0 = call half @llvm.minnum.f16(half %a.nnan, half %b.nnan)
7870  %tmp1 = call half @llvm.maxnum.f16(half %a.nnan, half %b.nnan)
7871  %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %c.nnan)
7872  %med3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
7873  store half %med3, ptr addrspace(1) %outgep
7874  ret void
7875}
7876
; Clamps (%a + 0.5) between 8.0 and 16.0 with maxnum followed by minnum. Both
; bounds appear in the checks as 32-bit literals (0x41000000 and 0x41800000).
; The SI, VI and GFX9 checks expect a separate v_max_f32 / v_min_f32 pair and no
; v_med3_f32; the GFX11 checks expect a single v_maxmin_f32.
7877define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
7878; SI-SDAG-LABEL: two_non_inline_constant:
7879; SI-SDAG:       ; %bb.0:
7880; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
7881; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
7882; SI-SDAG-NEXT:    s_mov_b32 s6, 0
7883; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7884; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
7885; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7886; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
7887; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
7888; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
7889; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7890; SI-SDAG-NEXT:    v_add_f32_e32 v2, 0.5, v2
7891; SI-SDAG-NEXT:    v_max_f32_e32 v2, 0x41000000, v2
7892; SI-SDAG-NEXT:    v_min_f32_e32 v2, 0x41800000, v2
7893; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7894; SI-SDAG-NEXT:    s_endpgm
7895;
7896; SI-GISEL-LABEL: two_non_inline_constant:
7897; SI-GISEL:       ; %bb.0:
7898; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
7899; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7900; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
7901; SI-GISEL-NEXT:    s_mov_b32 s6, 0
7902; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
7903; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7904; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
7905; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
7906; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7907; SI-GISEL-NEXT:    v_add_f32_e32 v2, 0.5, v2
7908; SI-GISEL-NEXT:    v_max_f32_e32 v2, 0x41000000, v2
7909; SI-GISEL-NEXT:    v_min_f32_e32 v2, 0x41800000, v2
7910; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
7911; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
7912; SI-GISEL-NEXT:    s_endpgm
7913;
7914; VI-SDAG-LABEL: two_non_inline_constant:
7915; VI-SDAG:       ; %bb.0:
7916; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7917; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
7918; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7919; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
7920; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
7921; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7922; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
7923; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
7924; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
7925; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7926; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
7927; VI-SDAG-NEXT:    v_add_f32_e32 v2, 0.5, v3
7928; VI-SDAG-NEXT:    v_max_f32_e32 v2, 0x41000000, v2
7929; VI-SDAG-NEXT:    v_min_f32_e32 v2, 0x41800000, v2
7930; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
7931; VI-SDAG-NEXT:    s_endpgm
7932;
7933; VI-GISEL-LABEL: two_non_inline_constant:
7934; VI-GISEL:       ; %bb.0:
7935; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7936; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
7937; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7938; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
7939; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
7940; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
7941; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7942; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
7943; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
7944; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
7945; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
7946; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7947; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
7948; VI-GISEL-NEXT:    v_add_f32_e32 v2, 0.5, v3
7949; VI-GISEL-NEXT:    v_max_f32_e32 v2, 0x41000000, v2
7950; VI-GISEL-NEXT:    v_min_f32_e32 v2, 0x41800000, v2
7951; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
7952; VI-GISEL-NEXT:    s_endpgm
7953;
7954; GFX9-LABEL: two_non_inline_constant:
7955; GFX9:       ; %bb.0:
7956; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7957; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7958; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
7959; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
7960; GFX9-NEXT:    s_waitcnt vmcnt(0)
7961; GFX9-NEXT:    v_add_f32_e32 v1, 0.5, v1
7962; GFX9-NEXT:    v_max_f32_e32 v1, 0x41000000, v1
7963; GFX9-NEXT:    v_min_f32_e32 v1, 0x41800000, v1
7964; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
7965; GFX9-NEXT:    s_endpgm
7966;
7967; GFX11-SDAG-LABEL: two_non_inline_constant:
7968; GFX11-SDAG:       ; %bb.0:
7969; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7970; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7971; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7972; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7973; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
7974; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
7975; GFX11-SDAG-NEXT:    s_mov_b32 s2, 0x41000000
7976; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
7977; GFX11-SDAG-NEXT:    v_add_f32_e32 v1, 0.5, v1
7978; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7979; GFX11-SDAG-NEXT:    v_maxmin_f32 v1, v1, s2, 0x41800000
7980; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
7981; GFX11-SDAG-NEXT:    s_endpgm
7982;
7983; GFX11-GISEL-LABEL: two_non_inline_constant:
7984; GFX11-GISEL:       ; %bb.0:
7985; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7986; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
7987; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41800000
7988; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1)
7989; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
7990; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
7991; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
7992; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
7993; GFX11-GISEL-NEXT:    v_add_f32_e32 v1, 0.5, v1
7994; GFX11-GISEL-NEXT:    v_maxmin_f32 v1, v1, 0x41000000, v2
7995; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
7996; GFX11-GISEL-NEXT:    s_endpgm
7997  %tid = call i32 @llvm.amdgcn.workitem.id.x()
7998  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
7999  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
8000  %a = load float, ptr addrspace(1) %gep0
; Only the fadd carries nnan; the maxnum/minnum calls below have no fast-math flags.
8001  %add = fadd nnan float %a, 0.5
8002  %max = call float @llvm.maxnum.f32(float %add, float 8.0)
8003  %med = call float @llvm.minnum.f32(float %max, float 16.0)
8004
8005  store float %med, ptr addrspace(1) %out.gep
8006  ret void
8007}
8008
; Clamps (%a + 0.5) between 1.0 and 16.0 with maxnum followed by minnum. Only the
; upper bound shows up in the checks as a 32-bit literal (0x41800000), and it has
; a second use in %extra.use. Every check prefix below expects v_med3_f32.
8009; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants.
8010define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
8011; SI-SDAG-LABEL: one_non_inline_constant:
8012; SI-SDAG:       ; %bb.0:
8013; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8014; SI-SDAG-NEXT:    s_mov_b32 s6, 0
8015; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
8016; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8017; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
8018; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
8019; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
8020; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
8021; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41800000
8022; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
8023; SI-SDAG-NEXT:    s_mov_b32 s6, -1
8024; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8025; SI-SDAG-NEXT:    v_add_f32_e32 v4, 0.5, v2
8026; SI-SDAG-NEXT:    v_add_f32_e32 v2, 0x41800000, v2
8027; SI-SDAG-NEXT:    v_med3_f32 v3, v4, 1.0, v3
8028; SI-SDAG-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
8029; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[4:7], 0
8030; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8031; SI-SDAG-NEXT:    s_endpgm
8032;
8033; SI-GISEL-LABEL: one_non_inline_constant:
8034; SI-GISEL:       ; %bb.0:
8035; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8036; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8037; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
8038; SI-GISEL-NEXT:    s_mov_b32 s6, 0
8039; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
8040; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
8041; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
8042; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
8043; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41800000
8044; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
8045; SI-GISEL-NEXT:    s_mov_b32 s6, -1
8046; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8047; SI-GISEL-NEXT:    v_add_f32_e32 v4, 0.5, v2
8048; SI-GISEL-NEXT:    v_add_f32_e32 v2, 0x41800000, v2
8049; SI-GISEL-NEXT:    v_med3_f32 v3, v4, 1.0, v3
8050; SI-GISEL-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
8051; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[4:7], 0
8052; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8053; SI-GISEL-NEXT:    s_endpgm
8054;
8055; VI-SDAG-LABEL: one_non_inline_constant:
8056; VI-SDAG:       ; %bb.0:
8057; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8058; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
8059; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41800000
8060; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
8061; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
8062; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
8063; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8064; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
8065; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
8066; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
8067; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8068; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8069; VI-SDAG-NEXT:    v_add_f32_e32 v2, 0.5, v3
8070; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 1.0, v4
8071; VI-SDAG-NEXT:    v_add_f32_e32 v3, 0x41800000, v3
8072; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
8073; VI-SDAG-NEXT:    flat_store_dword v[0:1], v3
8074; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8075; VI-SDAG-NEXT:    s_endpgm
8076;
8077; VI-GISEL-LABEL: one_non_inline_constant:
8078; VI-GISEL:       ; %bb.0:
8079; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8080; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
8081; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x41800000
8082; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
8083; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
8084; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
8085; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
8086; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8087; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
8088; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
8089; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
8090; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
8091; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8092; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8093; VI-GISEL-NEXT:    v_add_f32_e32 v2, 0.5, v3
8094; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 1.0, v4
8095; VI-GISEL-NEXT:    v_add_f32_e32 v3, 0x41800000, v3
8096; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
8097; VI-GISEL-NEXT:    flat_store_dword v[0:1], v3
8098; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8099; VI-GISEL-NEXT:    s_endpgm
8100;
8101; GFX9-LABEL: one_non_inline_constant:
8102; GFX9:       ; %bb.0:
8103; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8104; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8105; GFX9-NEXT:    v_mov_b32_e32 v2, 0x41800000
8106; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
8107; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
8108; GFX9-NEXT:    s_waitcnt vmcnt(0)
8109; GFX9-NEXT:    v_add_f32_e32 v3, 0.5, v1
8110; GFX9-NEXT:    v_add_f32_e32 v1, 0x41800000, v1
8111; GFX9-NEXT:    v_med3_f32 v2, v3, 1.0, v2
8112; GFX9-NEXT:    global_store_dword v0, v2, s[0:1]
8113; GFX9-NEXT:    global_store_dword v[0:1], v1, off
8114; GFX9-NEXT:    s_waitcnt vmcnt(0)
8115; GFX9-NEXT:    s_endpgm
8116;
8117; GFX11-LABEL: one_non_inline_constant:
8118; GFX11:       ; %bb.0:
8119; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
8120; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
8121; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8122; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8123; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
8124; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
8125; GFX11-NEXT:    s_waitcnt vmcnt(0)
8126; GFX11-NEXT:    v_add_f32_e32 v2, 0.5, v1
8127; GFX11-NEXT:    v_add_f32_e32 v1, 0x41800000, v1
8128; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
8129; GFX11-NEXT:    v_med3_f32 v2, v2, 1.0, 0x41800000
8130; GFX11-NEXT:    global_store_b32 v0, v2, s[0:1]
8131; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
8132; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
8133; GFX11-NEXT:    s_endpgm
8134  %tid = call i32 @llvm.amdgcn.workitem.id.x()
8135  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
8136  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
8137  %a = load float, ptr addrspace(1) %gep0
; Only the fadd carries nnan; the maxnum/minnum calls below have no fast-math flags.
8138  %add = fadd nnan float %a, 0.5
8139  %max = call float @llvm.maxnum.f32(float %add, float 1.0)
8140  %med = call float @llvm.minnum.f32(float %max, float 16.0)
8141
8142  store float %med, ptr addrspace(1) %out.gep
8143
; Second use of the 16.0 constant, outside the clamp. The result is written with a
; volatile store to an undef address.
8144  %extra.use = fadd float %a, 16.0
8145  store volatile float %extra.use, ptr addrspace(1) undef
8146  ret void
8147}
8148
; Same 8.0 / 16.0 maxnum-then-minnum clamp of (%a + 0.5) as two_non_inline_constant,
; but each bound also feeds an extra fadd whose result is stored with a volatile
; store. Every check prefix below expects v_med3_f32 rather than a
; v_max_f32 / v_min_f32 pair or v_maxmin_f32.
8149define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
8150; SI-SDAG-LABEL: two_non_inline_constant_multi_use:
8151; SI-SDAG:       ; %bb.0:
8152; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8153; SI-SDAG-NEXT:    s_mov_b32 s6, 0
8154; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
8155; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8156; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
8157; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
8158; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
8159; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
8160; SI-SDAG-NEXT:    s_mov_b32 s4, 0x41000000
8161; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41800000
8162; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
8163; SI-SDAG-NEXT:    s_mov_b32 s6, -1
8164; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8165; SI-SDAG-NEXT:    v_add_f32_e32 v4, 0.5, v2
8166; SI-SDAG-NEXT:    v_add_f32_e32 v5, 0x41800000, v2
8167; SI-SDAG-NEXT:    v_add_f32_e32 v2, 0x41000000, v2
8168; SI-SDAG-NEXT:    v_med3_f32 v3, v4, s4, v3
8169; SI-SDAG-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
8170; SI-SDAG-NEXT:    buffer_store_dword v5, off, s[4:7], 0
8171; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8172; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[4:7], 0
8173; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8174; SI-SDAG-NEXT:    s_endpgm
8175;
8176; SI-GISEL-LABEL: two_non_inline_constant_multi_use:
8177; SI-GISEL:       ; %bb.0:
8178; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8179; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8180; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
8181; SI-GISEL-NEXT:    s_mov_b32 s6, 0
8182; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
8183; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
8184; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
8185; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
8186; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41000000
8187; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x41800000
8188; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
8189; SI-GISEL-NEXT:    s_mov_b32 s6, -1
8190; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8191; SI-GISEL-NEXT:    v_add_f32_e32 v5, 0.5, v2
8192; SI-GISEL-NEXT:    v_add_f32_e32 v6, 0x41800000, v2
8193; SI-GISEL-NEXT:    v_add_f32_e32 v2, 0x41000000, v2
8194; SI-GISEL-NEXT:    v_med3_f32 v3, v5, v3, v4
8195; SI-GISEL-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
8196; SI-GISEL-NEXT:    buffer_store_dword v6, off, s[4:7], 0
8197; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8198; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[4:7], 0
8199; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8200; SI-GISEL-NEXT:    s_endpgm
8201;
8202; VI-SDAG-LABEL: two_non_inline_constant_multi_use:
8203; VI-SDAG:       ; %bb.0:
8204; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8205; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
8206; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41800000
8207; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
8208; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
8209; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
8210; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8211; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
8212; VI-SDAG-NEXT:    s_mov_b32 s2, 0x41000000
8213; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
8214; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
8215; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8216; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8217; VI-SDAG-NEXT:    v_add_f32_e32 v2, 0.5, v3
8218; VI-SDAG-NEXT:    v_med3_f32 v2, v2, s2, v4
8219; VI-SDAG-NEXT:    v_add_f32_e32 v5, 0x41800000, v3
8220; VI-SDAG-NEXT:    v_add_f32_e32 v3, 0x41000000, v3
8221; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
8222; VI-SDAG-NEXT:    flat_store_dword v[0:1], v5
8223; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8224; VI-SDAG-NEXT:    flat_store_dword v[0:1], v3
8225; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
8226; VI-SDAG-NEXT:    s_endpgm
8227;
8228; VI-GISEL-LABEL: two_non_inline_constant_multi_use:
8229; VI-GISEL:       ; %bb.0:
8230; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8231; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
8232; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x41000000
8233; VI-GISEL-NEXT:    v_mov_b32_e32 v5, 0x41800000
8234; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
8235; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
8236; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
8237; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
8238; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8239; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
8240; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
8241; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
8242; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
8243; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8244; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8245; VI-GISEL-NEXT:    v_add_f32_e32 v2, 0.5, v3
8246; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v4, v5
8247; VI-GISEL-NEXT:    v_add_f32_e32 v6, 0x41800000, v3
8248; VI-GISEL-NEXT:    v_add_f32_e32 v3, 0x41000000, v3
8249; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
8250; VI-GISEL-NEXT:    flat_store_dword v[0:1], v6
8251; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8252; VI-GISEL-NEXT:    flat_store_dword v[0:1], v3
8253; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
8254; VI-GISEL-NEXT:    s_endpgm
8255;
8256; GFX9-SDAG-LABEL: two_non_inline_constant_multi_use:
8257; GFX9-SDAG:       ; %bb.0:
8258; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8259; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8260; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0x41800000
8261; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
8262; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3]
8263; GFX9-SDAG-NEXT:    s_mov_b32 s2, 0x41000000
8264; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
8265; GFX9-SDAG-NEXT:    v_add_f32_e32 v3, 0.5, v1
8266; GFX9-SDAG-NEXT:    v_add_f32_e32 v4, 0x41800000, v1
8267; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 0x41000000, v1
8268; GFX9-SDAG-NEXT:    v_med3_f32 v2, v3, s2, v2
8269; GFX9-SDAG-NEXT:    global_store_dword v0, v2, s[0:1]
8270; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v4, off
8271; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
8272; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v1, off
8273; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
8274; GFX9-SDAG-NEXT:    s_endpgm
8275;
8276; GFX9-GISEL-LABEL: two_non_inline_constant_multi_use:
8277; GFX9-GISEL:       ; %bb.0:
8278; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8279; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8280; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41000000
8281; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41800000
8282; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
8283; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3]
8284; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
8285; GFX9-GISEL-NEXT:    v_add_f32_e32 v4, 0.5, v1
8286; GFX9-GISEL-NEXT:    v_add_f32_e32 v5, 0x41800000, v1
8287; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 0x41000000, v1
8288; GFX9-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
8289; GFX9-GISEL-NEXT:    global_store_dword v0, v2, s[0:1]
8290; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v5, off
8291; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
8292; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v1, off
8293; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
8294; GFX9-GISEL-NEXT:    s_endpgm
8295;
8296; GFX11-SDAG-LABEL: two_non_inline_constant_multi_use:
8297; GFX11-SDAG:       ; %bb.0:
8298; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
8299; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
8300; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8301; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8302; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
8303; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
8304; GFX11-SDAG-NEXT:    s_mov_b32 s2, 0x41000000
8305; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
8306; GFX11-SDAG-NEXT:    v_add_f32_e32 v3, 0x41800000, v1
8307; GFX11-SDAG-NEXT:    v_add_f32_e32 v2, 0.5, v1
8308; GFX11-SDAG-NEXT:    v_add_f32_e32 v1, 0x41000000, v1
8309; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
8310; GFX11-SDAG-NEXT:    v_med3_f32 v2, v2, s2, 0x41800000
8311; GFX11-SDAG-NEXT:    global_store_b32 v0, v2, s[0:1]
8312; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v3, off dlc
8313; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
8314; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v1, off dlc
8315; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
8316; GFX11-SDAG-NEXT:    s_endpgm
8317;
8318; GFX11-GISEL-LABEL: two_non_inline_constant_multi_use:
8319; GFX11-GISEL:       ; %bb.0:
8320; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
8321; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
8322; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41800000
8323; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1)
8324; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
8325; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
8326; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
8327; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
8328; GFX11-GISEL-NEXT:    v_add_f32_e32 v3, 0.5, v1
8329; GFX11-GISEL-NEXT:    v_med3_f32 v2, v3, 0x41000000, v2
8330; GFX11-GISEL-NEXT:    v_add_f32_e32 v3, 0x41800000, v1
8331; GFX11-GISEL-NEXT:    v_add_f32_e32 v1, 0x41000000, v1
8332; GFX11-GISEL-NEXT:    global_store_b32 v0, v2, s[0:1]
8333; GFX11-GISEL-NEXT:    global_store_b32 v[0:1], v3, off dlc
8334; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
8335; GFX11-GISEL-NEXT:    global_store_b32 v[0:1], v1, off dlc
8336; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
8337; GFX11-GISEL-NEXT:    s_endpgm
8338  %tid = call i32 @llvm.amdgcn.workitem.id.x()
8339  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
8340  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
8341  %a = load float, ptr addrspace(1) %gep0
; Only the fadd carries nnan; the maxnum/minnum calls below have no fast-math flags.
8342  %add = fadd nnan float %a, 0.5
8343  %max = call float @llvm.maxnum.f32(float %add, float 8.0)
8344  %med = call float @llvm.minnum.f32(float %max, float 16.0)
8345
8346  store float %med, ptr addrspace(1) %out.gep
8347
; Additional uses of both clamp bounds (16.0, then 8.0), each written with a
; volatile store to an undef address.
8348  %extra.use0 = fadd float %a, 16.0
8349  store volatile float %extra.use0, ptr addrspace(1) undef
8350  %extra.use1 = fadd float %a, 8.0
8351  store volatile float %extra.use1, ptr addrspace(1) undef
8352  ret void
8353}
8354
; Intrinsic declarations; all of them share attribute group #0.
8355declare i32 @llvm.amdgcn.workitem.id.x() #0
8356declare float @llvm.fabs.f32(float) #0
8357declare float @llvm.minnum.f32(float, float) #0
8358declare float @llvm.maxnum.f32(float, float) #0
8359declare double @llvm.minnum.f64(double, double) #0
8360declare double @llvm.maxnum.f64(double, double) #0
8361declare half @llvm.fabs.f16(half) #0
8362declare half @llvm.minnum.f16(half, half) #0
8363declare half @llvm.maxnum.f16(half, half) #0
8364
; Attribute groups. #1 and #2 differ only in "no-nans-fp-math" ("false" for #1,
; "true" for #2).
8365attributes #0 = { nounwind readnone }
8366attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
8367attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
8368;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
8369; SI: {{.*}}
8370