xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fminimum.ll (revision 5a3299a684d7d8c40f48d732e5b80a8bd29aa882)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s
4
; Scalar f32 llvm.minimum with both operands passed as ordinary (non-inreg)
; shader arguments. Both llc runs share the same expected output: a single
; v_minimum_f32 that combines v0 and v1 and leaves the result in v0.
5define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) {
6; GCN-LABEL: test_fminimum_f32_vv:
7; GCN:       ; %bb.0:
8; GCN-NEXT:    v_minimum_f32 v0, v0, v1
9; GCN-NEXT:    ; return to shader part epilog
10  %val = call float @llvm.minimum.f32(float %a, float %b)
11  ret float %val
12}
13
; Scalar f32 llvm.minimum with both operands marked inreg (the checks show them
; arriving in s0 and s1). Expected output: s_minimum_f32 on the scalar
; registers, an s_delay_alu, then a v_mov_b32 copying s0 into v0 for the return.
14define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) {
15; GCN-LABEL: test_fminimum_f32_ss:
16; GCN:       ; %bb.0:
17; GCN-NEXT:    s_minimum_f32 s0, s0, s1
18; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
19; GCN-NEXT:    v_mov_b32_e32 v0, s0
20; GCN-NEXT:    ; return to shader part epilog
21  %val = call float @llvm.minimum.f32(float %a, float %b)
22  ret float %val
23}
24
; Mixed-operand case: %a is an ordinary argument and %b is inreg. Expected
; output is a single v_minimum_f32 that reads v0 and s0 directly, with no
; separate copy of the scalar operand.
25define amdgpu_ps float @test_fminimum_f32_vs(float %a, float inreg %b) {
26; GCN-LABEL: test_fminimum_f32_vs:
27; GCN:       ; %bb.0:
28; GCN-NEXT:    v_minimum_f32 v0, v0, s0
29; GCN-NEXT:    ; return to shader part epilog
30  %val = call float @llvm.minimum.f32(float %a, float %b)
31  ret float %val
32}
33
; Same shape as the plain f32 vector-register case, but the call carries the
; nnan fast-math flag. The expected output is still a single v_minimum_f32 on
; v0 and v1, i.e. the flag does not change the selected instruction here.
34define amdgpu_ps float @test_fminimum_nnan_f32(float %a, float %b) {
35; GCN-LABEL: test_fminimum_nnan_f32:
36; GCN:       ; %bb.0:
37; GCN-NEXT:    v_minimum_f32 v0, v0, v1
38; GCN-NEXT:    ; return to shader part epilog
39  %val = call nnan float @llvm.minimum.f32(float %a, float %b)
40  ret float %val
41}
42
; <2 x float> llvm.minimum with ordinary arguments. Expected output is one
; v_minimum_f32 per element: v0 with v2, and v1 with v3.
43define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b) {
44; GCN-LABEL: test_fminimum_v2f32:
45; GCN:       ; %bb.0:
46; GCN-NEXT:    v_minimum_f32 v0, v0, v2
47; GCN-NEXT:    v_minimum_f32 v1, v1, v3
48; GCN-NEXT:    ; return to shader part epilog
49  %val = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
50  ret <2 x float> %val
51}
52
; <2 x float> llvm.minimum with both vectors marked inreg. Expected output is
; one s_minimum_f32 per element (s0 with s2, s1 with s3), an s_delay_alu, and a
; single v_dual_mov_b32 that copies s0 and s1 into v0 and v1 for the return.
53define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x float> inreg %b) {
54; GCN-LABEL: test_fminimum_v2f32_ss:
55; GCN:       ; %bb.0:
56; GCN-NEXT:    s_minimum_f32 s0, s0, s2
57; GCN-NEXT:    s_minimum_f32 s1, s1, s3
58; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
59; GCN-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
60; GCN-NEXT:    ; return to shader part epilog
61  %val = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b)
62  ret <2 x float> %val
63}
64
; <3 x float> llvm.minimum with ordinary arguments. Expected output is three
; v_minimum_f32 instructions, pairing v0..v2 with v3..v5 element by element.
65define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b) {
66; GCN-LABEL: test_fminimum_v3f32:
67; GCN:       ; %bb.0:
68; GCN-NEXT:    v_minimum_f32 v0, v0, v3
69; GCN-NEXT:    v_minimum_f32 v1, v1, v4
70; GCN-NEXT:    v_minimum_f32 v2, v2, v5
71; GCN-NEXT:    ; return to shader part epilog
72  %val = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b)
73  ret <3 x float> %val
74}
75
; <4 x float> llvm.minimum with ordinary arguments. Expected output is four
; v_minimum_f32 instructions, pairing v0..v3 with v4..v7 element by element.
76define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b) {
77; GCN-LABEL: test_fminimum_v4f32:
78; GCN:       ; %bb.0:
79; GCN-NEXT:    v_minimum_f32 v0, v0, v4
80; GCN-NEXT:    v_minimum_f32 v1, v1, v5
81; GCN-NEXT:    v_minimum_f32 v2, v2, v6
82; GCN-NEXT:    v_minimum_f32 v3, v3, v7
83; GCN-NEXT:    ; return to shader part epilog
84  %val = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
85  ret <4 x float> %val
86}
87
; <16 x float> llvm.minimum with ordinary arguments. Expected output is sixteen
; v_minimum_f32 instructions in element order, each pairing vN with v(N+16)
; and writing the result back to vN.
88define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float> %b) {
89; GCN-LABEL: test_fminimum_v16f32:
90; GCN:       ; %bb.0:
91; GCN-NEXT:    v_minimum_f32 v0, v0, v16
92; GCN-NEXT:    v_minimum_f32 v1, v1, v17
93; GCN-NEXT:    v_minimum_f32 v2, v2, v18
94; GCN-NEXT:    v_minimum_f32 v3, v3, v19
95; GCN-NEXT:    v_minimum_f32 v4, v4, v20
96; GCN-NEXT:    v_minimum_f32 v5, v5, v21
97; GCN-NEXT:    v_minimum_f32 v6, v6, v22
98; GCN-NEXT:    v_minimum_f32 v7, v7, v23
99; GCN-NEXT:    v_minimum_f32 v8, v8, v24
100; GCN-NEXT:    v_minimum_f32 v9, v9, v25
101; GCN-NEXT:    v_minimum_f32 v10, v10, v26
102; GCN-NEXT:    v_minimum_f32 v11, v11, v27
103; GCN-NEXT:    v_minimum_f32 v12, v12, v28
104; GCN-NEXT:    v_minimum_f32 v13, v13, v29
105; GCN-NEXT:    v_minimum_f32 v14, v14, v30
106; GCN-NEXT:    v_minimum_f32 v15, v15, v31
107; GCN-NEXT:    ; return to shader part epilog
108  %val = call <16 x float> @llvm.minimum.v16f32(<16 x float> %a, <16 x float> %b)
109  ret <16 x float> %val
110}
111
; Scalar half llvm.minimum with ordinary arguments. Expected output is a single
; v_minimum_f16 combining v0 and v1 into v0.
112define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) {
113; GCN-LABEL: test_fminimum_f16_vv:
114; GCN:       ; %bb.0:
115; GCN-NEXT:    v_minimum_f16 v0, v0, v1
116; GCN-NEXT:    ; return to shader part epilog
117  %val = call half @llvm.minimum.f16(half %a, half %b)
118  ret half %val
119}
120
; Scalar half llvm.minimum with both operands marked inreg. Expected output is
; s_minimum_f16 on s0 and s1, an s_delay_alu, then a v_mov_b32 copying s0 into
; v0 for the return.
121define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) {
122; GCN-LABEL: test_fminimum_f16_ss:
123; GCN:       ; %bb.0:
124; GCN-NEXT:    s_minimum_f16 s0, s0, s1
125; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
126; GCN-NEXT:    v_mov_b32_e32 v0, s0
127; GCN-NEXT:    ; return to shader part epilog
128  %val = call half @llvm.minimum.f16(half %a, half %b)
129  ret half %val
130}
131
; <2 x half> llvm.minimum with ordinary arguments. Each vector occupies one
; register in the expected output, so a single packed v_pk_minimum_f16 on v0
; and v1 covers both elements.
132define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b) {
133; GCN-LABEL: test_fminimum_v2f16_vv:
134; GCN:       ; %bb.0:
135; GCN-NEXT:    v_pk_minimum_f16 v0, v0, v1
136; GCN-NEXT:    ; return to shader part epilog
137  %val = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
138  ret <2 x half> %val
139}
140
; <2 x half> llvm.minimum with both vectors marked inreg. Unlike the scalar
; inreg cases in this file, the expected output has no s_ instruction: one
; v_pk_minimum_f16 reads s0 and s1 directly and writes v0.
141define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x half> inreg %b) {
142; GCN-LABEL: test_fminimum_v2f16_ss:
143; GCN:       ; %bb.0:
144; GCN-NEXT:    v_pk_minimum_f16 v0, s0, s1
145; GCN-NEXT:    ; return to shader part epilog
146  %val = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
147  ret <2 x half> %val
148}
149
; <3 x half> llvm.minimum with ordinary arguments. The two llc runs produce
; different code, so each has its own check block:
;   - default selector: two packed v_pk_minimum_f16 (v0 with v2, v1 with v3);
;   - -global-isel: a packed v_pk_minimum_f16 for v0/v2, then a non-packed
;     v_minimum_f16 for v1/v3.
150define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b) {
151; GFX12-SDAG-LABEL: test_fminimum_v3f16_vv:
152; GFX12-SDAG:       ; %bb.0:
153; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v0, v0, v2
154; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v1, v1, v3
155; GFX12-SDAG-NEXT:    ; return to shader part epilog
156;
157; GFX12-GISEL-LABEL: test_fminimum_v3f16_vv:
158; GFX12-GISEL:       ; %bb.0:
159; GFX12-GISEL-NEXT:    v_pk_minimum_f16 v0, v0, v2
160; GFX12-GISEL-NEXT:    v_minimum_f16 v1, v1, v3
161; GFX12-GISEL-NEXT:    ; return to shader part epilog
162  %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
163  ret <3 x half> %val
164}
165
; <3 x half> llvm.minimum with both vectors marked inreg. The two llc runs
; produce different code, so each has its own check block:
;   - default selector: two v_pk_minimum_f16 reading the SGPR operands directly
;     (s0 with s2, s1 with s3);
;   - -global-isel: v_pk_minimum_f16 for s0/s2, then s_minimum_f16 for s1/s3,
;     followed by s_wait_alu, s_delay_alu and a v_mov_b32 of s0 into v1.
166define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) {
167; GFX12-SDAG-LABEL: test_fminimum_v3f16_ss:
168; GFX12-SDAG:       ; %bb.0:
169; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v0, s0, s2
170; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v1, s1, s3
171; GFX12-SDAG-NEXT:    ; return to shader part epilog
172;
173; GFX12-GISEL-LABEL: test_fminimum_v3f16_ss:
174; GFX12-GISEL:       ; %bb.0:
175; GFX12-GISEL-NEXT:    v_pk_minimum_f16 v0, s0, s2
176; GFX12-GISEL-NEXT:    s_minimum_f16 s0, s1, s3
177; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
178; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
179; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, s0
180; GFX12-GISEL-NEXT:    ; return to shader part epilog
181  %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
182  ret <3 x half> %val
183}
184
; <4 x half> llvm.minimum with ordinary arguments. Expected output is two
; packed v_pk_minimum_f16 instructions: v0 with v2, and v1 with v3.
185define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) {
186; GCN-LABEL: test_fminimum_v4f16:
187; GCN:       ; %bb.0:
188; GCN-NEXT:    v_pk_minimum_f16 v0, v0, v2
189; GCN-NEXT:    v_pk_minimum_f16 v1, v1, v3
190; GCN-NEXT:    ; return to shader part epilog
191  %val = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
192  ret <4 x half> %val
193}
194
; <4 x half> llvm.minimum with both vectors marked inreg. Expected output is
; two v_pk_minimum_f16 instructions that read the SGPR operands directly
; (s0 with s2, s1 with s3) and write v0 and v1.
195define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x half> inreg %b) {
196; GCN-LABEL: test_fminimum_v4f16_ss:
197; GCN:       ; %bb.0:
198; GCN-NEXT:    v_pk_minimum_f16 v0, s0, s2
199; GCN-NEXT:    v_pk_minimum_f16 v1, s1, s3
200; GCN-NEXT:    ; return to shader part epilog
201  %val = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
202  ret <4 x half> %val
203}
204
; Scalar double llvm.minimum with ordinary arguments. The double result is
; bitcast to <2 x float> before being returned. Expected output is a single
; v_minimum_f64 on the register pairs v[0:1] and v[2:3].
205define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) {
206; GCN-LABEL: test_fminimum_f64_vv:
207; GCN:       ; %bb.0:
208; GCN-NEXT:    v_minimum_f64 v[0:1], v[0:1], v[2:3]
209; GCN-NEXT:    ; return to shader part epilog
210  %val = call double @llvm.minimum.f64(double %a, double %b)
211  %ret = bitcast double %val to <2 x float>
212  ret <2 x float> %ret
213}
214
; Scalar double llvm.minimum with both operands marked inreg; the result is
; bitcast to <2 x float> for the return. Expected output has no s_ instruction:
; one v_minimum_f64 reads s[0:1] and s[2:3] directly and writes v[0:1].
215define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg %b) {
216; GCN-LABEL: test_fminimum_f64_ss:
217; GCN:       ; %bb.0:
218; GCN-NEXT:    v_minimum_f64 v[0:1], s[0:1], s[2:3]
219; GCN-NEXT:    ; return to shader part epilog
220  %val = call double @llvm.minimum.f64(double %a, double %b)
221  %ret = bitcast double %val to <2 x float>
222  ret <2 x float> %ret
223}
224
; <2 x double> llvm.minimum with both vectors marked inreg; the result is
; bitcast to <4 x float> for the return. Expected output is one v_minimum_f64
; per element, each reading SGPR pairs directly: s[0:1] with s[4:5], and
; s[2:3] with s[6:7].
225define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x double> inreg %b) {
226; GCN-LABEL: test_fminimum_v2f64_ss:
227; GCN:       ; %bb.0:
228; GCN-NEXT:    v_minimum_f64 v[0:1], s[0:1], s[4:5]
229; GCN-NEXT:    v_minimum_f64 v[2:3], s[2:3], s[6:7]
230; GCN-NEXT:    ; return to shader part epilog
231  %val = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
232  %ret = bitcast <2 x double> %val to <4 x float>
233  ret <4 x float> %ret
234}
235
; <4 x double> llvm.minimum with ordinary arguments; the result is bitcast to
; <8 x float> for the return. Expected output is four v_minimum_f64
; instructions, pairing v[0:7] with v[8:15] one register pair at a time.
236define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double> %b) {
237; GCN-LABEL: test_fminimum_v4f64:
238; GCN:       ; %bb.0:
239; GCN-NEXT:    v_minimum_f64 v[0:1], v[0:1], v[8:9]
240; GCN-NEXT:    v_minimum_f64 v[2:3], v[2:3], v[10:11]
241; GCN-NEXT:    v_minimum_f64 v[4:5], v[4:5], v[12:13]
242; GCN-NEXT:    v_minimum_f64 v[6:7], v[6:7], v[14:15]
243; GCN-NEXT:    ; return to shader part epilog
244  %val = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b)
245  %ret = bitcast <4 x double> %val to <8 x float>
246  ret <8 x float> %ret
247}
248
; <4 x double> llvm.minimum with both vectors marked inreg; the result is
; bitcast to <8 x float> for the return. Expected output is four v_minimum_f64
; instructions reading SGPR pairs directly (s[0:7] paired with s[8:15]) and
; writing v[0:7].
249define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x double> inreg %b) {
250; GCN-LABEL: test_fminimum_v4f64_ss:
251; GCN:       ; %bb.0:
252; GCN-NEXT:    v_minimum_f64 v[0:1], s[0:1], s[8:9]
253; GCN-NEXT:    v_minimum_f64 v[2:3], s[2:3], s[10:11]
254; GCN-NEXT:    v_minimum_f64 v[4:5], s[4:5], s[12:13]
255; GCN-NEXT:    v_minimum_f64 v[6:7], s[6:7], s[14:15]
256; GCN-NEXT:    ; return to shader part epilog
257  %val = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b)
258  %ret = bitcast <4 x double> %val to <8 x float>
259  ret <8 x float> %ret
260}
261
; Kernel (amdgpu_kernel) variant: both f32 operands come from volatile global
; loads through %aptr and %bptr, and the minimum is stored through %out.
; Expected output: the kernel arguments are fetched with s_load_b128/s_load_b64,
; each operand is loaded with global_load_b32 into a VGPR, the minimum is
; computed with v_minimum_f32, and the result is written with global_store_b32.
; Presumably the point (per the "move_to_valu" name) is that the vector form is
; chosen because the operands live in VGPRs -- confirm with the test author.
; NOTE(review): the name is spelled "fminimumi", unlike the sibling
; fminimum_f16_move_to_valu test; this looks like a typo. Renaming would also
; require updating the matching label check, so it is left unchanged here.
262define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
263; GCN-LABEL: fminimumi_f32_move_to_valu:
264; GCN:       ; %bb.0:
265; GCN-NEXT:    s_clause 0x1
266; GCN-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
267; GCN-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
268; GCN-NEXT:    v_mov_b32_e32 v0, 0
269; GCN-NEXT:    s_wait_kmcnt 0x0
270; GCN-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
271; GCN-NEXT:    s_wait_loadcnt 0x0
272; GCN-NEXT:    global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
273; GCN-NEXT:    s_wait_loadcnt 0x0
274; GCN-NEXT:    v_minimum_f32 v1, v1, v2
275; GCN-NEXT:    global_store_b32 v0, v1, s[0:1]
276; GCN-NEXT:    s_endpgm
277  %a = load volatile float, ptr addrspace(1) %aptr, align 4
278  %b = load volatile float, ptr addrspace(1) %bptr, align 4
279  %v = call float @llvm.minimum.f32(float %a, float %b)
280  store float %v, ptr addrspace(1) %out, align 4
281  ret void
282}
283
; Kernel (amdgpu_kernel) variant for half: both operands come from volatile
; global loads through %aptr and %bptr, and the minimum is stored through %out.
; Expected output: the kernel arguments are fetched with s_load_b128/s_load_b64,
; each operand is loaded with global_load_u16 into a VGPR, the minimum is
; computed with v_minimum_f16, and the result is written with global_store_b16.
; Note that the half loads and the store are declared with align 4 in the IR.
284define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
285; GCN-LABEL: fminimum_f16_move_to_valu:
286; GCN:       ; %bb.0:
287; GCN-NEXT:    s_clause 0x1
288; GCN-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
289; GCN-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
290; GCN-NEXT:    v_mov_b32_e32 v0, 0
291; GCN-NEXT:    s_wait_kmcnt 0x0
292; GCN-NEXT:    global_load_u16 v1, v0, s[2:3] scope:SCOPE_SYS
293; GCN-NEXT:    s_wait_loadcnt 0x0
294; GCN-NEXT:    global_load_u16 v2, v0, s[4:5] scope:SCOPE_SYS
295; GCN-NEXT:    s_wait_loadcnt 0x0
296; GCN-NEXT:    v_minimum_f16 v1, v1, v2
297; GCN-NEXT:    global_store_b16 v0, v1, s[0:1]
298; GCN-NEXT:    s_endpgm
299  %a = load volatile half, ptr addrspace(1) %aptr, align 4
300  %b = load volatile half, ptr addrspace(1) %bptr, align 4
301  %v = call half @llvm.minimum.f16(half %a, half %b)
302  store half %v, ptr addrspace(1) %out, align 4
303  ret void
304}
305
306declare float @llvm.minimum.f32(float, float)
307declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
308declare <3 x float> @llvm.minimum.v3f32(<3 x float>, <3 x float>)
309declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
310declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
311declare half @llvm.minimum.f16(half, half)
312declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>)
313declare <3 x half> @llvm.minimum.v3f16(<3 x half>, <3 x half>)
314declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
315declare double @llvm.minimum.f64(double, double)
316declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
317declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
318