xref: /llvm-project/llvm/test/CodeGen/AMDGPU/smed3.ll (revision 49357b22dbb26d4aa6816dee279df70f1a2cd695)
1; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
6
7declare i32 @llvm.amdgcn.workitem.id.x() #0
8
; Per-lane i32 load clamped to the constant range [12, 17] with a signed
; max followed by a signed min; both selects are single-use.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i32:
; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_smed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr i32, ptr addrspace(1) %aptr, i32 %lane
  %dst = getelementptr i32, ptr addrspace(1) %out, i32 %lane
  %val = load i32, ptr addrspace(1) %src

  ; smax(%val, 12)
  %gt.lo = icmp sgt i32 %val, 12
  %lo.clamped = select i1 %gt.lo, i32 %val, i32 12

  ; smin(%lo.clamped, 17)
  %lt.hi = icmp slt i32 %lo.clamped, 17
  %clamped = select i1 %lt.hi, i32 %lo.clamped, i32 17

  store i32 %clamped, ptr addrspace(1) %dst
  ret void
}
26
; Same [12, 17] clamp, but the intermediate max is also stored, so it has a
; second use; the checks expect a separate max and min here.
; GCN-LABEL: {{^}}v_test_smed3_multi_use_r_i_i_i32:
; GCN: v_max_i32
; GCN: v_min_i32
define amdgpu_kernel void @v_test_smed3_multi_use_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr i32, ptr addrspace(1) %aptr, i32 %lane
  %dst = getelementptr i32, ptr addrspace(1) %out, i32 %lane
  %val = load i32, ptr addrspace(1) %src

  ; smax(%val, 12)
  %gt.lo = icmp sgt i32 %val, 12
  %lo.clamped = select i1 %gt.lo, i32 %val, i32 12

  ; smin(%lo.clamped, 17)
  %lt.hi = icmp slt i32 %lo.clamped, 17
  %clamped = select i1 %lt.hi, i32 %lo.clamped, i32 17

  ; Volatile stores keep both the intermediate and the final value live.
  store volatile i32 %lo.clamped, ptr addrspace(1) %dst
  store volatile i32 %clamped, ptr addrspace(1) %dst
  ret void
}
46
; The lower bound uses an unsigned compare while the upper bound uses a signed
; one; the checks expect the two operations to stay separate.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_sign_mismatch_i32:
; GCN: v_max_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
; GCN: v_min_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
define amdgpu_kernel void @v_test_smed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr i32, ptr addrspace(1) %aptr, i32 %lane
  %dst = getelementptr i32, ptr addrspace(1) %out, i32 %lane
  %val = load i32, ptr addrspace(1) %src

  ; umax(%val, 12) -- unsigned on purpose
  %ugt.lo = icmp ugt i32 %val, 12
  %lo.clamped = select i1 %ugt.lo, i32 %val, i32 12

  ; smin(%lo.clamped, 17)
  %lt.hi = icmp slt i32 %lo.clamped, 17
  %clamped = select i1 %lt.hi, i32 %lo.clamped, i32 17

  store i32 %clamped, ptr addrspace(1) %dst
  ret void
}
65
; 64-bit variant of the [12, 17] clamp; the checks expect 64-bit compares
; rather than a med3 instruction.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i64:
; GCN: v_cmp_lt_i64
; GCN: v_cmp_gt_i64
define amdgpu_kernel void @v_test_smed3_r_i_i_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr i64, ptr addrspace(1) %aptr, i32 %lane
  %dst = getelementptr i64, ptr addrspace(1) %out, i32 %lane
  %val = load i64, ptr addrspace(1) %src

  ; smax(%val, 12)
  %gt.lo = icmp sgt i64 %val, 12
  %lo.clamped = select i1 %gt.lo, i64 %val, i64 12

  ; smin(%lo.clamped, 17)
  %lt.hi = icmp slt i64 %lo.clamped, 17
  %clamped = select i1 %lt.hi, i64 %lo.clamped, i64 17

  store i64 %clamped, ptr addrspace(1) %dst
  ret void
}
84
; Regression test: performIntMed3ImmCombine extends its arguments to 32 bits,
; which failed for 64-bit arguments (it used to assert / crash). Clamps an
; i64 to [-2, 2] through the smax/smin intrinsics.
; GCN-LABEL: {{^}}test_intMed3ImmCombine_no_32bit_extend:
; GCN: v_cmp_lt_i64
; GCN: v_cmp_gt_i64
define i64 @test_intMed3ImmCombine_no_32bit_extend(i64 %x) {
  %lower.bounded = call i64 @llvm.smax.i64(i64 %x, i64 -2)
  %clamped = call i64 @llvm.smin.i64(i64 %lower.bounded, i64 2)
  ret i64 %clamped
}
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
97
; 16-bit variant of the [12, 17] clamp. The expected lowering differs per
; subtarget, so each run line has its own check below. The register patterns
; on the tonga checks accept multi-digit register numbers so that the captured
; max result is a whole register name, not just its first digit.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i16:
; SI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; VI: v_max_i16_e32 [[MAX:v[0-9]+]], 12, {{v[0-9]+}}
; VI: v_min_i16_e32 {{v[0-9]+}}, 17, [[MAX]]
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, 12, 17
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr i16, ptr addrspace(1) %out, i32 %tid
  %a = load i16, ptr addrspace(1) %gep0

  ; smax(%a, 12)
  %icmp0 = icmp sgt i16 %a, 12
  %i0 = select i1 %icmp0, i16 %a, i16 12

  ; smin(%i0, 17)
  %icmp1 = icmp slt i16 %i0, 17
  %i1 = select i1 %icmp1, i16 %i0, i16 17

  store i16 %i1, ptr addrspace(1) %outgep
  ret void
}
120
121
; Always-inlined helper: signed minimum of two i32 values, written as
; compare + select.
define internal i32 @smin(i32 %x, i32 %y) #2 {
  %x.lt.y = icmp slt i32 %x, %y
  %min = select i1 %x.lt.y, i32 %x, i32 %y
  ret i32 %min
}
127
; Always-inlined helper: signed maximum of two i32 values, written as
; compare + select.
define internal i32 @smax(i32 %x, i32 %y) #2 {
  %x.gt.y = icmp sgt i32 %x, %y
  %max = select i1 %x.gt.y, i32 %x, i32 %y
  ret i32 %max
}
133
; Always-inlined helper: signed minimum of two i16 values, written as
; compare + select.
define internal i16 @smin16(i16 %x, i16 %y) #2 {
  %x.lt.y = icmp slt i16 %x, %y
  %min = select i1 %x.lt.y, i16 %x, i16 %y
  ret i16 %min
}
139
; Always-inlined helper: signed maximum of two i16 values, written as
; compare + select.
define internal i16 @smax16(i16 %x, i16 %y) #2 {
  %x.gt.y = icmp sgt i16 %x, %y
  %max = select i1 %x.gt.y, i16 %x, i16 %y
  ret i16 %max
}
145
; Always-inlined helper: signed minimum of two i8 values, written as
; compare + select.
define internal i8 @smin8(i8 %x, i8 %y) #2 {
  %x.lt.y = icmp slt i8 %x, %y
  %min = select i1 %x.lt.y, i8 %x, i8 %y
  ret i8 %min
}
151
; Always-inlined helper: signed maximum of two i8 values, written as
; compare + select.
define internal i8 @smax8(i8 %x, i8 %y) #2 {
  %x.gt.y = icmp sgt i8 %x, %y
  %max = select i1 %x.gt.y, i8 %x, i8 %y
  ret i8 %max
}
157
158; 16 combinations
159
160; 0: max(min(x, y), min(max(x, y), z))
161; 1: max(min(x, y), min(max(y, x), z))
162; 2: max(min(x, y), min(z, max(x, y)))
163; 3: max(min(x, y), min(z, max(y, x)))
164; 4: max(min(y, x), min(max(x, y), z))
165; 5: max(min(y, x), min(max(y, x), z))
166; 6: max(min(y, x), min(z, max(x, y)))
167; 7: max(min(y, x), min(z, max(y, x)))
168;
169; + commute outermost max
170
171
172; FIXME: In these cases we probably should have used scalar operations
173; instead.
174
; Pattern 0: max(min(x, y), min(max(x, y), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
186
; Pattern 1: max(min(x, y), min(max(y, x), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_1:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
198
; Pattern 2: max(min(x, y), min(z, max(x, y)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_2:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
210
; Pattern 3: max(min(x, y), min(z, max(y, x)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_3:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_3(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
222
; Pattern 4: max(min(y, x), min(max(x, y), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_4:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_4(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
234
; Pattern 5: max(min(y, x), min(max(y, x), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_5:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_5(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
246
; Pattern 6: max(min(y, x), min(z, max(x, y)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_6:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_6(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
258
; Pattern 7: max(min(y, x), min(z, max(y, x)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_7:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_7(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
270
; Pattern 8 (pattern 0 with the outer max commuted):
;   max(min(max(x, y), z), min(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_8:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_8(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
282
; Pattern 9 (pattern 1 with the outer max commuted):
;   max(min(max(y, x), z), min(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_9:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_9(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
294
; Pattern 10 (pattern 2 with the outer max commuted):
;   max(min(z, max(x, y)), min(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_10:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_10(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
306
; Pattern 11 (pattern 3 with the outer max commuted):
;   max(min(z, max(y, x)), min(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_11:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_11(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
318
; Pattern 12 (pattern 4 with the outer max commuted):
;   max(min(max(x, y), z), min(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_12:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_12(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
330
; Pattern 13 (pattern 5 with the outer max commuted):
;   max(min(max(y, x), z), min(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_13:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_13(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
342
; Pattern 14 (pattern 6 with the outer max commuted):
;   max(min(z, max(x, y)), min(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_14:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_14(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
354
; Pattern 15 (pattern 7 with the outer max commuted):
;   max(min(z, max(y, x)), min(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_15:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_15(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smin(i32 %z, i32 %hi)
  %med = call i32 @smax(i32 %inner, i32 %lo)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
366
367; 16 combinations
368
369; 16: min(max(x, y), max(min(x, y), z))
370; 17: min(max(x, y), max(min(y, x), z))
371; 18: min(max(x, y), max(z, min(x, y)))
372; 19: min(max(x, y), max(z, min(y, x)))
373; 20: min(max(y, x), max(min(x, y), z))
374; 21: min(max(y, x), max(min(y, x), z))
375; 22: min(max(y, x), max(z, min(x, y)))
376; 23: min(max(y, x), max(z, min(y, x)))
377;
378; + commute outermost min
379
; Pattern 16: min(max(x, y), max(min(x, y), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_16:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_16(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
391
; Pattern 17: min(max(x, y), max(min(y, x), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_17:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_17(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
403
; Pattern 18: min(max(x, y), max(z, min(x, y)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_18:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_18(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
415
; Pattern 19: min(max(x, y), max(z, min(y, x)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_19:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_19(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
427
; Pattern 20: min(max(y, x), max(min(x, y), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_20:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_20(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
439
; Pattern 21: min(max(y, x), max(min(y, x), z))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_21:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_21(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
451
; Pattern 22: min(max(y, x), max(z, min(x, y)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_22:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_22(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
463
; Pattern 23: min(max(y, x), max(z, min(y, x)))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_23:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_23(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %hi, i32 %inner)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
475
; Pattern 24 (pattern 16 with the outer min commuted):
;   min(max(min(x, y), z), max(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_24:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_24(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
487
; Pattern 25 (pattern 17 with the outer min commuted):
;   min(max(min(y, x), z), max(x, y))
; The outer min takes the inner max as its first operand, like the other
; patterns in the 24-31 group; with the operands the other way round this
; test would be an exact duplicate of pattern 17.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_25:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_25(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
  %tmp0 = call i32 @smin(i32 %y, i32 %x)
  %tmp1 = call i32 @smax(i32 %x, i32 %y)
  %tmp2 = call i32 @smax(i32 %tmp0, i32 %z)
  %tmp3 = call i32 @smin(i32 %tmp2, i32 %tmp1)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}
499
; Pattern 26 (pattern 18 with the outer min commuted):
;   min(max(z, min(x, y)), max(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_26:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_26(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
511
; Pattern 27 (pattern 19 with the outer min commuted):
;   min(max(z, min(y, x)), max(x, y))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_27:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_27(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
523
; Pattern 28 (pattern 20 with the outer min commuted):
;   min(max(min(x, y), z), max(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_28:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_28(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
535
; Pattern 29 (pattern 21 with the outer min commuted):
;   min(max(min(y, x), z), max(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_29:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_29(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %lo, i32 %z)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
547
; Pattern 30 (pattern 22 with the outer min commuted):
;   min(max(z, min(x, y)), max(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_30:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_30(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
559
; Pattern 31 (pattern 23 with the outer min commuted):
;   min(max(z, min(y, x)), max(y, x))
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_31:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_31(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %y, i32 %x)
  %hi = call i32 @smax(i32 %y, i32 %x)
  %inner = call i32 @smax(i32 %z, i32 %lo)
  %med = call i32 @smin(i32 %inner, i32 %hi)
  store i32 %med, ptr addrspace(1) %arg
  ret void
}
571
; Pattern 0 on i16 kernel arguments. The [8 x i32] padding arguments sit
; between the three i16 values. The checks expect three sign extensions to
; 32 bits followed by a 32-bit med3.
; FIXME: Should keep scalar or not promote
; GCN-LABEL: {{^}}s_test_smed3_i16_pat_0:
; GCN: s_sext_i32_i16
; GCN: s_sext_i32_i16
; GCN: s_sext_i32_i16
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i16_pat_0(ptr addrspace(1) %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) #1 {
entry:
  %lo = call i16 @smin16(i16 %x, i16 %y)
  %hi = call i16 @smax16(i16 %x, i16 %y)
  %inner = call i16 @smin16(i16 %hi, i16 %z)
  %med = call i16 @smax16(i16 %lo, i16 %inner)
  store i16 %med, ptr addrspace(1) %arg
  ret void
}
587
; Pattern 0 on i8 kernel arguments. The checks expect three sign extensions
; to 32 bits followed by a 32-bit med3.
; GCN-LABEL: {{^}}s_test_smed3_i8_pat_0:
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i8_pat_0(ptr addrspace(1) %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) #1 {
entry:
  %lo = call i8 @smin8(i8 %x, i8 %y)
  %hi = call i8 @smax8(i8 %x, i8 %y)
  %inner = call i8 @smin8(i8 %hi, i8 %z)
  %med = call i8 @smax8(i8 %lo, i8 %inner)
  store i8 %med, ptr addrspace(1) %arg
  ret void
}
602
; Pattern 0 where min(x, y) has an extra use (it is stored as well). The
; checks expect that one scalar min to remain, no other scalar min/max, and a
; med3 for the final value.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_0:
; GCN: s_min_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store volatile i32 %lo, ptr addrspace(1) %arg
  store volatile i32 %med, ptr addrspace(1) %arg
  ret void
}
617
; Pattern 0 where max(x, y) has an extra use (it is stored as well). The
; checks expect that one scalar max to remain, no other scalar min/max, and a
; med3 for the final value.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_1:
; GCN: s_max_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store volatile i32 %hi, ptr addrspace(1) %arg
  store volatile i32 %med, ptr addrspace(1) %arg
  ret void
}
632
; Pattern 0 where the inner min(max(x, y), z) has an extra use (it is stored
; as well). The checks expect one scalar max and one scalar min to remain, no
; further scalar min/max, and a med3 for the final value.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_2:
; GCN: s_max_i32
; GCN: s_min_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store volatile i32 %inner, ptr addrspace(1) %arg
  store volatile i32 %med, ptr addrspace(1) %arg
  ret void
}
648
; Pattern 0 where only the final result has multiple uses (it is stored
; twice). The checks expect no scalar min/max before the med3.
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_result:
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_result(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
entry:
  %lo = call i32 @smin(i32 %x, i32 %y)
  %hi = call i32 @smax(i32 %x, i32 %y)
  %inner = call i32 @smin(i32 %hi, i32 %z)
  %med = call i32 @smax(i32 %lo, i32 %inner)
  store volatile i32 %med, ptr addrspace(1) %arg
  store volatile i32 %med, ptr addrspace(1) %arg
  ret void
}
662
; Two values, %x and %y, are each clamped between the same pair of bounds
; min(b0, b1) and max(b0, b1). The bounds are shared by both clamps; the
; checks expect two med3 instructions that reuse the same bound registers and
; no scalar min/max before them.
; The label check ends with ':' like every other label check in this file, so
; it can only match this function's own label.
; GCN-LABEL: {{^}}s_test_smed3_reuse_bounds:
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32 v{{[0-9]+}}, [[B0:s[0-9]+]], [[B1:v[0-9]+]], v{{[0-9]+}}
; GCN: v_med3_i32 v{{[0-9]+}}, [[B0]], [[B1]], v{{[0-9]+}}
define amdgpu_kernel void @s_test_smed3_reuse_bounds(ptr addrspace(1) %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) #1 {
bb:
  ; Shared lower and upper bound.
  %lo = call i32 @smin(i32 %b0, i32 %b1)
  %hi = call i32 @smax(i32 %b0, i32 %b1)

  ; max(min(x, hi), lo)
  %tmp0 = call i32 @smin(i32 %x, i32 %hi)
  %z0 = call i32 @smax(i32 %tmp0, i32 %lo)

  ; max(min(y, hi), lo)
  %tmp1 = call i32 @smin(i32 %y, i32 %hi)
  %z1 = call i32 @smax(i32 %tmp1, i32 %lo)

  store volatile i32 %z0, ptr addrspace(1) %arg
  store volatile i32 %z1, ptr addrspace(1) %arg
  ret void
}
682
; Pattern 0, max(min(x, y), min(max(x, y), z)), on three i16 values loaded
; per lane from %a.ptr at element offsets 0, 3 and 8 from the lane's slot.
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_0:
; SI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; FIXME: VI not matching med3
; VI: v_min_i16
; VI: v_max_i16
; VI: v_min_i16
; VI: v_max_i16

; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_smed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %x.ptr = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %lane
  %y.ptr = getelementptr inbounds i16, ptr addrspace(1) %x.ptr, i32 3
  %z.ptr = getelementptr inbounds i16, ptr addrspace(1) %x.ptr, i32 8
  %dst = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %lane
  %x = load i16, ptr addrspace(1) %x.ptr
  %y = load i16, ptr addrspace(1) %y.ptr
  %z = load i16, ptr addrspace(1) %z.ptr

  %lo = call i16 @smin16(i16 %x, i16 %y)
  %hi = call i16 @smax16(i16 %x, i16 %y)
  %inner = call i16 @smin16(i16 %hi, i16 %z)
  %med = call i16 @smax16(i16 %lo, i16 %inner)
  store i16 %med, ptr addrspace(1) %dst
  ret void
}
713
; min(max(x, y), max(min(x, y), z)) on three i16 values loaded per lane from
; %a.ptr at element offsets 0, 3 and 8 from the lane's slot.
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX11-TRUE16: v_med3_i16 v{{[0-9]+}}.l, v{{[0-9]+}}.l, v{{[0-9]+}}.h, v{{[0-9]+}}.l
; GFX11-FAKE16: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

define amdgpu_kernel void @v_test_smed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
entry:
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %x.ptr = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %lane
  %y.ptr = getelementptr inbounds i16, ptr addrspace(1) %x.ptr, i32 3
  %z.ptr = getelementptr inbounds i16, ptr addrspace(1) %x.ptr, i32 8
  %dst = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %lane
  %x = load i16, ptr addrspace(1) %x.ptr
  %y = load i16, ptr addrspace(1) %y.ptr
  %z = load i16, ptr addrspace(1) %z.ptr

  %lo = call i16 @smin16(i16 %x, i16 %y)
  %hi = call i16 @smax16(i16 %x, i16 %y)
  %inner = call i16 @smax16(i16 %lo, i16 %z)
  %med = call i16 @smin16(i16 %hi, i16 %inner)
  store i16 %med, ptr addrspace(1) %dst
  ret void
}
737
; Attribute groups used in this file: #0 is on the workitem-id intrinsic
; declaration, #1 on the kernels, #2 on the always-inlined min/max helpers.
attributes #0 = { readnone nounwind }
attributes #1 = { nounwind }
attributes #2 = { alwaysinline readnone nounwind }
741