; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-linux -mattr=avx | FileCheck %s --check-prefixes=X64,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=avx2 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

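; Lowering of the llvm.smax.* intrinsic for scalar (including the illegal i24
; and i128 cases) and vector types, on x86-64 SSE/AVX/AVX2 and i686 with cmov.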
declare i8 @llvm.smax.i8(i8, i8)
declare i16 @llvm.smax.i16(i16, i16)
declare i24 @llvm.smax.i24(i24, i24)
declare i32 @llvm.smax.i32(i32, i32)
declare i64 @llvm.smax.i64(i64, i64)
declare i128 @llvm.smax.i128(i128, i128)

declare <1 x i32> @llvm.smax.v1i32(<1 x i32>, <1 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <3 x i32> @llvm.smax.v3i32(<3 x i32>, <3 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)

declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)

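; Scalar i8/i16: the compare stays at the original width, the select uses a
; 32-bit cmov.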
define i8 @test_i8(i8 %a, i8 %b) nounwind {
; X64-LABEL: test_i8:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %r = call i8 @llvm.smax.i8(i8 %a, i8 %b)
  ret i8 %r
}

define i16 @test_i16(i16 %a, i16 %b) nounwind {
; X64-LABEL: test_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpw %ax, %di
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %r = call i16 @llvm.smax.i16(i16 %a, i16 %b)
  ret i16 %r
}

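; i24 is promoted to i32 by sign-extending both operands in place (shl+sar by
; 8) before the compare.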
define i24 @test_i24(i24 %a, i24 %b) nounwind {
; X64-LABEL: test_i24:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %esi
; X64-NEXT:    sarl $8, %esi
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    sarl $8, %eax
; X64-NEXT:    cmpl %esi, %eax
; X64-NEXT:    cmovlel %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i24:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $8, %ecx
; X86-NEXT:    sarl $8, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    sarl $8, %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovlel %ecx, %eax
; X86-NEXT:    retl
  %r = call i24 @llvm.smax.i24(i24 %a, i24 %b)
  ret i24 %r
}

define i32 @test_i32(i32 %a, i32 %b) nounwind {
; X64-LABEL: test_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    retl
  %r = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  ret i32 %r
}

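; i64: a single cmp+cmov on x86-64; on i686 the 64-bit compare is done with
; cmp+sbb and both halves are selected with cmov.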
define i64 @test_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: test_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    cmovgq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: test_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    sbbl %esi, %edi
; X86-NEXT:    cmovll %ecx, %eax
; X86-NEXT:    cmovll %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
  %r = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  ret i64 %r
}

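; i128: cmp+sbb compare feeding a pair of cmovs on x86-64; on i686 the result
; is returned through a pointer (retl $4).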
define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_i128:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    cmpq %rdi, %rdx
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    sbbq %rsi, %rdx
; X64-NEXT:    cmovlq %rdi, %rax
; X64-NEXT:    cmovlq %rsi, %rcx
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    retq
;
; X86-LABEL: test_i128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmpl %ebx, %edx
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %edi, %ebp
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    sbbl %ebp, %eax
; X86-NEXT:    cmovll %ebx, %edx
; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %edi
; X86-NEXT:    cmovll %ebp, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edi, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call i128 @llvm.smax.i128(i128 %a, i128 %b)
  ret i128 %r
}

define <1 x i32> @test_v1i32(<1 x i32> %a, <1 x i32> %b) nounwind {
; X64-LABEL: test_v1i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_v1i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    retl
  %r = call <1 x i32> @llvm.smax.v1i32(<1 x i32> %a, <1 x i32> %b)
  ret <1 x i32> %r
}

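; <N x i32>: SSE2 has no pmaxsd, so the max is a pcmpgtd/pand/pandn/por blend;
; AVX selects vpmaxsd; the i686 run scalarizes to cmp+cmov.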
define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE-LABEL: test_v2i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmpl %eax, %esi
; X86-NEXT:    cmovgl %esi, %eax
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    cmovgl %ecx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %r = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %r
}

define <3 x i32> @test_v3i32(<3 x i32> %a, <3 x i32> %b) nounwind {
; SSE-LABEL: test_v3i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v3i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v3i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    cmpl %eax, %ebx
; X86-NEXT:    cmovgl %ebx, %eax
; X86-NEXT:    cmpl %edx, %edi
; X86-NEXT:    cmovgl %edi, %edx
; X86-NEXT:    cmpl %ecx, %esi
; X86-NEXT:    cmovgl %esi, %ecx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %r = call <3 x i32> @llvm.smax.v3i32(<3 x i32> %a, <3 x i32> %b)
  ret <3 x i32> %r
}

define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v4i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edi, %eax
; X86-NEXT:    cmovgl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    cmovgl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edx, %eax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %r = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %r
}

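; <8 x i32>: AVX1 splits the 256-bit operation into two 128-bit vpmaxsd ops;
; AVX2 uses a single 256-bit vpmaxsd.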
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm4
; SSE-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm4
; SSE-NEXT:    por %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; X86-LABEL: test_v8i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ebp, %eax
; X86-NEXT:    cmovgl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ebx, %eax
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edi, %eax
; X86-NEXT:    cmovgl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    cmovgl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edx, %eax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edx, %eax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, 28(%ecx)
; X86-NEXT:    movl %edx, 24(%ecx)
; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, 20(%ecx)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, 16(%ecx)
; X86-NEXT:    movl %esi, 12(%ecx)
; X86-NEXT:    movl %edi, 8(%ecx)
; X86-NEXT:    movl %ebx, 4(%ecx)
; X86-NEXT:    movl %ebp, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
  ret <8 x i32> %r
}

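; <8 x i16>: pmaxsw is available from SSE2, so no compare/blend sequence is
; needed.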
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v8i16:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %bp, %ax
; X86-NEXT:    cmovgl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %bx, %ax
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %di, %ax
; X86-NEXT:    cmovgl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %si, %ax
; X86-NEXT:    cmovgl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %dx, %ax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %cx, %ax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %dx, %ax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movw %ax, 14(%ecx)
; X86-NEXT:    movw %dx, 12(%ecx)
; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-NEXT:    movw %ax, 10(%ecx)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movw %ax, 8(%ecx)
; X86-NEXT:    movw %si, 6(%ecx)
; X86-NEXT:    movw %di, 4(%ecx)
; X86-NEXT:    movw %bx, 2(%ecx)
; X86-NEXT:    movw %bp, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %r
}

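; <16 x i8>: pmaxsb requires SSE4.1, so plain SSE2 falls back to a
; pcmpgtb/pand/pandn/por blend.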
define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v16i8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $40, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %bl, %al
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %dl, %al
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %bl, %al
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %dl, %al
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb %cl, 15(%eax)
; X86-NEXT:    movb %dl, 14(%eax)
; X86-NEXT:    movb %bl, 13(%eax)
; X86-NEXT:    movl %esi, %ecx
; X86-NEXT:    movb %cl, 12(%eax)
; X86-NEXT:    movl %edi, %ecx
; X86-NEXT:    movb %cl, 11(%eax)
; X86-NEXT:    movl %ebp, %ecx
; X86-NEXT:    movb %cl, 10(%eax)
; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 9(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 8(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 7(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 6(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 5(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 4(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 3(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 2(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 1(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    addl $40, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %r
}

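; The test_signbits_* cases feed ashr results to smax: the known sign bits let
; the wider types be handled at half the width (e.g. i128 on x86-64 needs only
; a single 64-bit cmp+cmov).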
define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
; X64-LABEL: test_signbits_i16:
; X64:       # %bb.0:
; X64-NEXT:    movswl %si, %eax
; X64-NEXT:    movswl %di, %ecx
; X64-NEXT:    shrl $15, %ecx
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    cmpw %ax, %cx
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: test_signbits_i16:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $15, %eax
; X86-NEXT:    cmpw %cx, %ax
; X86-NEXT:    cmovlel %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %ax = ashr i16 %a, 15
  %bx = ashr i16 %b, 8
  %r = call i16 @llvm.smax.i16(i16 %ax, i16 %bx)
  ret i16 %r
}

define i32 @test_signbits_i32(i32 %a, i32 %b) nounwind {
; X64-LABEL: test_signbits_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    sarl $16, %edi
; X64-NEXT:    sarl $17, %eax
; X64-NEXT:    cmpl %eax, %edi
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_signbits_i32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl $17, %eax
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    retl
  %ax = ashr i32 %a, 16
  %bx = ashr i32 %b, 17
  %r = call i32 @llvm.smax.i32(i32 %ax, i32 %bx)
  ret i32 %r
}

define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: test_signbits_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    sarq $32, %rdi
; X64-NEXT:    sarq $32, %rax
; X64-NEXT:    cmpq %rax, %rdi
; X64-NEXT:    cmovgq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: test_signbits_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
  %ax = ashr i64 %a, 32
  %bx = ashr i64 %b, 32
  %r = call i64 @llvm.smax.i64(i64 %ax, i64 %bx)
  ret i64 %r
}

define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_signbits_i128:
; X64:       # %bb.0:
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    sarq $28, %rax
; X64-NEXT:    cmpq %rax, %rsi
; X64-NEXT:    cmovgq %rsi, %rax
; X64-NEXT:    movq %rax, %rdx
; X64-NEXT:    sarq $63, %rdx
; X64-NEXT:    retq
;
; X86-LABEL: test_signbits_i128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    shrdl $28, %edi, %ecx
; X86-NEXT:    sarl $28, %edi
; X86-NEXT:    cmpl %esi, %ecx
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    sbbl %edx, %ebx
; X86-NEXT:    cmovll %esi, %ecx
; X86-NEXT:    cmovll %edx, %edi
; X86-NEXT:    movl %edi, 4(%eax)
; X86-NEXT:    sarl $31, %edi
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %edi, 8(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl $4
  %ax = ashr i128 %a, 64
  %bx = ashr i128 %b, 92
  %r = call i128 @llvm.smax.i128(i128 %ax, i128 %bx)
  ret i128 %r
}