xref: /llvm-project/llvm/test/CodeGen/X86/scmp.ll (revision 0ac2e42227ff565a8eab4c7c65c3ddb36aff3409)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64    | FileCheck %s --check-prefixes=X64,SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
6; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
7
8define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
9; X64-LABEL: scmp.8.8:
10; X64:       # %bb.0:
11; X64-NEXT:    cmpb %sil, %dil
12; X64-NEXT:    setl %cl
13; X64-NEXT:    setg %al
14; X64-NEXT:    subb %cl, %al
15; X64-NEXT:    retq
16;
17; X86-LABEL: scmp.8.8:
18; X86:       # %bb.0:
19; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
20; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
21; X86-NEXT:    setl %cl
22; X86-NEXT:    setg %al
23; X86-NEXT:    subb %cl, %al
24; X86-NEXT:    retl
25  %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
26  ret i8 %1
27}
28
29define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
30; X64-LABEL: scmp.8.16:
31; X64:       # %bb.0:
32; X64-NEXT:    cmpw %si, %di
33; X64-NEXT:    setl %cl
34; X64-NEXT:    setg %al
35; X64-NEXT:    subb %cl, %al
36; X64-NEXT:    retq
37;
38; X86-LABEL: scmp.8.16:
39; X86:       # %bb.0:
40; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
41; X86-NEXT:    cmpw {{[0-9]+}}(%esp), %ax
42; X86-NEXT:    setl %cl
43; X86-NEXT:    setg %al
44; X86-NEXT:    subb %cl, %al
45; X86-NEXT:    retl
46  %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
47  ret i8 %1
48}
49
50define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
51; X64-LABEL: scmp.8.32:
52; X64:       # %bb.0:
53; X64-NEXT:    cmpl %esi, %edi
54; X64-NEXT:    setl %cl
55; X64-NEXT:    setg %al
56; X64-NEXT:    subb %cl, %al
57; X64-NEXT:    retq
58;
59; X86-LABEL: scmp.8.32:
60; X86:       # %bb.0:
61; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
62; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
63; X86-NEXT:    setl %cl
64; X86-NEXT:    setg %al
65; X86-NEXT:    subb %cl, %al
66; X86-NEXT:    retl
67  %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
68  ret i8 %1
69}
70
71define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
72; X64-LABEL: scmp.8.64:
73; X64:       # %bb.0:
74; X64-NEXT:    cmpq %rsi, %rdi
75; X64-NEXT:    setl %cl
76; X64-NEXT:    setg %al
77; X64-NEXT:    subb %cl, %al
78; X64-NEXT:    retq
79;
80; X86-LABEL: scmp.8.64:
81; X86:       # %bb.0:
82; X86-NEXT:    pushl %ebx
83; X86-NEXT:    pushl %edi
84; X86-NEXT:    pushl %esi
85; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
86; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
87; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
88; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
89; X86-NEXT:    cmpl %eax, %edx
90; X86-NEXT:    movl %esi, %edi
91; X86-NEXT:    sbbl %ecx, %edi
92; X86-NEXT:    setl %bl
93; X86-NEXT:    cmpl %edx, %eax
94; X86-NEXT:    sbbl %esi, %ecx
95; X86-NEXT:    setl %al
96; X86-NEXT:    subb %bl, %al
97; X86-NEXT:    popl %esi
98; X86-NEXT:    popl %edi
99; X86-NEXT:    popl %ebx
100; X86-NEXT:    retl
101  %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
102  ret i8 %1
103}
104
105define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
106; X64-LABEL: scmp.8.128:
107; X64:       # %bb.0:
108; X64-NEXT:    cmpq %rdx, %rdi
109; X64-NEXT:    movq %rsi, %rax
110; X64-NEXT:    sbbq %rcx, %rax
111; X64-NEXT:    setl %r8b
112; X64-NEXT:    cmpq %rdi, %rdx
113; X64-NEXT:    sbbq %rsi, %rcx
114; X64-NEXT:    setl %al
115; X64-NEXT:    subb %r8b, %al
116; X64-NEXT:    retq
117;
118; X86-LABEL: scmp.8.128:
119; X86:       # %bb.0:
120; X86-NEXT:    pushl %ebp
121; X86-NEXT:    pushl %ebx
122; X86-NEXT:    pushl %edi
123; X86-NEXT:    pushl %esi
124; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
125; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
126; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
127; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
128; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
129; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
130; X86-NEXT:    movl %ebx, %ebp
131; X86-NEXT:    sbbl %edx, %ebp
132; X86-NEXT:    movl %ecx, %ebp
133; X86-NEXT:    sbbl %eax, %ebp
134; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
135; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
136; X86-NEXT:    movl %esi, %ecx
137; X86-NEXT:    sbbl %ebp, %ecx
138; X86-NEXT:    setl %cl
139; X86-NEXT:    cmpl %edi, {{[0-9]+}}(%esp)
140; X86-NEXT:    sbbl %ebx, %edx
141; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
142; X86-NEXT:    sbbl %esi, %ebp
143; X86-NEXT:    setl %al
144; X86-NEXT:    subb %cl, %al
145; X86-NEXT:    popl %esi
146; X86-NEXT:    popl %edi
147; X86-NEXT:    popl %ebx
148; X86-NEXT:    popl %ebp
149; X86-NEXT:    retl
150  %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
151  ret i8 %1
152}
153
154define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
155; X64-LABEL: scmp.32.32:
156; X64:       # %bb.0:
157; X64-NEXT:    cmpl %esi, %edi
158; X64-NEXT:    setl %al
159; X64-NEXT:    setg %cl
160; X64-NEXT:    subb %al, %cl
161; X64-NEXT:    movsbl %cl, %eax
162; X64-NEXT:    retq
163;
164; X86-LABEL: scmp.32.32:
165; X86:       # %bb.0:
166; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
167; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
168; X86-NEXT:    setl %al
169; X86-NEXT:    setg %cl
170; X86-NEXT:    subb %al, %cl
171; X86-NEXT:    movsbl %cl, %eax
172; X86-NEXT:    retl
173  %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
174  ret i32 %1
175}
176
177define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
178; X64-LABEL: scmp.32.64:
179; X64:       # %bb.0:
180; X64-NEXT:    cmpq %rsi, %rdi
181; X64-NEXT:    setl %al
182; X64-NEXT:    setg %cl
183; X64-NEXT:    subb %al, %cl
184; X64-NEXT:    movsbl %cl, %eax
185; X64-NEXT:    retq
186;
187; X86-LABEL: scmp.32.64:
188; X86:       # %bb.0:
189; X86-NEXT:    pushl %ebx
190; X86-NEXT:    pushl %edi
191; X86-NEXT:    pushl %esi
192; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
193; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
194; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
195; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
196; X86-NEXT:    cmpl %eax, %edx
197; X86-NEXT:    movl %esi, %edi
198; X86-NEXT:    sbbl %ecx, %edi
199; X86-NEXT:    setl %bl
200; X86-NEXT:    cmpl %edx, %eax
201; X86-NEXT:    sbbl %esi, %ecx
202; X86-NEXT:    setl %al
203; X86-NEXT:    subb %bl, %al
204; X86-NEXT:    movsbl %al, %eax
205; X86-NEXT:    popl %esi
206; X86-NEXT:    popl %edi
207; X86-NEXT:    popl %ebx
208; X86-NEXT:    retl
209  %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
210  ret i32 %1
211}
212
213define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
214; X64-LABEL: scmp.64.64:
215; X64:       # %bb.0:
216; X64-NEXT:    cmpq %rsi, %rdi
217; X64-NEXT:    setl %al
218; X64-NEXT:    setg %cl
219; X64-NEXT:    subb %al, %cl
220; X64-NEXT:    movsbq %cl, %rax
221; X64-NEXT:    retq
222;
223; X86-LABEL: scmp.64.64:
224; X86:       # %bb.0:
225; X86-NEXT:    pushl %ebx
226; X86-NEXT:    pushl %edi
227; X86-NEXT:    pushl %esi
228; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
229; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
230; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
231; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
232; X86-NEXT:    cmpl %eax, %edx
233; X86-NEXT:    movl %esi, %edi
234; X86-NEXT:    sbbl %ecx, %edi
235; X86-NEXT:    setl %bl
236; X86-NEXT:    cmpl %edx, %eax
237; X86-NEXT:    sbbl %esi, %ecx
238; X86-NEXT:    setl %al
239; X86-NEXT:    subb %bl, %al
240; X86-NEXT:    movsbl %al, %eax
241; X86-NEXT:    movl %eax, %edx
242; X86-NEXT:    sarl $31, %edx
243; X86-NEXT:    popl %esi
244; X86-NEXT:    popl %edi
245; X86-NEXT:    popl %ebx
246; X86-NEXT:    retl
247  %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
248  ret i64 %1
249}
250
251define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind {
252; X64-LABEL: scmp_narrow_result:
253; X64:       # %bb.0:
254; X64-NEXT:    cmpl %esi, %edi
255; X64-NEXT:    setl %cl
256; X64-NEXT:    setg %al
257; X64-NEXT:    subb %cl, %al
258; X64-NEXT:    retq
259;
260; X86-LABEL: scmp_narrow_result:
261; X86:       # %bb.0:
262; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
263; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
264; X86-NEXT:    setl %cl
265; X86-NEXT:    setg %al
266; X86-NEXT:    subb %cl, %al
267; X86-NEXT:    retl
268  %1 = call i4 @llvm.scmp(i32 %x, i32 %y)
269  ret i4 %1
270}
271
272define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
273; X64-LABEL: scmp_narrow_op:
274; X64:       # %bb.0:
275; X64-NEXT:    shlq $2, %rsi
276; X64-NEXT:    sarq $2, %rsi
277; X64-NEXT:    shlq $2, %rdi
278; X64-NEXT:    sarq $2, %rdi
279; X64-NEXT:    cmpq %rsi, %rdi
280; X64-NEXT:    setl %cl
281; X64-NEXT:    setg %al
282; X64-NEXT:    subb %cl, %al
283; X64-NEXT:    retq
284;
285; X86-LABEL: scmp_narrow_op:
286; X86:       # %bb.0:
287; X86-NEXT:    pushl %ebx
288; X86-NEXT:    pushl %edi
289; X86-NEXT:    pushl %esi
290; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
291; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
292; X86-NEXT:    shll $2, %eax
293; X86-NEXT:    sarl $2, %eax
294; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
295; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
296; X86-NEXT:    shll $2, %esi
297; X86-NEXT:    sarl $2, %esi
298; X86-NEXT:    cmpl %ecx, %edx
299; X86-NEXT:    movl %esi, %edi
300; X86-NEXT:    sbbl %eax, %edi
301; X86-NEXT:    setl %bl
302; X86-NEXT:    cmpl %edx, %ecx
303; X86-NEXT:    sbbl %esi, %eax
304; X86-NEXT:    setl %al
305; X86-NEXT:    subb %bl, %al
306; X86-NEXT:    popl %esi
307; X86-NEXT:    popl %edi
308; X86-NEXT:    popl %ebx
309; X86-NEXT:    retl
310  %1 = call i8 @llvm.scmp(i62 %x, i62 %y)
311  ret i8 %1
312}
313
314define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
315; X64-LABEL: scmp_wide_result:
316; X64:       # %bb.0:
317; X64-NEXT:    cmpl %esi, %edi
318; X64-NEXT:    setl %al
319; X64-NEXT:    setg %cl
320; X64-NEXT:    subb %al, %cl
321; X64-NEXT:    movsbq %cl, %rax
322; X64-NEXT:    movq %rax, %rdx
323; X64-NEXT:    sarq $63, %rdx
324; X64-NEXT:    movl %edx, %ecx
325; X64-NEXT:    andl $8191, %ecx # imm = 0x1FFF
326; X64-NEXT:    retq
327;
328; X86-LABEL: scmp_wide_result:
329; X86:       # %bb.0:
330; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
331; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
332; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
333; X86-NEXT:    setl %cl
334; X86-NEXT:    setg %dl
335; X86-NEXT:    subb %cl, %dl
336; X86-NEXT:    movsbl %dl, %ecx
337; X86-NEXT:    movl %ecx, (%eax)
338; X86-NEXT:    sarl $31, %ecx
339; X86-NEXT:    movl %ecx, 12(%eax)
340; X86-NEXT:    movl %ecx, 8(%eax)
341; X86-NEXT:    movl %ecx, 4(%eax)
342; X86-NEXT:    andl $8191, %ecx # imm = 0x1FFF
343; X86-NEXT:    movw %cx, 16(%eax)
344; X86-NEXT:    retl $4
345  %1 = call i141 @llvm.scmp(i32 %x, i32 %y)
346  ret i141 %1
347}
348
349define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
350; X64-LABEL: scmp_wide_op:
351; X64:       # %bb.0:
352; X64-NEXT:    shlq $19, %rcx
353; X64-NEXT:    sarq $19, %rcx
354; X64-NEXT:    shlq $19, %rsi
355; X64-NEXT:    sarq $19, %rsi
356; X64-NEXT:    cmpq %rdx, %rdi
357; X64-NEXT:    movq %rsi, %rax
358; X64-NEXT:    sbbq %rcx, %rax
359; X64-NEXT:    setl %r8b
360; X64-NEXT:    cmpq %rdi, %rdx
361; X64-NEXT:    sbbq %rsi, %rcx
362; X64-NEXT:    setl %al
363; X64-NEXT:    subb %r8b, %al
364; X64-NEXT:    retq
365;
366; X86-LABEL: scmp_wide_op:
367; X86:       # %bb.0:
368; X86-NEXT:    pushl %ebp
369; X86-NEXT:    pushl %ebx
370; X86-NEXT:    pushl %edi
371; X86-NEXT:    pushl %esi
372; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
373; X86-NEXT:    shll $19, %eax
374; X86-NEXT:    sarl $19, %eax
375; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
376; X86-NEXT:    shll $19, %ecx
377; X86-NEXT:    sarl $19, %ecx
378; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
379; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
380; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
381; X86-NEXT:    cmpl %esi, {{[0-9]+}}(%esp)
382; X86-NEXT:    sbbl %edx, %ebp
383; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
384; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
385; X86-NEXT:    movl %edi, %esi
386; X86-NEXT:    sbbl %ebp, %esi
387; X86-NEXT:    movl %ecx, %esi
388; X86-NEXT:    sbbl %eax, %esi
389; X86-NEXT:    setl %bl
390; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
391; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
392; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
393; X86-NEXT:    sbbl %edi, %ebp
394; X86-NEXT:    sbbl %ecx, %eax
395; X86-NEXT:    setl %al
396; X86-NEXT:    subb %bl, %al
397; X86-NEXT:    popl %esi
398; X86-NEXT:    popl %edi
399; X86-NEXT:    popl %ebx
400; X86-NEXT:    popl %ebp
401; X86-NEXT:    retl
402  %1 = call i8 @llvm.scmp(i109 %x, i109 %y)
403  ret i8 %1
404}
405
406define i41 @scmp_uncommon_types(i7 %x, i7 %y) nounwind {
407; X64-LABEL: scmp_uncommon_types:
408; X64:       # %bb.0:
409; X64-NEXT:    addb %sil, %sil
410; X64-NEXT:    sarb %sil
411; X64-NEXT:    addb %dil, %dil
412; X64-NEXT:    sarb %dil
413; X64-NEXT:    cmpb %sil, %dil
414; X64-NEXT:    setl %al
415; X64-NEXT:    setg %cl
416; X64-NEXT:    subb %al, %cl
417; X64-NEXT:    movsbq %cl, %rax
418; X64-NEXT:    retq
419;
420; X86-LABEL: scmp_uncommon_types:
421; X86:       # %bb.0:
422; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
423; X86-NEXT:    addb %al, %al
424; X86-NEXT:    sarb %al
425; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
426; X86-NEXT:    addb %cl, %cl
427; X86-NEXT:    sarb %cl
428; X86-NEXT:    cmpb %al, %cl
429; X86-NEXT:    setl %al
430; X86-NEXT:    setg %cl
431; X86-NEXT:    subb %al, %cl
432; X86-NEXT:    movsbl %cl, %eax
433; X86-NEXT:    movl %eax, %edx
434; X86-NEXT:    sarl $31, %edx
435; X86-NEXT:    retl
436  %1 = call i41 @llvm.scmp(i7 %x, i7 %y)
437  ret i41 %1
438}
439
440define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
441; SSE-LABEL: scmp_normal_vectors:
442; SSE:       # %bb.0:
443; SSE-NEXT:    movdqa %xmm0, %xmm2
444; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
445; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
446; SSE-NEXT:    psubd %xmm2, %xmm1
447; SSE-NEXT:    movdqa %xmm1, %xmm0
448; SSE-NEXT:    retq
449;
450; AVX2-LABEL: scmp_normal_vectors:
451; AVX2:       # %bb.0:
452; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm2
453; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
454; AVX2-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
455; AVX2-NEXT:    retq
456;
457; AVX512-LABEL: scmp_normal_vectors:
458; AVX512:       # %bb.0:
459; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
460; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k2
461; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
462; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
463; AVX512-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1}
464; AVX512-NEXT:    retq
465;
466; X86-LABEL: scmp_normal_vectors:
467; X86:       # %bb.0:
468; X86-NEXT:    pushl %ebx
469; X86-NEXT:    pushl %edi
470; X86-NEXT:    pushl %esi
471; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
472; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
473; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
474; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
475; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
476; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
477; X86-NEXT:    setl %dl
478; X86-NEXT:    setg %dh
479; X86-NEXT:    subb %dl, %dh
480; X86-NEXT:    movsbl %dh, %edx
481; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
482; X86-NEXT:    setl %bl
483; X86-NEXT:    setg %bh
484; X86-NEXT:    subb %bl, %bh
485; X86-NEXT:    movsbl %bh, %edi
486; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
487; X86-NEXT:    setl %bl
488; X86-NEXT:    setg %bh
489; X86-NEXT:    subb %bl, %bh
490; X86-NEXT:    movsbl %bh, %esi
491; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
492; X86-NEXT:    setl %cl
493; X86-NEXT:    setg %ch
494; X86-NEXT:    subb %cl, %ch
495; X86-NEXT:    movsbl %ch, %ecx
496; X86-NEXT:    movl %ecx, 12(%eax)
497; X86-NEXT:    movl %esi, 8(%eax)
498; X86-NEXT:    movl %edi, 4(%eax)
499; X86-NEXT:    movl %edx, (%eax)
500; X86-NEXT:    popl %esi
501; X86-NEXT:    popl %edi
502; X86-NEXT:    popl %ebx
503; X86-NEXT:    retl $4
504  %1 = call <4 x i32> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
505  ret <4 x i32> %1
506}
507
508define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
509; SSE2-LABEL: scmp_narrow_vec_result:
510; SSE2:       # %bb.0:
511; SSE2-NEXT:    movd %xmm1, %eax
512; SSE2-NEXT:    movd %xmm0, %ecx
513; SSE2-NEXT:    cmpl %eax, %ecx
514; SSE2-NEXT:    setl %al
515; SSE2-NEXT:    setg %cl
516; SSE2-NEXT:    subb %al, %cl
517; SSE2-NEXT:    movzbl %cl, %eax
518; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
519; SSE2-NEXT:    movd %xmm2, %ecx
520; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
521; SSE2-NEXT:    movd %xmm2, %edx
522; SSE2-NEXT:    cmpl %ecx, %edx
523; SSE2-NEXT:    setl %cl
524; SSE2-NEXT:    setg %dl
525; SSE2-NEXT:    subb %cl, %dl
526; SSE2-NEXT:    movzbl %dl, %ecx
527; SSE2-NEXT:    shll $8, %ecx
528; SSE2-NEXT:    orl %eax, %ecx
529; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
530; SSE2-NEXT:    movd %xmm2, %eax
531; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
532; SSE2-NEXT:    movd %xmm2, %edx
533; SSE2-NEXT:    cmpl %eax, %edx
534; SSE2-NEXT:    setl %al
535; SSE2-NEXT:    setg %dl
536; SSE2-NEXT:    subb %al, %dl
537; SSE2-NEXT:    movzbl %dl, %eax
538; SSE2-NEXT:    shll $16, %eax
539; SSE2-NEXT:    orl %ecx, %eax
540; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
541; SSE2-NEXT:    movd %xmm1, %ecx
542; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
543; SSE2-NEXT:    movd %xmm0, %edx
544; SSE2-NEXT:    cmpl %ecx, %edx
545; SSE2-NEXT:    setl %cl
546; SSE2-NEXT:    setg %dl
547; SSE2-NEXT:    subb %cl, %dl
548; SSE2-NEXT:    movzbl %dl, %ecx
549; SSE2-NEXT:    shll $24, %ecx
550; SSE2-NEXT:    orl %eax, %ecx
551; SSE2-NEXT:    movd %ecx, %xmm0
552; SSE2-NEXT:    retq
553;
554; SSE4-LABEL: scmp_narrow_vec_result:
555; SSE4:       # %bb.0:
556; SSE4-NEXT:    pextrd $1, %xmm1, %eax
557; SSE4-NEXT:    pextrd $1, %xmm0, %ecx
558; SSE4-NEXT:    cmpl %eax, %ecx
559; SSE4-NEXT:    setl %al
560; SSE4-NEXT:    setg %cl
561; SSE4-NEXT:    subb %al, %cl
562; SSE4-NEXT:    movzbl %cl, %eax
563; SSE4-NEXT:    movd %xmm1, %ecx
564; SSE4-NEXT:    movd %xmm0, %edx
565; SSE4-NEXT:    cmpl %ecx, %edx
566; SSE4-NEXT:    setl %cl
567; SSE4-NEXT:    setg %dl
568; SSE4-NEXT:    subb %cl, %dl
569; SSE4-NEXT:    movzbl %dl, %ecx
570; SSE4-NEXT:    movd %ecx, %xmm2
571; SSE4-NEXT:    pinsrb $1, %eax, %xmm2
572; SSE4-NEXT:    pextrd $2, %xmm1, %eax
573; SSE4-NEXT:    pextrd $2, %xmm0, %ecx
574; SSE4-NEXT:    cmpl %eax, %ecx
575; SSE4-NEXT:    setl %al
576; SSE4-NEXT:    setg %cl
577; SSE4-NEXT:    subb %al, %cl
578; SSE4-NEXT:    movzbl %cl, %eax
579; SSE4-NEXT:    pinsrb $2, %eax, %xmm2
580; SSE4-NEXT:    pextrd $3, %xmm1, %eax
581; SSE4-NEXT:    pextrd $3, %xmm0, %ecx
582; SSE4-NEXT:    cmpl %eax, %ecx
583; SSE4-NEXT:    setl %al
584; SSE4-NEXT:    setg %cl
585; SSE4-NEXT:    subb %al, %cl
586; SSE4-NEXT:    movzbl %cl, %eax
587; SSE4-NEXT:    pinsrb $3, %eax, %xmm2
588; SSE4-NEXT:    movdqa %xmm2, %xmm0
589; SSE4-NEXT:    retq
590;
591; AVX-LABEL: scmp_narrow_vec_result:
592; AVX:       # %bb.0:
593; AVX-NEXT:    vpextrd $1, %xmm1, %eax
594; AVX-NEXT:    vpextrd $1, %xmm0, %ecx
595; AVX-NEXT:    cmpl %eax, %ecx
596; AVX-NEXT:    setl %al
597; AVX-NEXT:    setg %cl
598; AVX-NEXT:    subb %al, %cl
599; AVX-NEXT:    vmovd %xmm1, %eax
600; AVX-NEXT:    vmovd %xmm0, %edx
601; AVX-NEXT:    cmpl %eax, %edx
602; AVX-NEXT:    setl %al
603; AVX-NEXT:    setg %dl
604; AVX-NEXT:    subb %al, %dl
605; AVX-NEXT:    vmovd %edx, %xmm2
606; AVX-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
607; AVX-NEXT:    vpextrd $2, %xmm1, %eax
608; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
609; AVX-NEXT:    cmpl %eax, %ecx
610; AVX-NEXT:    setl %al
611; AVX-NEXT:    setg %cl
612; AVX-NEXT:    subb %al, %cl
613; AVX-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
614; AVX-NEXT:    vpextrd $3, %xmm1, %eax
615; AVX-NEXT:    vpextrd $3, %xmm0, %ecx
616; AVX-NEXT:    cmpl %eax, %ecx
617; AVX-NEXT:    setl %al
618; AVX-NEXT:    setg %cl
619; AVX-NEXT:    subb %al, %cl
620; AVX-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm0
621; AVX-NEXT:    retq
622;
623; X86-LABEL: scmp_narrow_vec_result:
624; X86:       # %bb.0:
625; X86-NEXT:    pushl %ebx
626; X86-NEXT:    pushl %edi
627; X86-NEXT:    pushl %esi
628; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
629; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
630; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
631; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
632; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
633; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
634; X86-NEXT:    setl %ch
635; X86-NEXT:    setg %cl
636; X86-NEXT:    subb %ch, %cl
637; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
638; X86-NEXT:    setl %ch
639; X86-NEXT:    setg %bl
640; X86-NEXT:    subb %ch, %bl
641; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
642; X86-NEXT:    setl %ch
643; X86-NEXT:    setg %bh
644; X86-NEXT:    subb %ch, %bh
645; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
646; X86-NEXT:    setl %dl
647; X86-NEXT:    setg %ch
648; X86-NEXT:    subb %dl, %ch
649; X86-NEXT:    movb %ch, 3(%eax)
650; X86-NEXT:    movb %bh, 2(%eax)
651; X86-NEXT:    movb %bl, 1(%eax)
652; X86-NEXT:    movb %cl, (%eax)
653; X86-NEXT:    popl %esi
654; X86-NEXT:    popl %edi
655; X86-NEXT:    popl %ebx
656; X86-NEXT:    retl $4
657  %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
658  ret <4 x i8> %1
659}
660
661define <4 x i32> @scmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind {
662; SSE2-LABEL: scmp_narrow_vec_op:
663; SSE2:       # %bb.0:
664; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
665; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
666; SSE2-NEXT:    psrad $24, %xmm1
667; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
668; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
669; SSE2-NEXT:    psrad $24, %xmm0
670; SSE2-NEXT:    movdqa %xmm0, %xmm2
671; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
672; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
673; SSE2-NEXT:    psubd %xmm2, %xmm1
674; SSE2-NEXT:    movdqa %xmm1, %xmm0
675; SSE2-NEXT:    retq
676;
677; SSE4-LABEL: scmp_narrow_vec_op:
678; SSE4:       # %bb.0:
679; SSE4-NEXT:    pmovsxbd %xmm1, %xmm1
680; SSE4-NEXT:    pmovsxbd %xmm0, %xmm0
681; SSE4-NEXT:    movdqa %xmm0, %xmm2
682; SSE4-NEXT:    pcmpgtd %xmm1, %xmm2
683; SSE4-NEXT:    pcmpgtd %xmm0, %xmm1
684; SSE4-NEXT:    psubd %xmm2, %xmm1
685; SSE4-NEXT:    movdqa %xmm1, %xmm0
686; SSE4-NEXT:    retq
687;
688; AVX2-LABEL: scmp_narrow_vec_op:
689; AVX2:       # %bb.0:
690; AVX2-NEXT:    vpmovsxbd %xmm1, %xmm1
691; AVX2-NEXT:    vpmovsxbd %xmm0, %xmm0
692; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm2
693; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
694; AVX2-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
695; AVX2-NEXT:    retq
696;
697; AVX512-LABEL: scmp_narrow_vec_op:
698; AVX512:       # %bb.0:
699; AVX512-NEXT:    vpmovsxbd %xmm0, %xmm0
700; AVX512-NEXT:    vpmovsxbd %xmm1, %xmm1
701; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
702; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k2
703; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
704; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
705; AVX512-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1}
706; AVX512-NEXT:    retq
707;
708; X86-LABEL: scmp_narrow_vec_op:
709; X86:       # %bb.0:
710; X86-NEXT:    pushl %ebx
711; X86-NEXT:    pushl %edi
712; X86-NEXT:    pushl %esi
713; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
714; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
715; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
716; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
717; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
718; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %dl
719; X86-NEXT:    setl %dl
720; X86-NEXT:    setg %dh
721; X86-NEXT:    subb %dl, %dh
722; X86-NEXT:    movsbl %dh, %edx
723; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %bl
724; X86-NEXT:    setl %bl
725; X86-NEXT:    setg %bh
726; X86-NEXT:    subb %bl, %bh
727; X86-NEXT:    movsbl %bh, %esi
728; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %ch
729; X86-NEXT:    setl %ch
730; X86-NEXT:    setg %bl
731; X86-NEXT:    subb %ch, %bl
732; X86-NEXT:    movsbl %bl, %edi
733; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %cl
734; X86-NEXT:    setl %cl
735; X86-NEXT:    setg %ch
736; X86-NEXT:    subb %cl, %ch
737; X86-NEXT:    movsbl %ch, %ecx
738; X86-NEXT:    movl %ecx, 12(%eax)
739; X86-NEXT:    movl %edi, 8(%eax)
740; X86-NEXT:    movl %esi, 4(%eax)
741; X86-NEXT:    movl %edx, (%eax)
742; X86-NEXT:    popl %esi
743; X86-NEXT:    popl %edi
744; X86-NEXT:    popl %ebx
745; X86-NEXT:    retl $4
746  %1 = call <4 x i32> @llvm.scmp(<4 x i8> %x, <4 x i8> %y)
747  ret <4 x i32> %1
748}
749
750define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
751; SSE2-LABEL: scmp_wide_vec_result:
752; SSE2:       # %bb.0:
753; SSE2-NEXT:    movdqa %xmm1, %xmm2
754; SSE2-NEXT:    movdqa %xmm0, %xmm3
755; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
756; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
757; SSE2-NEXT:    psrad $24, %xmm0
758; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
759; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
760; SSE2-NEXT:    psrad $24, %xmm5
761; SSE2-NEXT:    movdqa %xmm5, %xmm6
762; SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
763; SSE2-NEXT:    pcmpgtd %xmm5, %xmm0
764; SSE2-NEXT:    psubd %xmm6, %xmm0
765; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
766; SSE2-NEXT:    psrad $24, %xmm1
767; SSE2-NEXT:    punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
768; SSE2-NEXT:    psrad $24, %xmm4
769; SSE2-NEXT:    movdqa %xmm4, %xmm5
770; SSE2-NEXT:    pcmpgtd %xmm1, %xmm5
771; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
772; SSE2-NEXT:    psubd %xmm5, %xmm1
773; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
774; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
775; SSE2-NEXT:    psrad $24, %xmm2
776; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm3[8],xmm5[9],xmm3[9],xmm5[10],xmm3[10],xmm5[11],xmm3[11],xmm5[12],xmm3[12],xmm5[13],xmm3[13],xmm5[14],xmm3[14],xmm5[15],xmm3[15]
777; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
778; SSE2-NEXT:    psrad $24, %xmm3
779; SSE2-NEXT:    movdqa %xmm3, %xmm6
780; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
781; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
782; SSE2-NEXT:    psubd %xmm6, %xmm2
783; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
784; SSE2-NEXT:    psrad $24, %xmm3
785; SSE2-NEXT:    punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
786; SSE2-NEXT:    psrad $24, %xmm4
787; SSE2-NEXT:    movdqa %xmm4, %xmm5
788; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
789; SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
790; SSE2-NEXT:    psubd %xmm5, %xmm3
791; SSE2-NEXT:    retq
792;
793; SSE4-LABEL: scmp_wide_vec_result:
794; SSE4:       # %bb.0:
795; SSE4-NEXT:    movdqa %xmm0, %xmm4
796; SSE4-NEXT:    pmovsxbd %xmm1, %xmm0
797; SSE4-NEXT:    pmovsxbd %xmm4, %xmm2
798; SSE4-NEXT:    movdqa %xmm2, %xmm3
799; SSE4-NEXT:    pcmpgtd %xmm0, %xmm3
800; SSE4-NEXT:    pcmpgtd %xmm2, %xmm0
801; SSE4-NEXT:    psubd %xmm3, %xmm0
802; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
803; SSE4-NEXT:    pmovsxbd %xmm2, %xmm5
804; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,1,1]
805; SSE4-NEXT:    pmovsxbd %xmm2, %xmm2
806; SSE4-NEXT:    movdqa %xmm2, %xmm3
807; SSE4-NEXT:    pcmpgtd %xmm5, %xmm3
808; SSE4-NEXT:    pcmpgtd %xmm2, %xmm5
809; SSE4-NEXT:    psubd %xmm3, %xmm5
810; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
811; SSE4-NEXT:    pmovsxbd %xmm2, %xmm2
812; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
813; SSE4-NEXT:    pmovsxbd %xmm3, %xmm3
814; SSE4-NEXT:    movdqa %xmm3, %xmm6
815; SSE4-NEXT:    pcmpgtd %xmm2, %xmm6
816; SSE4-NEXT:    pcmpgtd %xmm3, %xmm2
817; SSE4-NEXT:    psubd %xmm6, %xmm2
818; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
819; SSE4-NEXT:    pmovsxbd %xmm1, %xmm3
820; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
821; SSE4-NEXT:    pmovsxbd %xmm1, %xmm1
822; SSE4-NEXT:    movdqa %xmm1, %xmm4
823; SSE4-NEXT:    pcmpgtd %xmm3, %xmm4
824; SSE4-NEXT:    pcmpgtd %xmm1, %xmm3
825; SSE4-NEXT:    psubd %xmm4, %xmm3
826; SSE4-NEXT:    movdqa %xmm5, %xmm1
827; SSE4-NEXT:    retq
828;
829; AVX2-LABEL: scmp_wide_vec_result:
830; AVX2:       # %bb.0:
831; AVX2-NEXT:    vpmovsxbd %xmm1, %ymm2
832; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm3
833; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm3, %ymm4
834; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
835; AVX2-NEXT:    vpsubd %ymm4, %ymm2, %ymm2
836; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
837; AVX2-NEXT:    vpmovsxbd %xmm1, %ymm1
838; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
839; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
840; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm3
841; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
842; AVX2-NEXT:    vpsubd %ymm3, %ymm0, %ymm1
843; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
844; AVX2-NEXT:    retq
845;
846; AVX512-LABEL: scmp_wide_vec_result:
847; AVX512:       # %bb.0:
848; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k1
849; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k2
850; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
851; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
852; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
853; AVX512-NEXT:    retq
854;
855; X86-LABEL: scmp_wide_vec_result:
856; X86:       # %bb.0:
857; X86-NEXT:    pushl %ebp
858; X86-NEXT:    pushl %ebx
859; X86-NEXT:    pushl %edi
860; X86-NEXT:    pushl %esi
861; X86-NEXT:    subl $16, %esp
862; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
863; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
864; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
865; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
866; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
867; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
868; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
869; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
870; X86-NEXT:    setl %al
871; X86-NEXT:    setg %cl
872; X86-NEXT:    subb %al, %cl
873; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
874; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %bh
875; X86-NEXT:    setl %al
876; X86-NEXT:    setg %cl
877; X86-NEXT:    subb %al, %cl
878; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
879; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %bl
880; X86-NEXT:    setl %al
881; X86-NEXT:    setg %cl
882; X86-NEXT:    subb %al, %cl
883; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
884; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %dh
885; X86-NEXT:    setl %al
886; X86-NEXT:    setg %cl
887; X86-NEXT:    subb %al, %cl
888; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
889; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %ch
890; X86-NEXT:    setl %al
891; X86-NEXT:    setg %cl
892; X86-NEXT:    subb %al, %cl
893; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
894; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %ah
895; X86-NEXT:    setl %al
896; X86-NEXT:    setg %cl
897; X86-NEXT:    subb %al, %cl
898; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
899; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %dl
900; X86-NEXT:    setl %al
901; X86-NEXT:    setg %cl
902; X86-NEXT:    subb %al, %cl
903; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
904; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
905; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
906; X86-NEXT:    setl %al
907; X86-NEXT:    setg %bh
908; X86-NEXT:    subb %al, %bh
909; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
910; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
911; X86-NEXT:    setl %al
912; X86-NEXT:    setg %bl
913; X86-NEXT:    subb %al, %bl
914; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
915; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
916; X86-NEXT:    setl %al
917; X86-NEXT:    setg %dh
918; X86-NEXT:    subb %al, %dh
919; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
920; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
921; X86-NEXT:    setl %al
922; X86-NEXT:    setg %dl
923; X86-NEXT:    subb %al, %dl
924; X86-NEXT:    movsbl %dl, %eax
925; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
926; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
927; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
928; X86-NEXT:    setl %al
929; X86-NEXT:    setg %dl
930; X86-NEXT:    subb %al, %dl
931; X86-NEXT:    movsbl %dl, %eax
932; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
933; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
934; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
935; X86-NEXT:    setl %al
936; X86-NEXT:    setg %dl
937; X86-NEXT:    subb %al, %dl
938; X86-NEXT:    movsbl %dl, %ebp
939; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
940; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
941; X86-NEXT:    setl %al
942; X86-NEXT:    setg %dl
943; X86-NEXT:    subb %al, %dl
944; X86-NEXT:    movsbl %dl, %edi
945; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
946; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
947; X86-NEXT:    setl %al
948; X86-NEXT:    setg %ah
949; X86-NEXT:    subb %al, %ah
950; X86-NEXT:    movsbl %ah, %esi
951; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
952; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
953; X86-NEXT:    setl %al
954; X86-NEXT:    setg %dl
955; X86-NEXT:    subb %al, %dl
956; X86-NEXT:    movsbl %dl, %ecx
957; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
958; X86-NEXT:    movl %ecx, 60(%eax)
959; X86-NEXT:    movl %esi, 56(%eax)
960; X86-NEXT:    movl %edi, 52(%eax)
961; X86-NEXT:    movl %ebp, 48(%eax)
962; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
963; X86-NEXT:    movl %ecx, 44(%eax)
964; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
965; X86-NEXT:    movl %ecx, 40(%eax)
966; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
967; X86-NEXT:    movsbl %dh, %edx
968; X86-NEXT:    movl %edx, 36(%eax)
969; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
970; X86-NEXT:    movsbl %bl, %esi
971; X86-NEXT:    movl %esi, 32(%eax)
972; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
973; X86-NEXT:    movsbl %bh, %edi
974; X86-NEXT:    movl %edi, 28(%eax)
975; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
976; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
977; X86-NEXT:    movl %ebx, 24(%eax)
978; X86-NEXT:    movl %edi, 20(%eax)
979; X86-NEXT:    movl %esi, 16(%eax)
980; X86-NEXT:    movl %edx, 12(%eax)
981; X86-NEXT:    movl %ecx, 8(%eax)
982; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
983; X86-NEXT:    movl %ecx, 4(%eax)
984; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
985; X86-NEXT:    movl %ecx, (%eax)
986; X86-NEXT:    addl $16, %esp
987; X86-NEXT:    popl %esi
988; X86-NEXT:    popl %edi
989; X86-NEXT:    popl %ebx
990; X86-NEXT:    popl %ebp
991; X86-NEXT:    retl $4
992  %1 = call <16 x i32> @llvm.scmp(<16 x i8> %x, <16 x i8> %y)
993  ret <16 x i32> %1
994}
995
996define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
997; SSE2-LABEL: scmp_wide_vec_op:
998; SSE2:       # %bb.0:
999; SSE2-NEXT:    movq %xmm7, %rax
1000; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1001; SSE2-NEXT:    setl %al
1002; SSE2-NEXT:    setg %cl
1003; SSE2-NEXT:    subb %al, %cl
1004; SSE2-NEXT:    movzbl %cl, %eax
1005; SSE2-NEXT:    movd %eax, %xmm8
1006; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3]
1007; SSE2-NEXT:    movq %xmm7, %rax
1008; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1009; SSE2-NEXT:    setl %al
1010; SSE2-NEXT:    setg %cl
1011; SSE2-NEXT:    subb %al, %cl
1012; SSE2-NEXT:    movzbl %cl, %eax
1013; SSE2-NEXT:    movd %eax, %xmm7
1014; SSE2-NEXT:    movq %xmm6, %rax
1015; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1016; SSE2-NEXT:    punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3],xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7]
1017; SSE2-NEXT:    setl %al
1018; SSE2-NEXT:    setg %cl
1019; SSE2-NEXT:    subb %al, %cl
1020; SSE2-NEXT:    movzbl %cl, %eax
1021; SSE2-NEXT:    movd %eax, %xmm7
1022; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
1023; SSE2-NEXT:    movq %xmm6, %rax
1024; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1025; SSE2-NEXT:    setl %al
1026; SSE2-NEXT:    setg %cl
1027; SSE2-NEXT:    subb %al, %cl
1028; SSE2-NEXT:    movzbl %cl, %eax
1029; SSE2-NEXT:    movd %eax, %xmm6
1030; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
1031; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
1032; SSE2-NEXT:    movq %xmm5, %rax
1033; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1034; SSE2-NEXT:    setl %al
1035; SSE2-NEXT:    setg %cl
1036; SSE2-NEXT:    subb %al, %cl
1037; SSE2-NEXT:    movzbl %cl, %eax
1038; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
1039; SSE2-NEXT:    movq %xmm5, %rcx
1040; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rcx
1041; SSE2-NEXT:    movd %eax, %xmm6
1042; SSE2-NEXT:    setl %al
1043; SSE2-NEXT:    setg %cl
1044; SSE2-NEXT:    subb %al, %cl
1045; SSE2-NEXT:    movzbl %cl, %eax
1046; SSE2-NEXT:    movq %xmm4, %rcx
1047; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rcx
1048; SSE2-NEXT:    movd %eax, %xmm8
1049; SSE2-NEXT:    setl %al
1050; SSE2-NEXT:    setg %cl
1051; SSE2-NEXT:    subb %al, %cl
1052; SSE2-NEXT:    movzbl %cl, %eax
1053; SSE2-NEXT:    movd %eax, %xmm5
1054; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
1055; SSE2-NEXT:    movq %xmm4, %rax
1056; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1057; SSE2-NEXT:    setl %al
1058; SSE2-NEXT:    setg %cl
1059; SSE2-NEXT:    subb %al, %cl
1060; SSE2-NEXT:    movzbl %cl, %eax
1061; SSE2-NEXT:    movd %eax, %xmm4
1062; SSE2-NEXT:    movq %xmm3, %rax
1063; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1064; SSE2-NEXT:    setl %al
1065; SSE2-NEXT:    setg %cl
1066; SSE2-NEXT:    subb %al, %cl
1067; SSE2-NEXT:    movzbl %cl, %eax
1068; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
1069; SSE2-NEXT:    movq %xmm3, %rcx
1070; SSE2-NEXT:    movd %eax, %xmm3
1071; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rcx
1072; SSE2-NEXT:    setl %al
1073; SSE2-NEXT:    setg %cl
1074; SSE2-NEXT:    subb %al, %cl
1075; SSE2-NEXT:    movzbl %cl, %eax
1076; SSE2-NEXT:    movq %xmm2, %rcx
1077; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rcx
1078; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
1079; SSE2-NEXT:    movq %xmm2, %rcx
1080; SSE2-NEXT:    movd %eax, %xmm2
1081; SSE2-NEXT:    setl %al
1082; SSE2-NEXT:    setg %dl
1083; SSE2-NEXT:    subb %al, %dl
1084; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rcx
1085; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1],xmm6[2],xmm8[2],xmm6[3],xmm8[3],xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
1086; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
1087; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
1088; SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
1089; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1090; SSE2-NEXT:    movzbl %dl, %eax
1091; SSE2-NEXT:    movd %eax, %xmm2
1092; SSE2-NEXT:    setl %al
1093; SSE2-NEXT:    setg %cl
1094; SSE2-NEXT:    subb %al, %cl
1095; SSE2-NEXT:    movzbl %cl, %eax
1096; SSE2-NEXT:    movd %eax, %xmm4
1097; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
1098; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
1099; SSE2-NEXT:    movq %xmm1, %rax
1100; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1101; SSE2-NEXT:    setl %al
1102; SSE2-NEXT:    setg %cl
1103; SSE2-NEXT:    subb %al, %cl
1104; SSE2-NEXT:    movzbl %cl, %eax
1105; SSE2-NEXT:    movd %eax, %xmm3
1106; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1107; SSE2-NEXT:    movq %xmm1, %rax
1108; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1109; SSE2-NEXT:    setl %al
1110; SSE2-NEXT:    setg %cl
1111; SSE2-NEXT:    subb %al, %cl
1112; SSE2-NEXT:    movzbl %cl, %eax
1113; SSE2-NEXT:    movd %eax, %xmm1
1114; SSE2-NEXT:    movq %xmm0, %rax
1115; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1116; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1117; SSE2-NEXT:    setl %al
1118; SSE2-NEXT:    setg %cl
1119; SSE2-NEXT:    subb %al, %cl
1120; SSE2-NEXT:    movzbl %cl, %eax
1121; SSE2-NEXT:    movd %eax, %xmm1
1122; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1123; SSE2-NEXT:    movq %xmm0, %rax
1124; SSE2-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1125; SSE2-NEXT:    setl %al
1126; SSE2-NEXT:    setg %cl
1127; SSE2-NEXT:    subb %al, %cl
1128; SSE2-NEXT:    movzbl %cl, %eax
1129; SSE2-NEXT:    movd %eax, %xmm0
1130; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1131; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
1132; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1133; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
1134; SSE2-NEXT:    movdqa %xmm1, %xmm0
1135; SSE2-NEXT:    retq
1136;
1137; SSE4-LABEL: scmp_wide_vec_op:
1138; SSE4:       # %bb.0:
1139; SSE4-NEXT:    pextrq $1, %xmm0, %rax
1140; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1141; SSE4-NEXT:    setl %al
1142; SSE4-NEXT:    setg %cl
1143; SSE4-NEXT:    subb %al, %cl
1144; SSE4-NEXT:    movzbl %cl, %eax
1145; SSE4-NEXT:    movq %xmm0, %rcx
1146; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rcx
1147; SSE4-NEXT:    setl %cl
1148; SSE4-NEXT:    setg %dl
1149; SSE4-NEXT:    subb %cl, %dl
1150; SSE4-NEXT:    movzbl %dl, %ecx
1151; SSE4-NEXT:    movd %ecx, %xmm0
1152; SSE4-NEXT:    pinsrb $1, %eax, %xmm0
1153; SSE4-NEXT:    movq %xmm1, %rax
1154; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1155; SSE4-NEXT:    setl %al
1156; SSE4-NEXT:    setg %cl
1157; SSE4-NEXT:    subb %al, %cl
1158; SSE4-NEXT:    movzbl %cl, %eax
1159; SSE4-NEXT:    pinsrb $2, %eax, %xmm0
1160; SSE4-NEXT:    pextrq $1, %xmm1, %rax
1161; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1162; SSE4-NEXT:    setl %al
1163; SSE4-NEXT:    setg %cl
1164; SSE4-NEXT:    subb %al, %cl
1165; SSE4-NEXT:    movzbl %cl, %eax
1166; SSE4-NEXT:    pinsrb $3, %eax, %xmm0
1167; SSE4-NEXT:    movq %xmm2, %rax
1168; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1169; SSE4-NEXT:    setl %al
1170; SSE4-NEXT:    setg %cl
1171; SSE4-NEXT:    subb %al, %cl
1172; SSE4-NEXT:    movzbl %cl, %eax
1173; SSE4-NEXT:    pinsrb $4, %eax, %xmm0
1174; SSE4-NEXT:    pextrq $1, %xmm2, %rax
1175; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1176; SSE4-NEXT:    setl %al
1177; SSE4-NEXT:    setg %cl
1178; SSE4-NEXT:    subb %al, %cl
1179; SSE4-NEXT:    movzbl %cl, %eax
1180; SSE4-NEXT:    pinsrb $5, %eax, %xmm0
1181; SSE4-NEXT:    movq %xmm3, %rax
1182; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1183; SSE4-NEXT:    setl %al
1184; SSE4-NEXT:    setg %cl
1185; SSE4-NEXT:    subb %al, %cl
1186; SSE4-NEXT:    movzbl %cl, %eax
1187; SSE4-NEXT:    pinsrb $6, %eax, %xmm0
1188; SSE4-NEXT:    pextrq $1, %xmm3, %rax
1189; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1190; SSE4-NEXT:    setl %al
1191; SSE4-NEXT:    setg %cl
1192; SSE4-NEXT:    subb %al, %cl
1193; SSE4-NEXT:    movzbl %cl, %eax
1194; SSE4-NEXT:    pinsrb $7, %eax, %xmm0
1195; SSE4-NEXT:    movq %xmm4, %rax
1196; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1197; SSE4-NEXT:    setl %al
1198; SSE4-NEXT:    setg %cl
1199; SSE4-NEXT:    subb %al, %cl
1200; SSE4-NEXT:    movzbl %cl, %eax
1201; SSE4-NEXT:    pinsrb $8, %eax, %xmm0
1202; SSE4-NEXT:    pextrq $1, %xmm4, %rax
1203; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1204; SSE4-NEXT:    setl %al
1205; SSE4-NEXT:    setg %cl
1206; SSE4-NEXT:    subb %al, %cl
1207; SSE4-NEXT:    movzbl %cl, %eax
1208; SSE4-NEXT:    pinsrb $9, %eax, %xmm0
1209; SSE4-NEXT:    movq %xmm5, %rax
1210; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1211; SSE4-NEXT:    setl %al
1212; SSE4-NEXT:    setg %cl
1213; SSE4-NEXT:    subb %al, %cl
1214; SSE4-NEXT:    movzbl %cl, %eax
1215; SSE4-NEXT:    pinsrb $10, %eax, %xmm0
1216; SSE4-NEXT:    pextrq $1, %xmm5, %rax
1217; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1218; SSE4-NEXT:    setl %al
1219; SSE4-NEXT:    setg %cl
1220; SSE4-NEXT:    subb %al, %cl
1221; SSE4-NEXT:    movzbl %cl, %eax
1222; SSE4-NEXT:    pinsrb $11, %eax, %xmm0
1223; SSE4-NEXT:    movq %xmm6, %rax
1224; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1225; SSE4-NEXT:    setl %al
1226; SSE4-NEXT:    setg %cl
1227; SSE4-NEXT:    subb %al, %cl
1228; SSE4-NEXT:    movzbl %cl, %eax
1229; SSE4-NEXT:    pinsrb $12, %eax, %xmm0
1230; SSE4-NEXT:    pextrq $1, %xmm6, %rax
1231; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1232; SSE4-NEXT:    setl %al
1233; SSE4-NEXT:    setg %cl
1234; SSE4-NEXT:    subb %al, %cl
1235; SSE4-NEXT:    movzbl %cl, %eax
1236; SSE4-NEXT:    pinsrb $13, %eax, %xmm0
1237; SSE4-NEXT:    movq %xmm7, %rax
1238; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1239; SSE4-NEXT:    setl %al
1240; SSE4-NEXT:    setg %cl
1241; SSE4-NEXT:    subb %al, %cl
1242; SSE4-NEXT:    movzbl %cl, %eax
1243; SSE4-NEXT:    pinsrb $14, %eax, %xmm0
1244; SSE4-NEXT:    pextrq $1, %xmm7, %rax
1245; SSE4-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
1246; SSE4-NEXT:    setl %al
1247; SSE4-NEXT:    setg %cl
1248; SSE4-NEXT:    subb %al, %cl
1249; SSE4-NEXT:    movzbl %cl, %eax
1250; SSE4-NEXT:    pinsrb $15, %eax, %xmm0
1251; SSE4-NEXT:    retq
1252;
1253; AVX2-LABEL: scmp_wide_vec_op:
1254; AVX2:       # %bb.0:
1255; AVX2-NEXT:    vpextrq $1, %xmm4, %rax
1256; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
1257; AVX2-NEXT:    cmpq %rax, %rcx
1258; AVX2-NEXT:    setl %al
1259; AVX2-NEXT:    setg %cl
1260; AVX2-NEXT:    subb %al, %cl
1261; AVX2-NEXT:    vmovq %xmm4, %rax
1262; AVX2-NEXT:    vmovq %xmm0, %rdx
1263; AVX2-NEXT:    cmpq %rax, %rdx
1264; AVX2-NEXT:    setl %al
1265; AVX2-NEXT:    setg %dl
1266; AVX2-NEXT:    subb %al, %dl
1267; AVX2-NEXT:    vmovd %edx, %xmm8
1268; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm8, %xmm8
1269; AVX2-NEXT:    vextracti128 $1, %ymm4, %xmm4
1270; AVX2-NEXT:    vmovq %xmm4, %rax
1271; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
1272; AVX2-NEXT:    vmovq %xmm0, %rcx
1273; AVX2-NEXT:    cmpq %rax, %rcx
1274; AVX2-NEXT:    setl %al
1275; AVX2-NEXT:    setg %cl
1276; AVX2-NEXT:    subb %al, %cl
1277; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm8, %xmm8
1278; AVX2-NEXT:    vpextrq $1, %xmm4, %rax
1279; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
1280; AVX2-NEXT:    cmpq %rax, %rcx
1281; AVX2-NEXT:    setl %al
1282; AVX2-NEXT:    setg %cl
1283; AVX2-NEXT:    subb %al, %cl
1284; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm8, %xmm0
1285; AVX2-NEXT:    vmovq %xmm5, %rax
1286; AVX2-NEXT:    vmovq %xmm1, %rcx
1287; AVX2-NEXT:    cmpq %rax, %rcx
1288; AVX2-NEXT:    setl %al
1289; AVX2-NEXT:    setg %cl
1290; AVX2-NEXT:    subb %al, %cl
1291; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
1292; AVX2-NEXT:    vpextrq $1, %xmm5, %rax
1293; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
1294; AVX2-NEXT:    cmpq %rax, %rcx
1295; AVX2-NEXT:    setl %al
1296; AVX2-NEXT:    setg %cl
1297; AVX2-NEXT:    subb %al, %cl
1298; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
1299; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm4
1300; AVX2-NEXT:    vmovq %xmm4, %rax
1301; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
1302; AVX2-NEXT:    vmovq %xmm1, %rcx
1303; AVX2-NEXT:    cmpq %rax, %rcx
1304; AVX2-NEXT:    setl %al
1305; AVX2-NEXT:    setg %cl
1306; AVX2-NEXT:    subb %al, %cl
1307; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
1308; AVX2-NEXT:    vpextrq $1, %xmm4, %rax
1309; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
1310; AVX2-NEXT:    cmpq %rax, %rcx
1311; AVX2-NEXT:    setl %al
1312; AVX2-NEXT:    setg %cl
1313; AVX2-NEXT:    subb %al, %cl
1314; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
1315; AVX2-NEXT:    vmovq %xmm6, %rax
1316; AVX2-NEXT:    vmovq %xmm2, %rcx
1317; AVX2-NEXT:    cmpq %rax, %rcx
1318; AVX2-NEXT:    setl %al
1319; AVX2-NEXT:    setg %cl
1320; AVX2-NEXT:    subb %al, %cl
1321; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
1322; AVX2-NEXT:    vpextrq $1, %xmm6, %rax
1323; AVX2-NEXT:    vpextrq $1, %xmm2, %rcx
1324; AVX2-NEXT:    cmpq %rax, %rcx
1325; AVX2-NEXT:    setl %al
1326; AVX2-NEXT:    setg %cl
1327; AVX2-NEXT:    subb %al, %cl
1328; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
1329; AVX2-NEXT:    vextracti128 $1, %ymm6, %xmm1
1330; AVX2-NEXT:    vmovq %xmm1, %rax
1331; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm2
1332; AVX2-NEXT:    vmovq %xmm2, %rcx
1333; AVX2-NEXT:    cmpq %rax, %rcx
1334; AVX2-NEXT:    setl %al
1335; AVX2-NEXT:    setg %cl
1336; AVX2-NEXT:    subb %al, %cl
1337; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
1338; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
1339; AVX2-NEXT:    vpextrq $1, %xmm2, %rcx
1340; AVX2-NEXT:    cmpq %rax, %rcx
1341; AVX2-NEXT:    setl %al
1342; AVX2-NEXT:    setg %cl
1343; AVX2-NEXT:    subb %al, %cl
1344; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
1345; AVX2-NEXT:    vmovq %xmm7, %rax
1346; AVX2-NEXT:    vmovq %xmm3, %rcx
1347; AVX2-NEXT:    cmpq %rax, %rcx
1348; AVX2-NEXT:    setl %al
1349; AVX2-NEXT:    setg %cl
1350; AVX2-NEXT:    subb %al, %cl
1351; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
1352; AVX2-NEXT:    vpextrq $1, %xmm7, %rax
1353; AVX2-NEXT:    vpextrq $1, %xmm3, %rcx
1354; AVX2-NEXT:    cmpq %rax, %rcx
1355; AVX2-NEXT:    setl %al
1356; AVX2-NEXT:    setg %cl
1357; AVX2-NEXT:    subb %al, %cl
1358; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
1359; AVX2-NEXT:    vextracti128 $1, %ymm7, %xmm1
1360; AVX2-NEXT:    vmovq %xmm1, %rax
1361; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm2
1362; AVX2-NEXT:    vmovq %xmm2, %rcx
1363; AVX2-NEXT:    cmpq %rax, %rcx
1364; AVX2-NEXT:    setl %al
1365; AVX2-NEXT:    setg %cl
1366; AVX2-NEXT:    subb %al, %cl
1367; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
1368; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
1369; AVX2-NEXT:    vpextrq $1, %xmm2, %rcx
1370; AVX2-NEXT:    cmpq %rax, %rcx
1371; AVX2-NEXT:    setl %al
1372; AVX2-NEXT:    setg %cl
1373; AVX2-NEXT:    subb %al, %cl
1374; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
1375; AVX2-NEXT:    vzeroupper
1376; AVX2-NEXT:    retq
1377;
1378; AVX512-LABEL: scmp_wide_vec_op:
1379; AVX512:       # %bb.0:
1380; AVX512-NEXT:    vpextrq $1, %xmm2, %rax
1381; AVX512-NEXT:    vpextrq $1, %xmm0, %rcx
1382; AVX512-NEXT:    cmpq %rax, %rcx
1383; AVX512-NEXT:    setl %al
1384; AVX512-NEXT:    setg %cl
1385; AVX512-NEXT:    subb %al, %cl
1386; AVX512-NEXT:    vmovq %xmm2, %rax
1387; AVX512-NEXT:    vmovq %xmm0, %rdx
1388; AVX512-NEXT:    cmpq %rax, %rdx
1389; AVX512-NEXT:    setl %al
1390; AVX512-NEXT:    setg %dl
1391; AVX512-NEXT:    subb %al, %dl
1392; AVX512-NEXT:    vmovd %edx, %xmm4
1393; AVX512-NEXT:    vpinsrb $1, %ecx, %xmm4, %xmm4
1394; AVX512-NEXT:    vextracti128 $1, %ymm2, %xmm5
1395; AVX512-NEXT:    vmovq %xmm5, %rax
1396; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm6
1397; AVX512-NEXT:    vmovq %xmm6, %rcx
1398; AVX512-NEXT:    cmpq %rax, %rcx
1399; AVX512-NEXT:    setl %al
1400; AVX512-NEXT:    setg %cl
1401; AVX512-NEXT:    subb %al, %cl
1402; AVX512-NEXT:    vpinsrb $2, %ecx, %xmm4, %xmm4
1403; AVX512-NEXT:    vpextrq $1, %xmm5, %rax
1404; AVX512-NEXT:    vpextrq $1, %xmm6, %rcx
1405; AVX512-NEXT:    cmpq %rax, %rcx
1406; AVX512-NEXT:    setl %al
1407; AVX512-NEXT:    setg %cl
1408; AVX512-NEXT:    subb %al, %cl
1409; AVX512-NEXT:    vpinsrb $3, %ecx, %xmm4, %xmm4
1410; AVX512-NEXT:    vextracti32x4 $2, %zmm2, %xmm5
1411; AVX512-NEXT:    vmovq %xmm5, %rax
1412; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm6
1413; AVX512-NEXT:    vmovq %xmm6, %rcx
1414; AVX512-NEXT:    cmpq %rax, %rcx
1415; AVX512-NEXT:    setl %al
1416; AVX512-NEXT:    setg %cl
1417; AVX512-NEXT:    subb %al, %cl
1418; AVX512-NEXT:    vpinsrb $4, %ecx, %xmm4, %xmm4
1419; AVX512-NEXT:    vpextrq $1, %xmm5, %rax
1420; AVX512-NEXT:    vpextrq $1, %xmm6, %rcx
1421; AVX512-NEXT:    cmpq %rax, %rcx
1422; AVX512-NEXT:    setl %al
1423; AVX512-NEXT:    setg %cl
1424; AVX512-NEXT:    subb %al, %cl
1425; AVX512-NEXT:    vpinsrb $5, %ecx, %xmm4, %xmm4
1426; AVX512-NEXT:    vextracti32x4 $3, %zmm2, %xmm2
1427; AVX512-NEXT:    vmovq %xmm2, %rax
1428; AVX512-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
1429; AVX512-NEXT:    vmovq %xmm0, %rcx
1430; AVX512-NEXT:    cmpq %rax, %rcx
1431; AVX512-NEXT:    setl %al
1432; AVX512-NEXT:    setg %cl
1433; AVX512-NEXT:    subb %al, %cl
1434; AVX512-NEXT:    vpinsrb $6, %ecx, %xmm4, %xmm4
1435; AVX512-NEXT:    vpextrq $1, %xmm2, %rax
1436; AVX512-NEXT:    vpextrq $1, %xmm0, %rcx
1437; AVX512-NEXT:    cmpq %rax, %rcx
1438; AVX512-NEXT:    setl %al
1439; AVX512-NEXT:    setg %cl
1440; AVX512-NEXT:    subb %al, %cl
1441; AVX512-NEXT:    vpinsrb $7, %ecx, %xmm4, %xmm0
1442; AVX512-NEXT:    vmovq %xmm3, %rax
1443; AVX512-NEXT:    vmovq %xmm1, %rcx
1444; AVX512-NEXT:    cmpq %rax, %rcx
1445; AVX512-NEXT:    setl %al
1446; AVX512-NEXT:    setg %cl
1447; AVX512-NEXT:    subb %al, %cl
1448; AVX512-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
1449; AVX512-NEXT:    vpextrq $1, %xmm3, %rax
1450; AVX512-NEXT:    vpextrq $1, %xmm1, %rcx
1451; AVX512-NEXT:    cmpq %rax, %rcx
1452; AVX512-NEXT:    setl %al
1453; AVX512-NEXT:    setg %cl
1454; AVX512-NEXT:    subb %al, %cl
1455; AVX512-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
1456; AVX512-NEXT:    vextracti128 $1, %ymm3, %xmm2
1457; AVX512-NEXT:    vmovq %xmm2, %rax
1458; AVX512-NEXT:    vextracti128 $1, %ymm1, %xmm4
1459; AVX512-NEXT:    vmovq %xmm4, %rcx
1460; AVX512-NEXT:    cmpq %rax, %rcx
1461; AVX512-NEXT:    setl %al
1462; AVX512-NEXT:    setg %cl
1463; AVX512-NEXT:    subb %al, %cl
1464; AVX512-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
1465; AVX512-NEXT:    vpextrq $1, %xmm2, %rax
1466; AVX512-NEXT:    vpextrq $1, %xmm4, %rcx
1467; AVX512-NEXT:    cmpq %rax, %rcx
1468; AVX512-NEXT:    setl %al
1469; AVX512-NEXT:    setg %cl
1470; AVX512-NEXT:    subb %al, %cl
1471; AVX512-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
1472; AVX512-NEXT:    vextracti32x4 $2, %zmm3, %xmm2
1473; AVX512-NEXT:    vmovq %xmm2, %rax
1474; AVX512-NEXT:    vextracti32x4 $2, %zmm1, %xmm4
1475; AVX512-NEXT:    vmovq %xmm4, %rcx
1476; AVX512-NEXT:    cmpq %rax, %rcx
1477; AVX512-NEXT:    setl %al
1478; AVX512-NEXT:    setg %cl
1479; AVX512-NEXT:    subb %al, %cl
1480; AVX512-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
1481; AVX512-NEXT:    vpextrq $1, %xmm2, %rax
1482; AVX512-NEXT:    vpextrq $1, %xmm4, %rcx
1483; AVX512-NEXT:    cmpq %rax, %rcx
1484; AVX512-NEXT:    setl %al
1485; AVX512-NEXT:    setg %cl
1486; AVX512-NEXT:    subb %al, %cl
1487; AVX512-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
1488; AVX512-NEXT:    vextracti32x4 $3, %zmm3, %xmm2
1489; AVX512-NEXT:    vmovq %xmm2, %rax
1490; AVX512-NEXT:    vextracti32x4 $3, %zmm1, %xmm1
1491; AVX512-NEXT:    vmovq %xmm1, %rcx
1492; AVX512-NEXT:    cmpq %rax, %rcx
1493; AVX512-NEXT:    setl %al
1494; AVX512-NEXT:    setg %cl
1495; AVX512-NEXT:    subb %al, %cl
1496; AVX512-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
1497; AVX512-NEXT:    vpextrq $1, %xmm2, %rax
1498; AVX512-NEXT:    vpextrq $1, %xmm1, %rcx
1499; AVX512-NEXT:    cmpq %rax, %rcx
1500; AVX512-NEXT:    setl %al
1501; AVX512-NEXT:    setg %cl
1502; AVX512-NEXT:    subb %al, %cl
1503; AVX512-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
1504; AVX512-NEXT:    vzeroupper
1505; AVX512-NEXT:    retq
1506;
1507; X86-LABEL: scmp_wide_vec_op:
1508; X86:       # %bb.0:
1509; X86-NEXT:    pushl %ebp
1510; X86-NEXT:    pushl %ebx
1511; X86-NEXT:    pushl %edi
1512; X86-NEXT:    pushl %esi
1513; X86-NEXT:    subl $12, %esp
1514; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1515; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1516; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1517; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1518; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1519; X86-NEXT:    cmpl %edx, %edi
1520; X86-NEXT:    movl %ebx, %ebp
1521; X86-NEXT:    sbbl %esi, %ebp
1522; X86-NEXT:    setl %al
1523; X86-NEXT:    cmpl %edi, %edx
1524; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1525; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1526; X86-NEXT:    sbbl %ebx, %esi
1527; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1528; X86-NEXT:    setl %ah
1529; X86-NEXT:    subb %al, %ah
1530; X86-NEXT:    movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1531; X86-NEXT:    cmpl %ecx, %ebp
1532; X86-NEXT:    movl %ebx, %eax
1533; X86-NEXT:    sbbl %edx, %eax
1534; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1535; X86-NEXT:    setl %al
1536; X86-NEXT:    cmpl %ebp, %ecx
1537; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1538; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1539; X86-NEXT:    sbbl %ebx, %edx
1540; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1541; X86-NEXT:    setl %ah
1542; X86-NEXT:    subb %al, %ah
1543; X86-NEXT:    movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1544; X86-NEXT:    cmpl %edi, %ecx
1545; X86-NEXT:    movl %edx, %eax
1546; X86-NEXT:    sbbl %esi, %eax
1547; X86-NEXT:    setl %al
1548; X86-NEXT:    cmpl %ecx, %edi
1549; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1550; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1551; X86-NEXT:    sbbl %edx, %esi
1552; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1553; X86-NEXT:    setl %dl
1554; X86-NEXT:    subb %al, %dl
1555; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1556; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1557; X86-NEXT:    cmpl %ebp, %edi
1558; X86-NEXT:    movl %esi, %eax
1559; X86-NEXT:    sbbl %ecx, %eax
1560; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1561; X86-NEXT:    setl %bl
1562; X86-NEXT:    cmpl %edi, %ebp
1563; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1564; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1565; X86-NEXT:    sbbl %esi, %ecx
1566; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1567; X86-NEXT:    setl %cl
1568; X86-NEXT:    subb %bl, %cl
1569; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1570; X86-NEXT:    cmpl %edx, %edi
1571; X86-NEXT:    movl %esi, %ecx
1572; X86-NEXT:    sbbl %eax, %ecx
1573; X86-NEXT:    setl %bl
1574; X86-NEXT:    cmpl %edi, %edx
1575; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1576; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1577; X86-NEXT:    sbbl %esi, %eax
1578; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1579; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1580; X86-NEXT:    setl %bh
1581; X86-NEXT:    subb %bl, %bh
1582; X86-NEXT:    movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1583; X86-NEXT:    cmpl %edx, %eax
1584; X86-NEXT:    movl %esi, %edi
1585; X86-NEXT:    sbbl %ecx, %edi
1586; X86-NEXT:    setl %bl
1587; X86-NEXT:    cmpl %eax, %edx
1588; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1589; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1590; X86-NEXT:    sbbl %esi, %ecx
1591; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1592; X86-NEXT:    setl %bh
1593; X86-NEXT:    subb %bl, %bh
1594; X86-NEXT:    movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1595; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1596; X86-NEXT:    cmpl %edx, %ecx
1597; X86-NEXT:    movl %esi, %edi
1598; X86-NEXT:    sbbl %eax, %edi
1599; X86-NEXT:    setl %bl
1600; X86-NEXT:    cmpl %ecx, %edx
1601; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1602; X86-NEXT:    sbbl %esi, %eax
1603; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1604; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1605; X86-NEXT:    setl %bh
1606; X86-NEXT:    subb %bl, %bh
1607; X86-NEXT:    movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1608; X86-NEXT:    cmpl %ecx, %edx
1609; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1610; X86-NEXT:    movl %esi, %edi
1611; X86-NEXT:    sbbl %eax, %edi
1612; X86-NEXT:    setl %bl
1613; X86-NEXT:    cmpl %edx, %ecx
1614; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1615; X86-NEXT:    sbbl %esi, %eax
1616; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1617; X86-NEXT:    setl %dl
1618; X86-NEXT:    subb %bl, %dl
1619; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1620; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1621; X86-NEXT:    cmpl %ecx, %edx
1622; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1623; X86-NEXT:    movl %esi, %edi
1624; X86-NEXT:    sbbl %eax, %edi
1625; X86-NEXT:    setl %bl
1626; X86-NEXT:    cmpl %edx, %ecx
1627; X86-NEXT:    sbbl %esi, %eax
1628; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1629; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1630; X86-NEXT:    setl %dl
1631; X86-NEXT:    subb %bl, %dl
1632; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1633; X86-NEXT:    cmpl %eax, %ecx
1634; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1635; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1636; X86-NEXT:    movl %esi, %edi
1637; X86-NEXT:    sbbl %edx, %edi
1638; X86-NEXT:    setl %bl
1639; X86-NEXT:    cmpl %ecx, %eax
1640; X86-NEXT:    sbbl %esi, %edx
1641; X86-NEXT:    setl %al
1642; X86-NEXT:    subb %bl, %al
1643; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1644; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1645; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1646; X86-NEXT:    cmpl %ebp, %ecx
1647; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1648; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1649; X86-NEXT:    movl %esi, %edi
1650; X86-NEXT:    sbbl %edx, %edi
1651; X86-NEXT:    setl %al
1652; X86-NEXT:    cmpl %ecx, %ebp
1653; X86-NEXT:    sbbl %esi, %edx
1654; X86-NEXT:    setl %cl
1655; X86-NEXT:    subb %al, %cl
1656; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1657; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1658; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1659; X86-NEXT:    cmpl %ebp, %ecx
1660; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1661; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1662; X86-NEXT:    movl %esi, %edi
1663; X86-NEXT:    sbbl %edx, %edi
1664; X86-NEXT:    setl %al
1665; X86-NEXT:    cmpl %ecx, %ebp
1666; X86-NEXT:    sbbl %esi, %edx
1667; X86-NEXT:    setl %cl
1668; X86-NEXT:    subb %al, %cl
1669; X86-NEXT:    movb %cl, (%esp) # 1-byte Spill
1670; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1671; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1672; X86-NEXT:    cmpl %eax, %ecx
1673; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1674; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1675; X86-NEXT:    movl %edi, %ebp
1676; X86-NEXT:    sbbl %esi, %ebp
1677; X86-NEXT:    setl %dl
1678; X86-NEXT:    cmpl %ecx, %eax
1679; X86-NEXT:    sbbl %edi, %esi
1680; X86-NEXT:    setl %ch
1681; X86-NEXT:    subb %dl, %ch
1682; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1683; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1684; X86-NEXT:    cmpl %edx, %esi
1685; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1686; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1687; X86-NEXT:    movl %eax, %ebp
1688; X86-NEXT:    sbbl %edi, %ebp
1689; X86-NEXT:    setl %cl
1690; X86-NEXT:    cmpl %esi, %edx
1691; X86-NEXT:    sbbl %eax, %edi
1692; X86-NEXT:    setl %dl
1693; X86-NEXT:    subb %cl, %dl
1694; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1695; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1696; X86-NEXT:    cmpl %ebx, %esi
1697; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1698; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1699; X86-NEXT:    movl %eax, %ebp
1700; X86-NEXT:    sbbl %edi, %ebp
1701; X86-NEXT:    setl %dh
1702; X86-NEXT:    cmpl %esi, %ebx
1703; X86-NEXT:    sbbl %eax, %edi
1704; X86-NEXT:    setl %cl
1705; X86-NEXT:    subb %dh, %cl
1706; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1707; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1708; X86-NEXT:    cmpl %eax, %esi
1709; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1710; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1711; X86-NEXT:    movl %ebx, %ebp
1712; X86-NEXT:    sbbl %edi, %ebp
1713; X86-NEXT:    setl %dh
1714; X86-NEXT:    cmpl %esi, %eax
1715; X86-NEXT:    sbbl %ebx, %edi
1716; X86-NEXT:    setl %bl
1717; X86-NEXT:    subb %dh, %bl
1718; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1719; X86-NEXT:    movb %bl, 15(%eax)
1720; X86-NEXT:    movb %cl, 14(%eax)
1721; X86-NEXT:    movb %dl, 13(%eax)
1722; X86-NEXT:    movb %ch, 12(%eax)
1723; X86-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload
1724; X86-NEXT:    movb %cl, 11(%eax)
1725; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1726; X86-NEXT:    movb %cl, 10(%eax)
1727; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1728; X86-NEXT:    movb %cl, 9(%eax)
1729; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1730; X86-NEXT:    movb %cl, 8(%eax)
1731; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1732; X86-NEXT:    movb %cl, 7(%eax)
1733; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1734; X86-NEXT:    movb %cl, 6(%eax)
1735; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1736; X86-NEXT:    movb %cl, 5(%eax)
1737; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1738; X86-NEXT:    movb %cl, 4(%eax)
1739; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1740; X86-NEXT:    movb %cl, 3(%eax)
1741; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1742; X86-NEXT:    movb %cl, 2(%eax)
1743; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1744; X86-NEXT:    movb %cl, 1(%eax)
1745; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1746; X86-NEXT:    movb %cl, (%eax)
1747; X86-NEXT:    addl $12, %esp
1748; X86-NEXT:    popl %esi
1749; X86-NEXT:    popl %edi
1750; X86-NEXT:    popl %ebx
1751; X86-NEXT:    popl %ebp
1752; X86-NEXT:    retl $4
1753  %1 = call <16 x i8> @llvm.scmp(<16 x i64> %x, <16 x i64> %y)
1754  ret <16 x i8> %1
1755}
1756
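; Uncommon element types: <7 x i7> operands produce a <7 x i117> result, so each
; i7 lane is sign-extended in-register (addb + sarb) and the 117-bit result lanes
; are packed manually through the hidden return pointer.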
1757define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
1758; SSE2-LABEL: scmp_uncommon_vectors:
1759; SSE2:       # %bb.0:
1760; SSE2-NEXT:    pushq %rbp
1761; SSE2-NEXT:    pushq %r15
1762; SSE2-NEXT:    pushq %r14
1763; SSE2-NEXT:    pushq %r13
1764; SSE2-NEXT:    pushq %r12
1765; SSE2-NEXT:    pushq %rbx
1766; SSE2-NEXT:    movq %rdi, %rax
1767; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
1768; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
1769; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
1770; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
1771; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
1772; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
1773; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
1774; SSE2-NEXT:    addb %r15b, %r15b
1775; SSE2-NEXT:    sarb %r15b
1776; SSE2-NEXT:    addb %sil, %sil
1777; SSE2-NEXT:    sarb %sil
1778; SSE2-NEXT:    cmpb %r15b, %sil
1779; SSE2-NEXT:    setl %sil
1780; SSE2-NEXT:    setg %r15b
1781; SSE2-NEXT:    subb %sil, %r15b
1782; SSE2-NEXT:    movsbq %r15b, %rsi
1783; SSE2-NEXT:    movq %rsi, (%rax)
1784; SSE2-NEXT:    movq %rsi, %xmm0
1785; SSE2-NEXT:    sarq $63, %rsi
1786; SSE2-NEXT:    addb %r14b, %r14b
1787; SSE2-NEXT:    sarb %r14b
1788; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
1789; SSE2-NEXT:    addb %r15b, %r15b
1790; SSE2-NEXT:    sarb %r15b
1791; SSE2-NEXT:    cmpb %r14b, %r15b
1792; SSE2-NEXT:    setl %r14b
1793; SSE2-NEXT:    setg %r15b
1794; SSE2-NEXT:    subb %r14b, %r15b
1795; SSE2-NEXT:    movsbq %r15b, %r14
1796; SSE2-NEXT:    movq %r14, %r15
1797; SSE2-NEXT:    sarq $63, %r15
1798; SSE2-NEXT:    addb %bpl, %bpl
1799; SSE2-NEXT:    sarb %bpl
1800; SSE2-NEXT:    addb %dl, %dl
1801; SSE2-NEXT:    sarb %dl
1802; SSE2-NEXT:    cmpb %bpl, %dl
1803; SSE2-NEXT:    setl %dl
1804; SSE2-NEXT:    setg %bpl
1805; SSE2-NEXT:    subb %dl, %bpl
1806; SSE2-NEXT:    movsbq %bpl, %rdx
1807; SSE2-NEXT:    movq %rdx, %r12
1808; SSE2-NEXT:    sarq $63, %r12
1809; SSE2-NEXT:    addb %bl, %bl
1810; SSE2-NEXT:    sarb %bl
1811; SSE2-NEXT:    addb %cl, %cl
1812; SSE2-NEXT:    sarb %cl
1813; SSE2-NEXT:    cmpb %bl, %cl
1814; SSE2-NEXT:    setl %cl
1815; SSE2-NEXT:    setg %bl
1816; SSE2-NEXT:    subb %cl, %bl
1817; SSE2-NEXT:    movsbq %bl, %rbx
1818; SSE2-NEXT:    movq %rbx, %rcx
1819; SSE2-NEXT:    sarq $63, %rcx
1820; SSE2-NEXT:    addb %r11b, %r11b
1821; SSE2-NEXT:    sarb %r11b
1822; SSE2-NEXT:    addb %r8b, %r8b
1823; SSE2-NEXT:    sarb %r8b
1824; SSE2-NEXT:    cmpb %r11b, %r8b
1825; SSE2-NEXT:    setl %r8b
1826; SSE2-NEXT:    setg %r11b
1827; SSE2-NEXT:    subb %r8b, %r11b
1828; SSE2-NEXT:    movsbq %r11b, %r8
1829; SSE2-NEXT:    movq %r8, %r11
1830; SSE2-NEXT:    sarq $63, %r11
1831; SSE2-NEXT:    addb %r10b, %r10b
1832; SSE2-NEXT:    sarb %r10b
1833; SSE2-NEXT:    addb %r9b, %r9b
1834; SSE2-NEXT:    sarb %r9b
1835; SSE2-NEXT:    cmpb %r10b, %r9b
1836; SSE2-NEXT:    setl %r9b
1837; SSE2-NEXT:    setg %r10b
1838; SSE2-NEXT:    subb %r9b, %r10b
1839; SSE2-NEXT:    movsbq %r10b, %r9
1840; SSE2-NEXT:    movq %r9, %r10
1841; SSE2-NEXT:    sarq $63, %r10
1842; SSE2-NEXT:    addb %dil, %dil
1843; SSE2-NEXT:    sarb %dil
1844; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
1845; SSE2-NEXT:    addb %bpl, %bpl
1846; SSE2-NEXT:    sarb %bpl
1847; SSE2-NEXT:    cmpb %dil, %bpl
1848; SSE2-NEXT:    setl %dil
1849; SSE2-NEXT:    setg %bpl
1850; SSE2-NEXT:    subb %dil, %bpl
1851; SSE2-NEXT:    movsbq %bpl, %rdi
1852; SSE2-NEXT:    movq %rdi, %r13
1853; SSE2-NEXT:    sarq $63, %r13
1854; SSE2-NEXT:    movl %r13d, 96(%rax)
1855; SSE2-NEXT:    movabsq $2251799813685247, %rbp # imm = 0x7FFFFFFFFFFFF
1856; SSE2-NEXT:    andq %r13, %rbp
1857; SSE2-NEXT:    shldq $62, %rdi, %r13
1858; SSE2-NEXT:    movq %r13, 88(%rax)
1859; SSE2-NEXT:    movq %r10, %r13
1860; SSE2-NEXT:    shldq $20, %r9, %r13
1861; SSE2-NEXT:    movq %r13, 64(%rax)
1862; SSE2-NEXT:    movq %r11, %r13
1863; SSE2-NEXT:    shldq $31, %r8, %r13
1864; SSE2-NEXT:    movq %r13, 48(%rax)
1865; SSE2-NEXT:    movq %rcx, %r13
1866; SSE2-NEXT:    shldq $42, %rbx, %r13
1867; SSE2-NEXT:    movq %r13, 32(%rax)
1868; SSE2-NEXT:    movabsq $9007199254738944, %r13 # imm = 0x1FFFFFFFFFF800
1869; SSE2-NEXT:    andq %r12, %r13
1870; SSE2-NEXT:    shldq $53, %rdx, %r12
1871; SSE2-NEXT:    movq %r12, 16(%rax)
1872; SSE2-NEXT:    movq %rbp, %r12
1873; SSE2-NEXT:    shrq $48, %r12
1874; SSE2-NEXT:    movb %r12b, 102(%rax)
1875; SSE2-NEXT:    shrq $32, %rbp
1876; SSE2-NEXT:    movw %bp, 100(%rax)
1877; SSE2-NEXT:    movabsq $9007199254740991, %r12 # imm = 0x1FFFFFFFFFFFFF
1878; SSE2-NEXT:    andq %r12, %r15
1879; SSE2-NEXT:    shldq $9, %r14, %r15
1880; SSE2-NEXT:    shlq $62, %rdi
1881; SSE2-NEXT:    orq %r15, %rdi
1882; SSE2-NEXT:    movq %rdi, 80(%rax)
1883; SSE2-NEXT:    shlq $42, %rbx
1884; SSE2-NEXT:    shrq $11, %r13
1885; SSE2-NEXT:    orq %rbx, %r13
1886; SSE2-NEXT:    movq %r13, 24(%rax)
1887; SSE2-NEXT:    shlq $9, %r14
1888; SSE2-NEXT:    andl $511, %r10d # imm = 0x1FF
1889; SSE2-NEXT:    orq %r14, %r10
1890; SSE2-NEXT:    movq %r10, 72(%rax)
1891; SSE2-NEXT:    shlq $20, %r9
1892; SSE2-NEXT:    andl $1048575, %r11d # imm = 0xFFFFF
1893; SSE2-NEXT:    orq %r9, %r11
1894; SSE2-NEXT:    movq %r11, 56(%rax)
1895; SSE2-NEXT:    shlq $31, %r8
1896; SSE2-NEXT:    andl $2147483647, %ecx # imm = 0x7FFFFFFF
1897; SSE2-NEXT:    orq %r8, %rcx
1898; SSE2-NEXT:    movq %rcx, 40(%rax)
1899; SSE2-NEXT:    movq %rsi, %xmm1
1900; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1901; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1902; SSE2-NEXT:    movq %xmm0, %rcx
1903; SSE2-NEXT:    andq %r12, %rcx
1904; SSE2-NEXT:    shlq $53, %rdx
1905; SSE2-NEXT:    orq %rcx, %rdx
1906; SSE2-NEXT:    movq %rdx, 8(%rax)
1907; SSE2-NEXT:    popq %rbx
1908; SSE2-NEXT:    popq %r12
1909; SSE2-NEXT:    popq %r13
1910; SSE2-NEXT:    popq %r14
1911; SSE2-NEXT:    popq %r15
1912; SSE2-NEXT:    popq %rbp
1913; SSE2-NEXT:    retq
1914;
1915; SSE4-LABEL: scmp_uncommon_vectors:
1916; SSE4:       # %bb.0:
1917; SSE4-NEXT:    pushq %rbp
1918; SSE4-NEXT:    pushq %r15
1919; SSE4-NEXT:    pushq %r14
1920; SSE4-NEXT:    pushq %r13
1921; SSE4-NEXT:    pushq %r12
1922; SSE4-NEXT:    pushq %rbx
1923; SSE4-NEXT:    movq %rdi, %rax
1924; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
1925; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
1926; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
1927; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
1928; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
1929; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
1930; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
1931; SSE4-NEXT:    addb %r14b, %r14b
1932; SSE4-NEXT:    sarb %r14b
1933; SSE4-NEXT:    addb %sil, %sil
1934; SSE4-NEXT:    sarb %sil
1935; SSE4-NEXT:    cmpb %r14b, %sil
1936; SSE4-NEXT:    setl %sil
1937; SSE4-NEXT:    setg %r14b
1938; SSE4-NEXT:    subb %sil, %r14b
1939; SSE4-NEXT:    movsbq %r14b, %r14
1940; SSE4-NEXT:    movq %r14, (%rax)
1941; SSE4-NEXT:    sarq $63, %r14
1942; SSE4-NEXT:    addb %r15b, %r15b
1943; SSE4-NEXT:    sarb %r15b
1944; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
1945; SSE4-NEXT:    addb %sil, %sil
1946; SSE4-NEXT:    sarb %sil
1947; SSE4-NEXT:    cmpb %r15b, %sil
1948; SSE4-NEXT:    setl %sil
1949; SSE4-NEXT:    setg %r15b
1950; SSE4-NEXT:    subb %sil, %r15b
1951; SSE4-NEXT:    movsbq %r15b, %rsi
1952; SSE4-NEXT:    movq %rsi, %r15
1953; SSE4-NEXT:    sarq $63, %r15
1954; SSE4-NEXT:    addb %bpl, %bpl
1955; SSE4-NEXT:    sarb %bpl
1956; SSE4-NEXT:    addb %dl, %dl
1957; SSE4-NEXT:    sarb %dl
1958; SSE4-NEXT:    cmpb %bpl, %dl
1959; SSE4-NEXT:    setl %dl
1960; SSE4-NEXT:    setg %bpl
1961; SSE4-NEXT:    subb %dl, %bpl
1962; SSE4-NEXT:    movsbq %bpl, %r12
1963; SSE4-NEXT:    movq %r12, %r13
1964; SSE4-NEXT:    sarq $63, %r13
1965; SSE4-NEXT:    addb %bl, %bl
1966; SSE4-NEXT:    sarb %bl
1967; SSE4-NEXT:    addb %cl, %cl
1968; SSE4-NEXT:    sarb %cl
1969; SSE4-NEXT:    cmpb %bl, %cl
1970; SSE4-NEXT:    setl %cl
1971; SSE4-NEXT:    setg %dl
1972; SSE4-NEXT:    subb %cl, %dl
1973; SSE4-NEXT:    movsbq %dl, %rbx
1974; SSE4-NEXT:    movq %rbx, %rcx
1975; SSE4-NEXT:    sarq $63, %rcx
1976; SSE4-NEXT:    addb %r11b, %r11b
1977; SSE4-NEXT:    sarb %r11b
1978; SSE4-NEXT:    addb %r8b, %r8b
1979; SSE4-NEXT:    sarb %r8b
1980; SSE4-NEXT:    cmpb %r11b, %r8b
1981; SSE4-NEXT:    setl %dl
1982; SSE4-NEXT:    setg %r8b
1983; SSE4-NEXT:    subb %dl, %r8b
1984; SSE4-NEXT:    movsbq %r8b, %rdx
1985; SSE4-NEXT:    movq %rdx, %r8
1986; SSE4-NEXT:    sarq $63, %r8
1987; SSE4-NEXT:    addb %r10b, %r10b
1988; SSE4-NEXT:    sarb %r10b
1989; SSE4-NEXT:    addb %r9b, %r9b
1990; SSE4-NEXT:    sarb %r9b
1991; SSE4-NEXT:    cmpb %r10b, %r9b
1992; SSE4-NEXT:    setl %r9b
1993; SSE4-NEXT:    setg %r10b
1994; SSE4-NEXT:    subb %r9b, %r10b
1995; SSE4-NEXT:    movsbq %r10b, %r9
1996; SSE4-NEXT:    movq %r9, %r10
1997; SSE4-NEXT:    sarq $63, %r10
1998; SSE4-NEXT:    addb %dil, %dil
1999; SSE4-NEXT:    sarb %dil
2000; SSE4-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
2001; SSE4-NEXT:    addb %r11b, %r11b
2002; SSE4-NEXT:    sarb %r11b
2003; SSE4-NEXT:    cmpb %dil, %r11b
2004; SSE4-NEXT:    setl %dil
2005; SSE4-NEXT:    setg %r11b
2006; SSE4-NEXT:    subb %dil, %r11b
2007; SSE4-NEXT:    movsbq %r11b, %rdi
2008; SSE4-NEXT:    movq %rdi, %rbp
2009; SSE4-NEXT:    sarq $63, %rbp
2010; SSE4-NEXT:    movl %ebp, 96(%rax)
2011; SSE4-NEXT:    movabsq $2251799813685247, %r11 # imm = 0x7FFFFFFFFFFFF
2012; SSE4-NEXT:    andq %rbp, %r11
2013; SSE4-NEXT:    shldq $62, %rdi, %rbp
2014; SSE4-NEXT:    movq %rbp, 88(%rax)
2015; SSE4-NEXT:    movq %r10, %rbp
2016; SSE4-NEXT:    shldq $20, %r9, %rbp
2017; SSE4-NEXT:    movq %rbp, 64(%rax)
2018; SSE4-NEXT:    movq %r8, %rbp
2019; SSE4-NEXT:    shldq $31, %rdx, %rbp
2020; SSE4-NEXT:    movq %rbp, 48(%rax)
2021; SSE4-NEXT:    movq %rcx, %rbp
2022; SSE4-NEXT:    shldq $42, %rbx, %rbp
2023; SSE4-NEXT:    movq %rbp, 32(%rax)
2024; SSE4-NEXT:    movabsq $9007199254738944, %rbp # imm = 0x1FFFFFFFFFF800
2025; SSE4-NEXT:    andq %r13, %rbp
2026; SSE4-NEXT:    shldq $53, %r12, %r13
2027; SSE4-NEXT:    movq %r13, 16(%rax)
2028; SSE4-NEXT:    movq %r11, %r13
2029; SSE4-NEXT:    shrq $48, %r13
2030; SSE4-NEXT:    movb %r13b, 102(%rax)
2031; SSE4-NEXT:    shrq $32, %r11
2032; SSE4-NEXT:    movw %r11w, 100(%rax)
2033; SSE4-NEXT:    movabsq $9007199254740991, %r11 # imm = 0x1FFFFFFFFFFFFF
2034; SSE4-NEXT:    andq %r11, %r15
2035; SSE4-NEXT:    shldq $9, %rsi, %r15
2036; SSE4-NEXT:    shlq $62, %rdi
2037; SSE4-NEXT:    orq %r15, %rdi
2038; SSE4-NEXT:    movq %rdi, 80(%rax)
2039; SSE4-NEXT:    andq %r11, %r14
2040; SSE4-NEXT:    shlq $53, %r12
2041; SSE4-NEXT:    orq %r14, %r12
2042; SSE4-NEXT:    movq %r12, 8(%rax)
2043; SSE4-NEXT:    shlq $42, %rbx
2044; SSE4-NEXT:    shrq $11, %rbp
2045; SSE4-NEXT:    orq %rbx, %rbp
2046; SSE4-NEXT:    movq %rbp, 24(%rax)
2047; SSE4-NEXT:    shlq $9, %rsi
2048; SSE4-NEXT:    andl $511, %r10d # imm = 0x1FF
2049; SSE4-NEXT:    orq %rsi, %r10
2050; SSE4-NEXT:    movq %r10, 72(%rax)
2051; SSE4-NEXT:    shlq $20, %r9
2052; SSE4-NEXT:    andl $1048575, %r8d # imm = 0xFFFFF
2053; SSE4-NEXT:    orq %r9, %r8
2054; SSE4-NEXT:    movq %r8, 56(%rax)
2055; SSE4-NEXT:    shlq $31, %rdx
2056; SSE4-NEXT:    andl $2147483647, %ecx # imm = 0x7FFFFFFF
2057; SSE4-NEXT:    orq %rdx, %rcx
2058; SSE4-NEXT:    movq %rcx, 40(%rax)
2059; SSE4-NEXT:    popq %rbx
2060; SSE4-NEXT:    popq %r12
2061; SSE4-NEXT:    popq %r13
2062; SSE4-NEXT:    popq %r14
2063; SSE4-NEXT:    popq %r15
2064; SSE4-NEXT:    popq %rbp
2065; SSE4-NEXT:    retq
2066;
2067; AVX-LABEL: scmp_uncommon_vectors:
2068; AVX:       # %bb.0:
2069; AVX-NEXT:    pushq %rbp
2070; AVX-NEXT:    pushq %r15
2071; AVX-NEXT:    pushq %r14
2072; AVX-NEXT:    pushq %r13
2073; AVX-NEXT:    pushq %r12
2074; AVX-NEXT:    pushq %rbx
2075; AVX-NEXT:    movq %rdi, %rax
2076; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
2077; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
2078; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
2079; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
2080; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
2081; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
2082; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
2083; AVX-NEXT:    addb %r14b, %r14b
2084; AVX-NEXT:    sarb %r14b
2085; AVX-NEXT:    addb %sil, %sil
2086; AVX-NEXT:    sarb %sil
2087; AVX-NEXT:    cmpb %r14b, %sil
2088; AVX-NEXT:    setl %sil
2089; AVX-NEXT:    setg %r14b
2090; AVX-NEXT:    subb %sil, %r14b
2091; AVX-NEXT:    movsbq %r14b, %r14
2092; AVX-NEXT:    movq %r14, (%rax)
2093; AVX-NEXT:    sarq $63, %r14
2094; AVX-NEXT:    addb %r15b, %r15b
2095; AVX-NEXT:    sarb %r15b
2096; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
2097; AVX-NEXT:    addb %sil, %sil
2098; AVX-NEXT:    sarb %sil
2099; AVX-NEXT:    cmpb %r15b, %sil
2100; AVX-NEXT:    setl %sil
2101; AVX-NEXT:    setg %r15b
2102; AVX-NEXT:    subb %sil, %r15b
2103; AVX-NEXT:    movsbq %r15b, %rsi
2104; AVX-NEXT:    movq %rsi, %r12
2105; AVX-NEXT:    sarq $63, %r12
2106; AVX-NEXT:    addb %bpl, %bpl
2107; AVX-NEXT:    sarb %bpl
2108; AVX-NEXT:    addb %dl, %dl
2109; AVX-NEXT:    sarb %dl
2110; AVX-NEXT:    cmpb %bpl, %dl
2111; AVX-NEXT:    setl %dl
2112; AVX-NEXT:    setg %bpl
2113; AVX-NEXT:    subb %dl, %bpl
2114; AVX-NEXT:    movsbq %bpl, %r15
2115; AVX-NEXT:    movq %r15, %r13
2116; AVX-NEXT:    sarq $63, %r13
2117; AVX-NEXT:    addb %bl, %bl
2118; AVX-NEXT:    sarb %bl
2119; AVX-NEXT:    addb %cl, %cl
2120; AVX-NEXT:    sarb %cl
2121; AVX-NEXT:    cmpb %bl, %cl
2122; AVX-NEXT:    setl %cl
2123; AVX-NEXT:    setg %dl
2124; AVX-NEXT:    subb %cl, %dl
2125; AVX-NEXT:    movsbq %dl, %rbx
2126; AVX-NEXT:    movq %rbx, %rcx
2127; AVX-NEXT:    sarq $63, %rcx
2128; AVX-NEXT:    addb %r11b, %r11b
2129; AVX-NEXT:    sarb %r11b
2130; AVX-NEXT:    addb %r8b, %r8b
2131; AVX-NEXT:    sarb %r8b
2132; AVX-NEXT:    cmpb %r11b, %r8b
2133; AVX-NEXT:    setl %dl
2134; AVX-NEXT:    setg %r8b
2135; AVX-NEXT:    subb %dl, %r8b
2136; AVX-NEXT:    movsbq %r8b, %rdx
2137; AVX-NEXT:    movq %rdx, %r8
2138; AVX-NEXT:    sarq $63, %r8
2139; AVX-NEXT:    addb %r10b, %r10b
2140; AVX-NEXT:    sarb %r10b
2141; AVX-NEXT:    addb %r9b, %r9b
2142; AVX-NEXT:    sarb %r9b
2143; AVX-NEXT:    cmpb %r10b, %r9b
2144; AVX-NEXT:    setl %r9b
2145; AVX-NEXT:    setg %r10b
2146; AVX-NEXT:    subb %r9b, %r10b
2147; AVX-NEXT:    movsbq %r10b, %r9
2148; AVX-NEXT:    movq %r9, %r10
2149; AVX-NEXT:    sarq $63, %r10
2150; AVX-NEXT:    addb %dil, %dil
2151; AVX-NEXT:    sarb %dil
2152; AVX-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
2153; AVX-NEXT:    addb %r11b, %r11b
2154; AVX-NEXT:    sarb %r11b
2155; AVX-NEXT:    cmpb %dil, %r11b
2156; AVX-NEXT:    setl %dil
2157; AVX-NEXT:    setg %r11b
2158; AVX-NEXT:    subb %dil, %r11b
2159; AVX-NEXT:    movsbq %r11b, %rdi
2160; AVX-NEXT:    movq %rdi, %rbp
2161; AVX-NEXT:    sarq $63, %rbp
2162; AVX-NEXT:    movl %ebp, 96(%rax)
2163; AVX-NEXT:    movb $51, %r11b
2164; AVX-NEXT:    bzhiq %r11, %rbp, %r11
2165; AVX-NEXT:    shldq $62, %rdi, %rbp
2166; AVX-NEXT:    movq %rbp, 88(%rax)
2167; AVX-NEXT:    movq %r10, %rbp
2168; AVX-NEXT:    shldq $20, %r9, %rbp
2169; AVX-NEXT:    movq %rbp, 64(%rax)
2170; AVX-NEXT:    movq %r8, %rbp
2171; AVX-NEXT:    shldq $31, %rdx, %rbp
2172; AVX-NEXT:    movq %rbp, 48(%rax)
2173; AVX-NEXT:    movq %rcx, %rbp
2174; AVX-NEXT:    shldq $42, %rbx, %rbp
2175; AVX-NEXT:    movq %rbp, 32(%rax)
2176; AVX-NEXT:    movb $42, %bpl
2177; AVX-NEXT:    bzhiq %rbp, %r13, %rbp
2178; AVX-NEXT:    shldq $53, %r15, %r13
2179; AVX-NEXT:    movq %r13, 16(%rax)
2180; AVX-NEXT:    movq %r11, %r13
2181; AVX-NEXT:    shrq $48, %r13
2182; AVX-NEXT:    movb %r13b, 102(%rax)
2183; AVX-NEXT:    shrq $32, %r11
2184; AVX-NEXT:    movw %r11w, 100(%rax)
2185; AVX-NEXT:    movb $53, %r11b
2186; AVX-NEXT:    bzhiq %r11, %r12, %r12
2187; AVX-NEXT:    shldq $9, %rsi, %r12
2188; AVX-NEXT:    shlq $62, %rdi
2189; AVX-NEXT:    orq %r12, %rdi
2190; AVX-NEXT:    movq %rdi, 80(%rax)
2191; AVX-NEXT:    shlq $42, %rbx
2192; AVX-NEXT:    orq %rbp, %rbx
2193; AVX-NEXT:    movq %rbx, 24(%rax)
2194; AVX-NEXT:    bzhiq %r11, %r14, %rdi
2195; AVX-NEXT:    shlq $53, %r15
2196; AVX-NEXT:    orq %rdi, %r15
2197; AVX-NEXT:    movq %r15, 8(%rax)
2198; AVX-NEXT:    shlq $9, %rsi
2199; AVX-NEXT:    andl $511, %r10d # imm = 0x1FF
2200; AVX-NEXT:    orq %rsi, %r10
2201; AVX-NEXT:    movq %r10, 72(%rax)
2202; AVX-NEXT:    shlq $20, %r9
2203; AVX-NEXT:    andl $1048575, %r8d # imm = 0xFFFFF
2204; AVX-NEXT:    orq %r9, %r8
2205; AVX-NEXT:    movq %r8, 56(%rax)
2206; AVX-NEXT:    shlq $31, %rdx
2207; AVX-NEXT:    andl $2147483647, %ecx # imm = 0x7FFFFFFF
2208; AVX-NEXT:    orq %rdx, %rcx
2209; AVX-NEXT:    movq %rcx, 40(%rax)
2210; AVX-NEXT:    popq %rbx
2211; AVX-NEXT:    popq %r12
2212; AVX-NEXT:    popq %r13
2213; AVX-NEXT:    popq %r14
2214; AVX-NEXT:    popq %r15
2215; AVX-NEXT:    popq %rbp
2216; AVX-NEXT:    retq
2217;
2218; X86-LABEL: scmp_uncommon_vectors:
2219; X86:       # %bb.0:
2220; X86-NEXT:    pushl %ebp
2221; X86-NEXT:    pushl %ebx
2222; X86-NEXT:    pushl %edi
2223; X86-NEXT:    pushl %esi
2224; X86-NEXT:    subl $52, %esp
2225; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2226; X86-NEXT:    addb %al, %al
2227; X86-NEXT:    sarb %al
2228; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2229; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2230; X86-NEXT:    addb %al, %al
2231; X86-NEXT:    sarb %al
2232; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2233; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2234; X86-NEXT:    addb %al, %al
2235; X86-NEXT:    sarb %al
2236; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2237; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2238; X86-NEXT:    addb %al, %al
2239; X86-NEXT:    sarb %al
2240; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2241; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2242; X86-NEXT:    addb %al, %al
2243; X86-NEXT:    sarb %al
2244; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2245; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2246; X86-NEXT:    addb %al, %al
2247; X86-NEXT:    sarb %al
2248; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
2249; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
2250; X86-NEXT:    addb %dh, %dh
2251; X86-NEXT:    sarb %dh
2252; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
2253; X86-NEXT:    addb %dl, %dl
2254; X86-NEXT:    sarb %dl
2255; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2256; X86-NEXT:    addb %al, %al
2257; X86-NEXT:    sarb %al
2258; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
2259; X86-NEXT:    addb %ah, %ah
2260; X86-NEXT:    sarb %ah
2261; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2262; X86-NEXT:    addb %cl, %cl
2263; X86-NEXT:    sarb %cl
2264; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
2265; X86-NEXT:    addb %ch, %ch
2266; X86-NEXT:    sarb %ch
2267; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
2268; X86-NEXT:    addb %bl, %bl
2269; X86-NEXT:    sarb %bl
2270; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
2271; X86-NEXT:    addb %bh, %bh
2272; X86-NEXT:    sarb %bh
2273; X86-NEXT:    cmpb %bl, %bh
2274; X86-NEXT:    setl %bl
2275; X86-NEXT:    setg %bh
2276; X86-NEXT:    subb %bl, %bh
2277; X86-NEXT:    movsbl %bh, %esi
2278; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2279; X86-NEXT:    sarl $31, %esi
2280; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2281; X86-NEXT:    andl $2097151, %esi # imm = 0x1FFFFF
2282; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2283; X86-NEXT:    cmpb %cl, %ch
2284; X86-NEXT:    setl %cl
2285; X86-NEXT:    setg %ch
2286; X86-NEXT:    subb %cl, %ch
2287; X86-NEXT:    movsbl %ch, %ecx
2288; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2289; X86-NEXT:    sarl $31, %ecx
2290; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2291; X86-NEXT:    andl $2097151, %ecx # imm = 0x1FFFFF
2292; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2293; X86-NEXT:    cmpb %al, %ah
2294; X86-NEXT:    setl %al
2295; X86-NEXT:    setg %cl
2296; X86-NEXT:    subb %al, %cl
2297; X86-NEXT:    movsbl %cl, %ecx
2298; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
2299; X86-NEXT:    movl %ecx, (%edi)
2300; X86-NEXT:    sarl $31, %ecx
2301; X86-NEXT:    movl %ecx, %eax
2302; X86-NEXT:    andl $2097151, %eax # imm = 0x1FFFFF
2303; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2304; X86-NEXT:    cmpb %dh, %dl
2305; X86-NEXT:    setl %al
2306; X86-NEXT:    setg %dl
2307; X86-NEXT:    subb %al, %dl
2308; X86-NEXT:    movsbl %dl, %ebp
2309; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2310; X86-NEXT:    sarl $31, %ebp
2311; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
2312; X86-NEXT:    cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
2313; X86-NEXT:    setl %al
2314; X86-NEXT:    setg %dl
2315; X86-NEXT:    subb %al, %dl
2316; X86-NEXT:    movsbl %dl, %esi
2317; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2318; X86-NEXT:    sarl $31, %esi
2319; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
2320; X86-NEXT:    cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
2321; X86-NEXT:    setl %al
2322; X86-NEXT:    setg %dl
2323; X86-NEXT:    subb %al, %dl
2324; X86-NEXT:    movsbl %dl, %eax
2325; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2326; X86-NEXT:    sarl $31, %eax
2327; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
2328; X86-NEXT:    cmpb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
2329; X86-NEXT:    setl %dl
2330; X86-NEXT:    setg %dh
2331; X86-NEXT:    subb %dl, %dh
2332; X86-NEXT:    movsbl %dh, %ebx
2333; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2334; X86-NEXT:    sarl $31, %ebx
2335; X86-NEXT:    movl %ebx, 96(%edi)
2336; X86-NEXT:    movl %ebx, 92(%edi)
2337; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2338; X86-NEXT:    movl %edx, 80(%edi)
2339; X86-NEXT:    movl %eax, 68(%edi)
2340; X86-NEXT:    movl %eax, 64(%edi)
2341; X86-NEXT:    movl %esi, 52(%edi)
2342; X86-NEXT:    movl %esi, 48(%edi)
2343; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2344; X86-NEXT:    movl %edx, 36(%edi)
2345; X86-NEXT:    movl %ebp, 24(%edi)
2346; X86-NEXT:    movl %ebp, 20(%edi)
2347; X86-NEXT:    movl %ecx, 8(%edi)
2348; X86-NEXT:    movl %ecx, 4(%edi)
2349; X86-NEXT:    movl %ebx, %ecx
2350; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2351; X86-NEXT:    movw %cx, 100(%edi)
2352; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2353; X86-NEXT:    shldl $30, %edx, %ecx
2354; X86-NEXT:    movl %ecx, 88(%edi)
2355; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
2356; X86-NEXT:    shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
2357; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2358; X86-NEXT:    shldl $9, %edx, %ecx
2359; X86-NEXT:    movl %ecx, 76(%edi)
2360; X86-NEXT:    movl %eax, %ecx
2361; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2362; X86-NEXT:    shldl $20, %edx, %ecx
2363; X86-NEXT:    movl %ecx, 60(%edi)
2364; X86-NEXT:    movl %esi, %ecx
2365; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2366; X86-NEXT:    shldl $31, %edx, %ecx
2367; X86-NEXT:    movl %ecx, 44(%edi)
2368; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
2369; X86-NEXT:    shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
2370; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
2371; X86-NEXT:    shldl $10, %edx, %ecx
2372; X86-NEXT:    movl %ecx, 32(%edi)
2373; X86-NEXT:    movl %ebp, %ecx
2374; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
2375; X86-NEXT:    shldl $21, %ebx, %ecx
2376; X86-NEXT:    movl %ecx, 16(%edi)
2377; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
2378; X86-NEXT:    shll $9, %ecx
2379; X86-NEXT:    andl $511, %eax # imm = 0x1FF
2380; X86-NEXT:    orl %ecx, %eax
2381; X86-NEXT:    movl %eax, 72(%edi)
2382; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2383; X86-NEXT:    shll $20, %eax
2384; X86-NEXT:    andl $1048575, %esi # imm = 0xFFFFF
2385; X86-NEXT:    orl %eax, %esi
2386; X86-NEXT:    movl %esi, 56(%edi)
2387; X86-NEXT:    shll $10, %edx
2388; X86-NEXT:    andl $1023, %ebp # imm = 0x3FF
2389; X86-NEXT:    orl %edx, %ebp
2390; X86-NEXT:    movl %ebp, 28(%edi)
2391; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2392; X86-NEXT:    shll $21, %eax
2393; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
2394; X86-NEXT:    movl %eax, 12(%edi)
2395; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2396; X86-NEXT:    andl $7, %eax
2397; X86-NEXT:    movb %al, 102(%edi)
2398; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2399; X86-NEXT:    shll $30, %eax
2400; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
2401; X86-NEXT:    movl %eax, 84(%edi)
2402; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2403; X86-NEXT:    shll $31, %eax
2404; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
2405; X86-NEXT:    movl %eax, 40(%edi)
2406; X86-NEXT:    movl %edi, %eax
2407; X86-NEXT:    addl $52, %esp
2408; X86-NEXT:    popl %esi
2409; X86-NEXT:    popl %edi
2410; X86-NEXT:    popl %ebx
2411; X86-NEXT:    popl %ebp
2412; X86-NEXT:    retl $4
2413  %1 = call <7 x i117> @llvm.scmp(<7 x i7> %x, <7 x i7> %y)
2414  ret <7 x i117> %1
2415}
2416
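; A single-element vector with an illegal i33 element type is scalarized: the i33
; operands are sign-extended and compared as ordinary scalars.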
2417define <1 x i3> @scmp_scalarize(<1 x i33> %x, <1 x i33> %y) nounwind {
2418; X64-LABEL: scmp_scalarize:
2419; X64:       # %bb.0:
2420; X64-NEXT:    shlq $31, %rsi
2421; X64-NEXT:    sarq $31, %rsi
2422; X64-NEXT:    shlq $31, %rdi
2423; X64-NEXT:    sarq $31, %rdi
2424; X64-NEXT:    cmpq %rsi, %rdi
2425; X64-NEXT:    setl %cl
2426; X64-NEXT:    setg %al
2427; X64-NEXT:    subb %cl, %al
2428; X64-NEXT:    retq
2429;
2430; X86-LABEL: scmp_scalarize:
2431; X86:       # %bb.0:
2432; X86-NEXT:    pushl %ebx
2433; X86-NEXT:    pushl %edi
2434; X86-NEXT:    pushl %esi
2435; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2436; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2437; X86-NEXT:    andl $1, %eax
2438; X86-NEXT:    negl %eax
2439; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
2440; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
2441; X86-NEXT:    andl $1, %esi
2442; X86-NEXT:    negl %esi
2443; X86-NEXT:    cmpl %ecx, %edx
2444; X86-NEXT:    movl %esi, %edi
2445; X86-NEXT:    sbbl %eax, %edi
2446; X86-NEXT:    setl %bl
2447; X86-NEXT:    cmpl %edx, %ecx
2448; X86-NEXT:    sbbl %esi, %eax
2449; X86-NEXT:    setl %al
2450; X86-NEXT:    subb %bl, %al
2451; X86-NEXT:    popl %esi
2452; X86-NEXT:    popl %edi
2453; X86-NEXT:    popl %ebx
2454; X86-NEXT:    retl
2455  %1 = call <1 x i3> @llvm.scmp(<1 x i33> %x, <1 x i33> %y)
2456  ret <1 x i3> %1
2457}
2458
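; Boolean (<2 x i1>) operands are sign-extended to 0/-1 (andb $1 + negb) before
; the signed compare.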
2459define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind {
2460; SSE2-LABEL: scmp_bool_operands:
2461; SSE2:       # %bb.0:
2462; SSE2-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
2463; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
2464; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
2465; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
2466; SSE2-NEXT:    andb $1, %al
2467; SSE2-NEXT:    negb %al
2468; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
2469; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
2470; SSE2-NEXT:    andb $1, %dl
2471; SSE2-NEXT:    negb %dl
2472; SSE2-NEXT:    cmpb %al, %dl
2473; SSE2-NEXT:    setl %al
2474; SSE2-NEXT:    setg %dl
2475; SSE2-NEXT:    subb %al, %dl
2476; SSE2-NEXT:    movzbl %dl, %eax
2477; SSE2-NEXT:    andb $1, %cl
2478; SSE2-NEXT:    negb %cl
2479; SSE2-NEXT:    andb $1, %sil
2480; SSE2-NEXT:    negb %sil
2481; SSE2-NEXT:    cmpb %cl, %sil
2482; SSE2-NEXT:    setl %cl
2483; SSE2-NEXT:    setg %dl
2484; SSE2-NEXT:    subb %cl, %dl
2485; SSE2-NEXT:    movzbl %dl, %ecx
2486; SSE2-NEXT:    shll $8, %ecx
2487; SSE2-NEXT:    orl %eax, %ecx
2488; SSE2-NEXT:    movd %ecx, %xmm0
2489; SSE2-NEXT:    retq
2490;
2491; SSE4-LABEL: scmp_bool_operands:
2492; SSE4:       # %bb.0:
2493; SSE4-NEXT:    pextrb $8, %xmm1, %eax
2494; SSE4-NEXT:    andb $1, %al
2495; SSE4-NEXT:    negb %al
2496; SSE4-NEXT:    pextrb $8, %xmm0, %ecx
2497; SSE4-NEXT:    andb $1, %cl
2498; SSE4-NEXT:    negb %cl
2499; SSE4-NEXT:    cmpb %al, %cl
2500; SSE4-NEXT:    setl %al
2501; SSE4-NEXT:    setg %cl
2502; SSE4-NEXT:    subb %al, %cl
2503; SSE4-NEXT:    movzbl %cl, %eax
2504; SSE4-NEXT:    movd %xmm1, %ecx
2505; SSE4-NEXT:    andb $1, %cl
2506; SSE4-NEXT:    negb %cl
2507; SSE4-NEXT:    movd %xmm0, %edx
2508; SSE4-NEXT:    andb $1, %dl
2509; SSE4-NEXT:    negb %dl
2510; SSE4-NEXT:    cmpb %cl, %dl
2511; SSE4-NEXT:    setl %cl
2512; SSE4-NEXT:    setg %dl
2513; SSE4-NEXT:    subb %cl, %dl
2514; SSE4-NEXT:    movzbl %dl, %ecx
2515; SSE4-NEXT:    movd %ecx, %xmm0
2516; SSE4-NEXT:    pinsrb $1, %eax, %xmm0
2517; SSE4-NEXT:    retq
2518;
2519; AVX2-LABEL: scmp_bool_operands:
2520; AVX2:       # %bb.0:
2521; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
2522; AVX2-NEXT:    andb $1, %al
2523; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
2524; AVX2-NEXT:    negb %al
2525; AVX2-NEXT:    andb $1, %cl
2526; AVX2-NEXT:    negb %cl
2527; AVX2-NEXT:    cmpb %al, %cl
2528; AVX2-NEXT:    setl %al
2529; AVX2-NEXT:    setg %cl
2530; AVX2-NEXT:    subb %al, %cl
2531; AVX2-NEXT:    vmovd %xmm1, %eax
2532; AVX2-NEXT:    andb $1, %al
2533; AVX2-NEXT:    negb %al
2534; AVX2-NEXT:    vmovd %xmm0, %edx
2535; AVX2-NEXT:    andb $1, %dl
2536; AVX2-NEXT:    negb %dl
2537; AVX2-NEXT:    cmpb %al, %dl
2538; AVX2-NEXT:    setl %al
2539; AVX2-NEXT:    setg %dl
2540; AVX2-NEXT:    subb %al, %dl
2541; AVX2-NEXT:    vmovd %edx, %xmm0
2542; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
2543; AVX2-NEXT:    retq
2544;
2545; AVX512-LABEL: scmp_bool_operands:
2546; AVX512:       # %bb.0:
2547; AVX512-NEXT:    vpsllq $63, %xmm0, %xmm0
2548; AVX512-NEXT:    vpmovq2m %xmm0, %k0
2549; AVX512-NEXT:    kshiftrb $1, %k0, %k1
2550; AVX512-NEXT:    kmovd %k1, %eax
2551; AVX512-NEXT:    vpsllq $63, %xmm1, %xmm0
2552; AVX512-NEXT:    vpmovq2m %xmm0, %k1
2553; AVX512-NEXT:    kshiftrb $1, %k1, %k2
2554; AVX512-NEXT:    kmovd %k2, %ecx
2555; AVX512-NEXT:    andb $1, %cl
2556; AVX512-NEXT:    negb %cl
2557; AVX512-NEXT:    andb $1, %al
2558; AVX512-NEXT:    negb %al
2559; AVX512-NEXT:    cmpb %cl, %al
2560; AVX512-NEXT:    setl %al
2561; AVX512-NEXT:    setg %cl
2562; AVX512-NEXT:    subb %al, %cl
2563; AVX512-NEXT:    kmovd %k1, %eax
2564; AVX512-NEXT:    andb $1, %al
2565; AVX512-NEXT:    negb %al
2566; AVX512-NEXT:    kmovd %k0, %edx
2567; AVX512-NEXT:    andb $1, %dl
2568; AVX512-NEXT:    negb %dl
2569; AVX512-NEXT:    cmpb %al, %dl
2570; AVX512-NEXT:    setl %al
2571; AVX512-NEXT:    setg %dl
2572; AVX512-NEXT:    subb %al, %dl
2573; AVX512-NEXT:    vmovd %edx, %xmm0
2574; AVX512-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
2575; AVX512-NEXT:    retq
2576;
2577; X86-LABEL: scmp_bool_operands:
2578; X86:       # %bb.0:
2579; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2580; X86-NEXT:    andb $1, %cl
2581; X86-NEXT:    negb %cl
2582; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2583; X86-NEXT:    andb $1, %dl
2584; X86-NEXT:    negb %dl
2585; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2586; X86-NEXT:    andb $1, %al
2587; X86-NEXT:    negb %al
2588; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
2589; X86-NEXT:    andb $1, %ah
2590; X86-NEXT:    negb %ah
2591; X86-NEXT:    cmpb %al, %ah
2592; X86-NEXT:    setl %ah
2593; X86-NEXT:    setg %al
2594; X86-NEXT:    subb %ah, %al
2595; X86-NEXT:    cmpb %cl, %dl
2596; X86-NEXT:    setl %cl
2597; X86-NEXT:    setg %dl
2598; X86-NEXT:    subb %cl, %dl
2599; X86-NEXT:    retl
2600  %1 = call <2 x i8> @llvm.scmp(<2 x i1> %x, <2 x i1> %y)
2601  ret <2 x i8> %1
2602}
2603
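; The result element type (i16) is wider than the operand element type (i8), so
; each i8 compare result is sign-extended into a 16-bit lane.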
2604define <2 x i16> @scmp_ret_wider_than_operands(<2 x i8> %x, <2 x i8> %y) nounwind {
2605; SSE2-LABEL: scmp_ret_wider_than_operands:
2606; SSE2:       # %bb.0:
2607; SSE2-NEXT:    movd %xmm1, %eax
2608; SSE2-NEXT:    movl %eax, %ecx
2609; SSE2-NEXT:    shrl $8, %ecx
2610; SSE2-NEXT:    movd %xmm0, %edx
2611; SSE2-NEXT:    movl %edx, %esi
2612; SSE2-NEXT:    shrl $8, %esi
2613; SSE2-NEXT:    cmpb %cl, %sil
2614; SSE2-NEXT:    setl %cl
2615; SSE2-NEXT:    setg %sil
2616; SSE2-NEXT:    subb %cl, %sil
2617; SSE2-NEXT:    movsbl %sil, %ecx
2618; SSE2-NEXT:    cmpb %al, %dl
2619; SSE2-NEXT:    setl %al
2620; SSE2-NEXT:    setg %dl
2621; SSE2-NEXT:    subb %al, %dl
2622; SSE2-NEXT:    movsbl %dl, %eax
2623; SSE2-NEXT:    movd %eax, %xmm0
2624; SSE2-NEXT:    pinsrw $1, %ecx, %xmm0
2625; SSE2-NEXT:    retq
2626;
2627; SSE4-LABEL: scmp_ret_wider_than_operands:
2628; SSE4:       # %bb.0:
2629; SSE4-NEXT:    pextrb $1, %xmm1, %eax
2630; SSE4-NEXT:    pextrb $1, %xmm0, %ecx
2631; SSE4-NEXT:    cmpb %al, %cl
2632; SSE4-NEXT:    setl %al
2633; SSE4-NEXT:    setg %cl
2634; SSE4-NEXT:    subb %al, %cl
2635; SSE4-NEXT:    movsbl %cl, %eax
2636; SSE4-NEXT:    movd %xmm1, %ecx
2637; SSE4-NEXT:    movd %xmm0, %edx
2638; SSE4-NEXT:    cmpb %cl, %dl
2639; SSE4-NEXT:    setl %cl
2640; SSE4-NEXT:    setg %dl
2641; SSE4-NEXT:    subb %cl, %dl
2642; SSE4-NEXT:    movsbl %dl, %ecx
2643; SSE4-NEXT:    movd %ecx, %xmm0
2644; SSE4-NEXT:    pinsrw $1, %eax, %xmm0
2645; SSE4-NEXT:    retq
2646;
2647; AVX-LABEL: scmp_ret_wider_than_operands:
2648; AVX:       # %bb.0:
2649; AVX-NEXT:    vpextrb $1, %xmm1, %eax
2650; AVX-NEXT:    vpextrb $1, %xmm0, %ecx
2651; AVX-NEXT:    cmpb %al, %cl
2652; AVX-NEXT:    setl %al
2653; AVX-NEXT:    setg %cl
2654; AVX-NEXT:    subb %al, %cl
2655; AVX-NEXT:    movsbl %cl, %eax
2656; AVX-NEXT:    vmovd %xmm1, %ecx
2657; AVX-NEXT:    vmovd %xmm0, %edx
2658; AVX-NEXT:    cmpb %cl, %dl
2659; AVX-NEXT:    setl %cl
2660; AVX-NEXT:    setg %dl
2661; AVX-NEXT:    subb %cl, %dl
2662; AVX-NEXT:    movsbl %dl, %ecx
2663; AVX-NEXT:    vmovd %ecx, %xmm0
2664; AVX-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
2665; AVX-NEXT:    retq
2666;
2667; X86-LABEL: scmp_ret_wider_than_operands:
2668; X86:       # %bb.0:
2669; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
2670; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
2671; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
2672; X86-NEXT:    setl %al
2673; X86-NEXT:    setg %dl
2674; X86-NEXT:    subb %al, %dl
2675; X86-NEXT:    movsbl %dl, %eax
2676; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %cl
2677; X86-NEXT:    setl %cl
2678; X86-NEXT:    setg %dl
2679; X86-NEXT:    subb %cl, %dl
2680; X86-NEXT:    movsbl %dl, %edx
2681; X86-NEXT:    # kill: def $ax killed $ax killed $eax
2682; X86-NEXT:    # kill: def $dx killed $dx killed $edx
2683; X86-NEXT:    retl
2684  %1 = call <2 x i16> @llvm.scmp(<2 x i8> %x, <2 x i8> %y)
2685  ret <2 x i16> %1
2686}
2687
2688