xref: /llvm-project/llvm/test/CodeGen/X86/ucmp.ll (revision c5edecbb4bfe08997819ff84712e3e22ddd04490)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64    | FileCheck %s --check-prefixes=X64,SSE,SSE4
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
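6; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; Caution for readers - the SSE4 and SSE2 prefix names are swapped relative to
; the CPU levels they are attached to. The SSE4 prefix is used with the baseline
; -mcpu=x86-64 run and the SSE2 prefix with the -mcpu=x86-64-v2 run, so the
; checks under the SSE2 prefix are the ones that contain SSE4.1 instructions
; such as pmaxud, pextrd, pinsrb and pmovzxbd.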
7
; Unsigned three-way compare (llvm.ucmp) of two i8 values, i8 result.
; Both targets are expected to emit one cmpb followed by seta / sbbb $0:
; seta produces 1 when the first operand is above the second, and sbbb $0 then
; subtracts the carry left by the compare, giving -1 when it is below.
8define i8 @ucmp.8.8(i8 %x, i8 %y) nounwind {
9; X64-LABEL: ucmp.8.8:
10; X64:       # %bb.0:
11; X64-NEXT:    cmpb %sil, %dil
12; X64-NEXT:    seta %al
13; X64-NEXT:    sbbb $0, %al
14; X64-NEXT:    retq
15;
16; X86-LABEL: ucmp.8.8:
17; X86:       # %bb.0:
18; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
19; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
20; X86-NEXT:    seta %al
21; X86-NEXT:    sbbb $0, %al
22; X86-NEXT:    retl
23  %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
24  ret i8 %1
25}
26
; Unsigned three-way compare (llvm.ucmp) of two i16 values, i8 result.
; The expected code is a 16-bit cmpw followed by the seta / sbbb $0 pair that
; turns the flags into 1, 0 or -1 in %al.
27define i8 @ucmp.8.16(i16 %x, i16 %y) nounwind {
28; X64-LABEL: ucmp.8.16:
29; X64:       # %bb.0:
30; X64-NEXT:    cmpw %si, %di
31; X64-NEXT:    seta %al
32; X64-NEXT:    sbbb $0, %al
33; X64-NEXT:    retq
34;
35; X86-LABEL: ucmp.8.16:
36; X86:       # %bb.0:
37; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
38; X86-NEXT:    cmpw {{[0-9]+}}(%esp), %ax
39; X86-NEXT:    seta %al
40; X86-NEXT:    sbbb $0, %al
41; X86-NEXT:    retl
42  %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
43  ret i8 %1
44}
45
; Unsigned three-way compare (llvm.ucmp) of two i32 values, i8 result.
; The expected code is a 32-bit cmpl followed by the seta / sbbb $0 pair that
; turns the flags into 1, 0 or -1 in %al.
46define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
47; X64-LABEL: ucmp.8.32:
48; X64:       # %bb.0:
49; X64-NEXT:    cmpl %esi, %edi
50; X64-NEXT:    seta %al
51; X64-NEXT:    sbbb $0, %al
52; X64-NEXT:    retq
53;
54; X86-LABEL: ucmp.8.32:
55; X86:       # %bb.0:
56; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
57; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
58; X86-NEXT:    seta %al
59; X86-NEXT:    sbbb $0, %al
60; X86-NEXT:    retl
61  %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
62  ret i8 %1
63}
64
; Unsigned three-way compare (llvm.ucmp) of two i64 values, i8 result.
; On x86-64 a single cmpq plus seta / sbbb $0 is expected.
; On i686 the checks expect the 64-bit compare to be split into one cmpl/sbbl
; pair per direction: the borrow of the first pair is captured with setb, and
; the borrow of the second pair is consumed by the final sbbb $0.
65define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
66; X64-LABEL: ucmp.8.64:
67; X64:       # %bb.0:
68; X64-NEXT:    cmpq %rsi, %rdi
69; X64-NEXT:    seta %al
70; X64-NEXT:    sbbb $0, %al
71; X64-NEXT:    retq
72;
73; X86-LABEL: ucmp.8.64:
74; X86:       # %bb.0:
75; X86-NEXT:    pushl %edi
76; X86-NEXT:    pushl %esi
77; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
78; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
79; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
80; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
81; X86-NEXT:    cmpl %ecx, %esi
82; X86-NEXT:    movl %edi, %eax
83; X86-NEXT:    sbbl %edx, %eax
84; X86-NEXT:    setb %al
85; X86-NEXT:    cmpl %esi, %ecx
86; X86-NEXT:    sbbl %edi, %edx
87; X86-NEXT:    sbbb $0, %al
88; X86-NEXT:    popl %esi
89; X86-NEXT:    popl %edi
90; X86-NEXT:    retl
91  %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
92  ret i8 %1
93}
94
; Unsigned three-way compare (llvm.ucmp) of two i128 values, i8 result.
; On x86-64 the checks expect one cmpq/sbbq pair per direction, with setb
; capturing the first borrow and sbbb $0 consuming the second.
; On i686 the same scheme is expected with a four-step cmpl/sbbl/sbbl/sbbl
; borrow chain per direction, mixing register and stack operands.
95define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
96; X64-LABEL: ucmp.8.128:
97; X64:       # %bb.0:
98; X64-NEXT:    cmpq %rdi, %rdx
99; X64-NEXT:    movq %rcx, %rax
100; X64-NEXT:    sbbq %rsi, %rax
101; X64-NEXT:    setb %al
102; X64-NEXT:    cmpq %rdx, %rdi
103; X64-NEXT:    sbbq %rcx, %rsi
104; X64-NEXT:    sbbb $0, %al
105; X64-NEXT:    retq
106;
107; X86-LABEL: ucmp.8.128:
108; X86:       # %bb.0:
109; X86-NEXT:    pushl %ebp
110; X86-NEXT:    pushl %ebx
111; X86-NEXT:    pushl %edi
112; X86-NEXT:    pushl %esi
113; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
114; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
115; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
116; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
117; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
118; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ebx
119; X86-NEXT:    movl %ebp, %eax
120; X86-NEXT:    sbbl %esi, %eax
121; X86-NEXT:    movl %ecx, %eax
122; X86-NEXT:    sbbl %edx, %eax
123; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
124; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
125; X86-NEXT:    movl %edi, %eax
126; X86-NEXT:    sbbl %ecx, %eax
127; X86-NEXT:    setb %al
128; X86-NEXT:    cmpl %ebx, {{[0-9]+}}(%esp)
129; X86-NEXT:    sbbl %ebp, %esi
130; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
131; X86-NEXT:    sbbl %edi, %ecx
132; X86-NEXT:    sbbb $0, %al
133; X86-NEXT:    popl %esi
134; X86-NEXT:    popl %edi
135; X86-NEXT:    popl %ebx
136; X86-NEXT:    popl %ebp
137; X86-NEXT:    retl
138  %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
139  ret i8 %1
140}
141
; Unsigned three-way compare (llvm.ucmp) of two i32 values, i32 result.
; The checks expect the result to be formed as a byte with cmpl / seta /
; sbbb $0 and then sign-extended to 32 bits with movsbl.
142define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
143; X64-LABEL: ucmp.32.32:
144; X64:       # %bb.0:
145; X64-NEXT:    cmpl %esi, %edi
146; X64-NEXT:    seta %al
147; X64-NEXT:    sbbb $0, %al
148; X64-NEXT:    movsbl %al, %eax
149; X64-NEXT:    retq
150;
151; X86-LABEL: ucmp.32.32:
152; X86:       # %bb.0:
153; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
154; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
155; X86-NEXT:    seta %al
156; X86-NEXT:    sbbb $0, %al
157; X86-NEXT:    movsbl %al, %eax
158; X86-NEXT:    retl
159  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
160  ret i32 %1
161}
162
; Unsigned three-way compare (llvm.ucmp) of two i64 values, i32 result.
; On x86-64 the checks expect cmpq / seta / sbbb $0 followed by movsbl.
; On i686 the byte result is expected to be built in %bl from one cmpl/sbbl
; pair per direction (setb, then sbbb $0) and sign-extended into %eax.
163define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
164; X64-LABEL: ucmp.32.64:
165; X64:       # %bb.0:
166; X64-NEXT:    cmpq %rsi, %rdi
167; X64-NEXT:    seta %al
168; X64-NEXT:    sbbb $0, %al
169; X64-NEXT:    movsbl %al, %eax
170; X64-NEXT:    retq
171;
172; X86-LABEL: ucmp.32.64:
173; X86:       # %bb.0:
174; X86-NEXT:    pushl %ebx
175; X86-NEXT:    pushl %edi
176; X86-NEXT:    pushl %esi
177; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
178; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
179; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
180; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
181; X86-NEXT:    cmpl %eax, %edx
182; X86-NEXT:    movl %esi, %edi
183; X86-NEXT:    sbbl %ecx, %edi
184; X86-NEXT:    setb %bl
185; X86-NEXT:    cmpl %edx, %eax
186; X86-NEXT:    sbbl %esi, %ecx
187; X86-NEXT:    sbbb $0, %bl
188; X86-NEXT:    movsbl %bl, %eax
189; X86-NEXT:    popl %esi
190; X86-NEXT:    popl %edi
191; X86-NEXT:    popl %ebx
192; X86-NEXT:    retl
193  %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
194  ret i32 %1
195}
196
; Unsigned three-way compare (llvm.ucmp) of two i64 values, i64 result.
; On x86-64 the checks expect cmpq / seta / sbbb $0 followed by movsbq.
; On i686 the byte result is expected to be built in %bl from one cmpl/sbbl
; pair per direction, sign-extended into %eax with movsbl, and then copied to
; %edx and shifted with sarl $31 so that %edx holds the replicated sign.
197define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
198; X64-LABEL: ucmp.64.64:
199; X64:       # %bb.0:
200; X64-NEXT:    cmpq %rsi, %rdi
201; X64-NEXT:    seta %al
202; X64-NEXT:    sbbb $0, %al
203; X64-NEXT:    movsbq %al, %rax
204; X64-NEXT:    retq
205;
206; X86-LABEL: ucmp.64.64:
207; X86:       # %bb.0:
208; X86-NEXT:    pushl %ebx
209; X86-NEXT:    pushl %edi
210; X86-NEXT:    pushl %esi
211; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
212; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
213; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
214; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
215; X86-NEXT:    cmpl %eax, %edx
216; X86-NEXT:    movl %esi, %edi
217; X86-NEXT:    sbbl %ecx, %edi
218; X86-NEXT:    setb %bl
219; X86-NEXT:    cmpl %edx, %eax
220; X86-NEXT:    sbbl %esi, %ecx
221; X86-NEXT:    sbbb $0, %bl
222; X86-NEXT:    movsbl %bl, %eax
223; X86-NEXT:    movl %eax, %edx
224; X86-NEXT:    sarl $31, %edx
225; X86-NEXT:    popl %esi
226; X86-NEXT:    popl %edi
227; X86-NEXT:    popl %ebx
228; X86-NEXT:    retl
229  %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
230  ret i64 %1
231}
232
; llvm.ucmp with a result type narrower than a byte (i4) on i32 operands.
; The checks expect only cmpl / seta / sbbb $0 with the value left in %al;
; no additional masking or extension instruction is expected.
233define i4 @ucmp_narrow_result(i32 %x, i32 %y) nounwind {
234; X64-LABEL: ucmp_narrow_result:
235; X64:       # %bb.0:
236; X64-NEXT:    cmpl %esi, %edi
237; X64-NEXT:    seta %al
238; X64-NEXT:    sbbb $0, %al
239; X64-NEXT:    retq
240;
241; X86-LABEL: ucmp_narrow_result:
242; X86:       # %bb.0:
243; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
244; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
245; X86-NEXT:    seta %al
246; X86-NEXT:    sbbb $0, %al
247; X86-NEXT:    retl
248  %1 = call i4 @llvm.ucmp(i32 %x, i32 %y)
249  ret i4 %1
250}
251
; llvm.ucmp on non-power-of-two operands (i62) with an i8 result.
; The checks expect both operands to be truncated to 62 bits before the
; compare: with a movabsq 0x3FFFFFFFFFFFFFFF mask and two andq on the SSE runs,
; and with bzhiq using the index 62 loaded into %al on the AVX runs.
; On i686 a 0x3FFFFFFF mask is applied with andl to two of the loaded 32-bit
; words (presumably the high half of each operand - the stack offsets are
; wildcarded, so confirm against real output), followed by one cmpl/sbbl pair
; per direction.
252define i8 @ucmp_narrow_op(i62 %x, i62 %y) nounwind {
253; SSE-LABEL: ucmp_narrow_op:
254; SSE:       # %bb.0:
255; SSE-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
256; SSE-NEXT:    andq %rax, %rsi
257; SSE-NEXT:    andq %rax, %rdi
258; SSE-NEXT:    cmpq %rsi, %rdi
259; SSE-NEXT:    seta %al
260; SSE-NEXT:    sbbb $0, %al
261; SSE-NEXT:    retq
262;
263; AVX-LABEL: ucmp_narrow_op:
264; AVX:       # %bb.0:
265; AVX-NEXT:    movb $62, %al
266; AVX-NEXT:    bzhiq %rax, %rsi, %rcx
267; AVX-NEXT:    bzhiq %rax, %rdi, %rax
268; AVX-NEXT:    cmpq %rcx, %rax
269; AVX-NEXT:    seta %al
270; AVX-NEXT:    sbbb $0, %al
271; AVX-NEXT:    retq
272;
273; X86-LABEL: ucmp_narrow_op:
274; X86:       # %bb.0:
275; X86-NEXT:    pushl %edi
276; X86-NEXT:    pushl %esi
277; X86-NEXT:    movl $1073741823, %ecx # imm = 0x3FFFFFFF
278; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
279; X86-NEXT:    andl %ecx, %edx
280; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
281; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
282; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
283; X86-NEXT:    cmpl %esi, %edi
284; X86-NEXT:    movl %ecx, %eax
285; X86-NEXT:    sbbl %edx, %eax
286; X86-NEXT:    setb %al
287; X86-NEXT:    cmpl %edi, %esi
288; X86-NEXT:    sbbl %ecx, %edx
289; X86-NEXT:    sbbb $0, %al
290; X86-NEXT:    popl %esi
291; X86-NEXT:    popl %edi
292; X86-NEXT:    retl
293  %1 = call i8 @llvm.ucmp(i62 %x, i62 %y)
294  ret i8 %1
295}
296
; llvm.ucmp on i32 operands with a result wider than 128 bits (i141).
; On x86-64 the checks expect the byte result to be sign-extended with movsbq
; into %rax, the sign replicated into %rdx with sarq $63, and %ecx set to the
; low 13 bits of that sign word (andl $8191).
; On i686 the checks expect the value to be written through the pointer held
; in %eax: the sign-extended low word at offset 0, the replicated sign at
; offsets 4, 8 and 12, and a 16-bit store of the sign masked with 0x1FFF at
; offset 16, returning with retl $4.
297define i141 @ucmp_wide_result(i32 %x, i32 %y) nounwind {
298; X64-LABEL: ucmp_wide_result:
299; X64:       # %bb.0:
300; X64-NEXT:    cmpl %esi, %edi
301; X64-NEXT:    seta %al
302; X64-NEXT:    sbbb $0, %al
303; X64-NEXT:    movsbq %al, %rax
304; X64-NEXT:    movq %rax, %rdx
305; X64-NEXT:    sarq $63, %rdx
306; X64-NEXT:    movl %edx, %ecx
307; X64-NEXT:    andl $8191, %ecx # imm = 0x1FFF
308; X64-NEXT:    retq
309;
310; X86-LABEL: ucmp_wide_result:
311; X86:       # %bb.0:
312; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
313; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
314; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
315; X86-NEXT:    seta %cl
316; X86-NEXT:    sbbb $0, %cl
317; X86-NEXT:    movsbl %cl, %ecx
318; X86-NEXT:    movl %ecx, (%eax)
319; X86-NEXT:    sarl $31, %ecx
320; X86-NEXT:    movl %ecx, 12(%eax)
321; X86-NEXT:    movl %ecx, 8(%eax)
322; X86-NEXT:    movl %ecx, 4(%eax)
323; X86-NEXT:    andl $8191, %ecx # imm = 0x1FFF
324; X86-NEXT:    movw %cx, 16(%eax)
325; X86-NEXT:    retl $4
326  %1 = call i141 @llvm.ucmp(i32 %x, i32 %y)
327  ret i141 %1
328}
329
; llvm.ucmp on non-power-of-two operands wider than 64 bits (i109), i8 result.
; On x86-64 the checks expect %rsi and %rcx to be truncated to 45 bits
; (movabsq 0x1FFFFFFFFFFF mask plus andq on the SSE runs, bzhiq with index 45
; on the AVX runs) and then one cmpq/sbbq pair per direction.
; On i686 a 0x1FFF mask is applied with andl to two of the loaded words, and a
; four-step cmpl/sbbl/sbbl/sbbl borrow chain is expected per direction.
330define i8 @ucmp_wide_op(i109 %x, i109 %y) nounwind {
331; SSE-LABEL: ucmp_wide_op:
332; SSE:       # %bb.0:
333; SSE-NEXT:    movabsq $35184372088831, %rax # imm = 0x1FFFFFFFFFFF
334; SSE-NEXT:    andq %rax, %rsi
335; SSE-NEXT:    andq %rax, %rcx
336; SSE-NEXT:    cmpq %rdi, %rdx
337; SSE-NEXT:    movq %rcx, %rax
338; SSE-NEXT:    sbbq %rsi, %rax
339; SSE-NEXT:    setb %al
340; SSE-NEXT:    cmpq %rdx, %rdi
341; SSE-NEXT:    sbbq %rcx, %rsi
342; SSE-NEXT:    sbbb $0, %al
343; SSE-NEXT:    retq
344;
345; AVX-LABEL: ucmp_wide_op:
346; AVX:       # %bb.0:
347; AVX-NEXT:    movb $45, %al
348; AVX-NEXT:    bzhiq %rax, %rsi, %rsi
349; AVX-NEXT:    bzhiq %rax, %rcx, %rcx
350; AVX-NEXT:    cmpq %rdi, %rdx
351; AVX-NEXT:    movq %rcx, %rax
352; AVX-NEXT:    sbbq %rsi, %rax
353; AVX-NEXT:    setb %al
354; AVX-NEXT:    cmpq %rdx, %rdi
355; AVX-NEXT:    sbbq %rcx, %rsi
356; AVX-NEXT:    sbbb $0, %al
357; AVX-NEXT:    retq
358;
359; X86-LABEL: ucmp_wide_op:
360; X86:       # %bb.0:
361; X86-NEXT:    pushl %ebp
362; X86-NEXT:    pushl %ebx
363; X86-NEXT:    pushl %edi
364; X86-NEXT:    pushl %esi
365; X86-NEXT:    movl $8191, %ecx # imm = 0x1FFF
366; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
367; X86-NEXT:    andl %ecx, %edx
368; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
369; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
370; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
371; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
372; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ebp
373; X86-NEXT:    sbbl %edi, %eax
374; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
375; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
376; X86-NEXT:    movl %ebx, %eax
377; X86-NEXT:    sbbl %esi, %eax
378; X86-NEXT:    movl %ecx, %eax
379; X86-NEXT:    sbbl %edx, %eax
380; X86-NEXT:    setb %al
381; X86-NEXT:    cmpl %ebp, {{[0-9]+}}(%esp)
382; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
383; X86-NEXT:    sbbl %ebx, %esi
384; X86-NEXT:    sbbl %ecx, %edx
385; X86-NEXT:    sbbb $0, %al
386; X86-NEXT:    popl %esi
387; X86-NEXT:    popl %edi
388; X86-NEXT:    popl %ebx
389; X86-NEXT:    popl %ebp
390; X86-NEXT:    retl
391  %1 = call i8 @llvm.ucmp(i109 %x, i109 %y)
392  ret i8 %1
393}
394
; llvm.ucmp where both the operand type (i7) and the result type (i41) are
; unusual widths.
; The checks expect both operands to be masked with andb $127 before a byte
; compare, then seta / sbbb $0. The result is sign-extended with movsbq on
; x86-64; on i686 it is sign-extended with movsbl into %eax and the sign is
; replicated into %edx with sarl $31.
395define i41 @ucmp_uncommon_types(i7 %x, i7 %y) nounwind {
396; X64-LABEL: ucmp_uncommon_types:
397; X64:       # %bb.0:
398; X64-NEXT:    andb $127, %sil
399; X64-NEXT:    andb $127, %dil
400; X64-NEXT:    cmpb %sil, %dil
401; X64-NEXT:    seta %al
402; X64-NEXT:    sbbb $0, %al
403; X64-NEXT:    movsbq %al, %rax
404; X64-NEXT:    retq
405;
406; X86-LABEL: ucmp_uncommon_types:
407; X86:       # %bb.0:
408; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
409; X86-NEXT:    andb $127, %al
410; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
411; X86-NEXT:    andb $127, %cl
412; X86-NEXT:    cmpb %al, %cl
413; X86-NEXT:    seta %al
414; X86-NEXT:    sbbb $0, %al
415; X86-NEXT:    movsbl %al, %eax
416; X86-NEXT:    movl %eax, %edx
417; X86-NEXT:    sarl $31, %edx
418; X86-NEXT:    retl
419  %1 = call i41 @llvm.ucmp(i7 %x, i7 %y)
420  ret i41 %1
421}
422
; Vector llvm.ucmp on <4 x i32> operands with a <4 x i32> result.
; Expected lowerings per check prefix:
;  - SSE4 prefix (the -mcpu=x86-64 run): both inputs are xor-ed with a splat of
;    2147483648 (the i32 sign bit), then pcmpgtd is used in both directions and
;    the two masks are subtracted with psubd.
;  - SSE2 prefix (the -mcpu=x86-64-v2 run) and AVX2: pmaxud / pminud results
;    are compared for equality against the first operand and the two masks
;    are subtracted.
;  - AVX512: two mask compares (vpcmpltud, vpcmpnleud), a zero-masked
;    broadcast of 1 and a masked move of an all-ones vector.
;  - i686: four scalar cmpl / seta / sbbb $0 / movsbl sequences whose results
;    are stored through the pointer held in %eax, returning with retl $4.
423define <4 x i32> @ucmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
424; SSE4-LABEL: ucmp_normal_vectors:
425; SSE4:       # %bb.0:
426; SSE4-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
427; SSE4-NEXT:    pxor %xmm2, %xmm1
428; SSE4-NEXT:    pxor %xmm2, %xmm0
429; SSE4-NEXT:    movdqa %xmm0, %xmm2
430; SSE4-NEXT:    pcmpgtd %xmm1, %xmm2
431; SSE4-NEXT:    pcmpgtd %xmm0, %xmm1
432; SSE4-NEXT:    psubd %xmm2, %xmm1
433; SSE4-NEXT:    movdqa %xmm1, %xmm0
434; SSE4-NEXT:    retq
435;
436; SSE2-LABEL: ucmp_normal_vectors:
437; SSE2:       # %bb.0:
438; SSE2-NEXT:    movdqa %xmm0, %xmm2
439; SSE2-NEXT:    pmaxud %xmm1, %xmm2
440; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
441; SSE2-NEXT:    pminud %xmm0, %xmm1
442; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
443; SSE2-NEXT:    psubd %xmm2, %xmm0
444; SSE2-NEXT:    retq
445;
446; AVX2-LABEL: ucmp_normal_vectors:
447; AVX2:       # %bb.0:
448; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2
449; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
450; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
451; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
452; AVX2-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
453; AVX2-NEXT:    retq
454;
455; AVX512-LABEL: ucmp_normal_vectors:
456; AVX512:       # %bb.0:
457; AVX512-NEXT:    vpcmpltud %xmm1, %xmm0, %k1
458; AVX512-NEXT:    vpcmpnleud %xmm1, %xmm0, %k2
459; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
460; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
461; AVX512-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1}
462; AVX512-NEXT:    retq
463;
464; X86-LABEL: ucmp_normal_vectors:
465; X86:       # %bb.0:
466; X86-NEXT:    pushl %ebx
467; X86-NEXT:    pushl %edi
468; X86-NEXT:    pushl %esi
469; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
470; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
471; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
472; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
473; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
474; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
475; X86-NEXT:    seta %dl
476; X86-NEXT:    sbbb $0, %dl
477; X86-NEXT:    movsbl %dl, %edx
478; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
479; X86-NEXT:    seta %bl
480; X86-NEXT:    sbbb $0, %bl
481; X86-NEXT:    movsbl %bl, %edi
482; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
483; X86-NEXT:    seta %bl
484; X86-NEXT:    sbbb $0, %bl
485; X86-NEXT:    movsbl %bl, %esi
486; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
487; X86-NEXT:    seta %cl
488; X86-NEXT:    sbbb $0, %cl
489; X86-NEXT:    movsbl %cl, %ecx
490; X86-NEXT:    movl %ecx, 12(%eax)
491; X86-NEXT:    movl %esi, 8(%eax)
492; X86-NEXT:    movl %edi, 4(%eax)
493; X86-NEXT:    movl %edx, (%eax)
494; X86-NEXT:    popl %esi
495; X86-NEXT:    popl %edi
496; X86-NEXT:    popl %ebx
497; X86-NEXT:    retl $4
498  %1 = call <4 x i32> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y)
499  ret <4 x i32> %1
500}
501
; Vector llvm.ucmp on <4 x i32> operands with a narrower <4 x i8> result.
; Every check variant expects the lanes to be compared one at a time with
; scalar cmpl / seta / sbbb $0; the variants differ in how lanes are extracted
; and how the four result bytes are reassembled:
;  - SSE4 prefix (the -mcpu=x86-64 run): pshufd + movd extraction, the bytes
;    are combined in a general-purpose register with shll / orl and moved back
;    with movd.
;  - SSE2 prefix (the -mcpu=x86-64-v2 run) and AVX: pextrd extraction and
;    pinsrb insertion into the result vector.
;  - i686: the four result bytes are stored through the pointer held in %eax,
;    returning with retl $4.
502define <4 x i8> @ucmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
503; SSE4-LABEL: ucmp_narrow_vec_result:
504; SSE4:       # %bb.0:
505; SSE4-NEXT:    movd %xmm1, %eax
506; SSE4-NEXT:    movd %xmm0, %ecx
507; SSE4-NEXT:    cmpl %eax, %ecx
508; SSE4-NEXT:    seta %al
509; SSE4-NEXT:    sbbb $0, %al
510; SSE4-NEXT:    movzbl %al, %eax
511; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
512; SSE4-NEXT:    movd %xmm2, %ecx
513; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
514; SSE4-NEXT:    movd %xmm2, %edx
515; SSE4-NEXT:    cmpl %ecx, %edx
516; SSE4-NEXT:    seta %cl
517; SSE4-NEXT:    sbbb $0, %cl
518; SSE4-NEXT:    movzbl %cl, %ecx
519; SSE4-NEXT:    shll $8, %ecx
520; SSE4-NEXT:    orl %eax, %ecx
521; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
522; SSE4-NEXT:    movd %xmm2, %eax
523; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
524; SSE4-NEXT:    movd %xmm2, %edx
525; SSE4-NEXT:    cmpl %eax, %edx
526; SSE4-NEXT:    seta %al
527; SSE4-NEXT:    sbbb $0, %al
528; SSE4-NEXT:    movzbl %al, %eax
529; SSE4-NEXT:    shll $16, %eax
530; SSE4-NEXT:    orl %ecx, %eax
531; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
532; SSE4-NEXT:    movd %xmm1, %ecx
533; SSE4-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
534; SSE4-NEXT:    movd %xmm0, %edx
535; SSE4-NEXT:    cmpl %ecx, %edx
536; SSE4-NEXT:    seta %cl
537; SSE4-NEXT:    sbbb $0, %cl
538; SSE4-NEXT:    movzbl %cl, %ecx
539; SSE4-NEXT:    shll $24, %ecx
540; SSE4-NEXT:    orl %eax, %ecx
541; SSE4-NEXT:    movd %ecx, %xmm0
542; SSE4-NEXT:    retq
543;
544; SSE2-LABEL: ucmp_narrow_vec_result:
545; SSE2:       # %bb.0:
546; SSE2-NEXT:    pextrd $1, %xmm1, %eax
547; SSE2-NEXT:    pextrd $1, %xmm0, %ecx
548; SSE2-NEXT:    cmpl %eax, %ecx
549; SSE2-NEXT:    seta %al
550; SSE2-NEXT:    sbbb $0, %al
551; SSE2-NEXT:    movzbl %al, %eax
552; SSE2-NEXT:    movd %xmm1, %ecx
553; SSE2-NEXT:    movd %xmm0, %edx
554; SSE2-NEXT:    cmpl %ecx, %edx
555; SSE2-NEXT:    seta %cl
556; SSE2-NEXT:    sbbb $0, %cl
557; SSE2-NEXT:    movzbl %cl, %ecx
558; SSE2-NEXT:    movd %ecx, %xmm2
559; SSE2-NEXT:    pinsrb $1, %eax, %xmm2
560; SSE2-NEXT:    pextrd $2, %xmm1, %eax
561; SSE2-NEXT:    pextrd $2, %xmm0, %ecx
562; SSE2-NEXT:    cmpl %eax, %ecx
563; SSE2-NEXT:    seta %al
564; SSE2-NEXT:    sbbb $0, %al
565; SSE2-NEXT:    movzbl %al, %eax
566; SSE2-NEXT:    pinsrb $2, %eax, %xmm2
567; SSE2-NEXT:    pextrd $3, %xmm1, %eax
568; SSE2-NEXT:    pextrd $3, %xmm0, %ecx
569; SSE2-NEXT:    cmpl %eax, %ecx
570; SSE2-NEXT:    seta %al
571; SSE2-NEXT:    sbbb $0, %al
572; SSE2-NEXT:    movzbl %al, %eax
573; SSE2-NEXT:    pinsrb $3, %eax, %xmm2
574; SSE2-NEXT:    movdqa %xmm2, %xmm0
575; SSE2-NEXT:    retq
576;
577; AVX-LABEL: ucmp_narrow_vec_result:
578; AVX:       # %bb.0:
579; AVX-NEXT:    vpextrd $1, %xmm1, %eax
580; AVX-NEXT:    vpextrd $1, %xmm0, %ecx
581; AVX-NEXT:    cmpl %eax, %ecx
582; AVX-NEXT:    seta %al
583; AVX-NEXT:    sbbb $0, %al
584; AVX-NEXT:    vmovd %xmm1, %ecx
585; AVX-NEXT:    vmovd %xmm0, %edx
586; AVX-NEXT:    cmpl %ecx, %edx
587; AVX-NEXT:    seta %cl
588; AVX-NEXT:    sbbb $0, %cl
589; AVX-NEXT:    vmovd %ecx, %xmm2
590; AVX-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
591; AVX-NEXT:    vpextrd $2, %xmm1, %eax
592; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
593; AVX-NEXT:    cmpl %eax, %ecx
594; AVX-NEXT:    seta %al
595; AVX-NEXT:    sbbb $0, %al
596; AVX-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
597; AVX-NEXT:    vpextrd $3, %xmm1, %eax
598; AVX-NEXT:    vpextrd $3, %xmm0, %ecx
599; AVX-NEXT:    cmpl %eax, %ecx
600; AVX-NEXT:    seta %al
601; AVX-NEXT:    sbbb $0, %al
602; AVX-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm0
603; AVX-NEXT:    retq
604;
605; X86-LABEL: ucmp_narrow_vec_result:
606; X86:       # %bb.0:
607; X86-NEXT:    pushl %ebx
608; X86-NEXT:    pushl %edi
609; X86-NEXT:    pushl %esi
610; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
611; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
612; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
613; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
614; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
615; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
616; X86-NEXT:    seta %cl
617; X86-NEXT:    sbbb $0, %cl
618; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
619; X86-NEXT:    seta %ch
620; X86-NEXT:    sbbb $0, %ch
621; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
622; X86-NEXT:    seta %bl
623; X86-NEXT:    sbbb $0, %bl
624; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
625; X86-NEXT:    seta %dl
626; X86-NEXT:    sbbb $0, %dl
627; X86-NEXT:    movb %dl, 3(%eax)
628; X86-NEXT:    movb %bl, 2(%eax)
629; X86-NEXT:    movb %ch, 1(%eax)
630; X86-NEXT:    movb %cl, (%eax)
631; X86-NEXT:    popl %esi
632; X86-NEXT:    popl %edi
633; X86-NEXT:    popl %ebx
634; X86-NEXT:    retl $4
635  %1 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y)
636  ret <4 x i8> %1
637}
638
; Vector llvm.ucmp on narrow <4 x i8> operands with a <4 x i32> result.
; The vector check variants expect both operands to be zero-extended to i32
; lanes first and then compared with pcmpgtd in both directions:
;  - SSE4 prefix (the -mcpu=x86-64 run): zero-extension by interleaving with a
;    zeroed register (punpcklbw then punpcklwd), result formed with psubd.
;  - SSE2 prefix (the -mcpu=x86-64-v2 run) and AVX2: zero-extension with
;    pmovzxbd, result formed with psubd.
;  - AVX512: pmovzxbd, two mask-producing vpcmpgtd, a zero-masked broadcast of
;    1 and a masked move of an all-ones vector.
;  - i686: four scalar cmpb / seta / sbbb $0 / movsbl sequences whose results
;    are stored through the pointer held in %eax, returning with retl $4.
639define <4 x i32> @ucmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind {
640; SSE4-LABEL: ucmp_narrow_vec_op:
641; SSE4:       # %bb.0:
642; SSE4-NEXT:    pxor %xmm2, %xmm2
643; SSE4-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
644; SSE4-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
645; SSE4-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
646; SSE4-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
647; SSE4-NEXT:    movdqa %xmm0, %xmm2
648; SSE4-NEXT:    pcmpgtd %xmm1, %xmm2
649; SSE4-NEXT:    pcmpgtd %xmm0, %xmm1
650; SSE4-NEXT:    psubd %xmm2, %xmm1
651; SSE4-NEXT:    movdqa %xmm1, %xmm0
652; SSE4-NEXT:    retq
653;
654; SSE2-LABEL: ucmp_narrow_vec_op:
655; SSE2:       # %bb.0:
656; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
657; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
658; SSE2-NEXT:    movdqa %xmm0, %xmm2
659; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
660; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
661; SSE2-NEXT:    psubd %xmm2, %xmm1
662; SSE2-NEXT:    movdqa %xmm1, %xmm0
663; SSE2-NEXT:    retq
664;
665; AVX2-LABEL: ucmp_narrow_vec_op:
666; AVX2:       # %bb.0:
667; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
668; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
669; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm2
670; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
671; AVX2-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
672; AVX2-NEXT:    retq
673;
674; AVX512-LABEL: ucmp_narrow_vec_op:
675; AVX512:       # %bb.0:
676; AVX512-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
677; AVX512-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
678; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
679; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k2
680; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
681; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
682; AVX512-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1}
683; AVX512-NEXT:    retq
684;
685; X86-LABEL: ucmp_narrow_vec_op:
686; X86:       # %bb.0:
687; X86-NEXT:    pushl %ebx
688; X86-NEXT:    pushl %edi
689; X86-NEXT:    pushl %esi
690; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
691; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
692; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
693; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
694; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
695; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %dl
696; X86-NEXT:    seta %dl
697; X86-NEXT:    sbbb $0, %dl
698; X86-NEXT:    movsbl %dl, %edx
699; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %bl
700; X86-NEXT:    seta %bl
701; X86-NEXT:    sbbb $0, %bl
702; X86-NEXT:    movsbl %bl, %esi
703; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %ch
704; X86-NEXT:    seta %ch
705; X86-NEXT:    sbbb $0, %ch
706; X86-NEXT:    movsbl %ch, %edi
707; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %cl
708; X86-NEXT:    seta %cl
709; X86-NEXT:    sbbb $0, %cl
710; X86-NEXT:    movsbl %cl, %ecx
711; X86-NEXT:    movl %ecx, 12(%eax)
712; X86-NEXT:    movl %edi, 8(%eax)
713; X86-NEXT:    movl %esi, 4(%eax)
714; X86-NEXT:    movl %edx, (%eax)
715; X86-NEXT:    popl %esi
716; X86-NEXT:    popl %edi
717; X86-NEXT:    popl %ebx
718; X86-NEXT:    retl $4
719  %1 = call <4 x i32> @llvm.ucmp(<4 x i8> %x, <4 x i8> %y)
720  ret <4 x i32> %1
721}
722
723define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
724; SSE4-LABEL: ucmp_wide_vec_result:
725; SSE4:       # %bb.0:
726; SSE4-NEXT:    movdqa %xmm1, %xmm3
727; SSE4-NEXT:    pxor %xmm5, %xmm5
728; SSE4-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
729; SSE4-NEXT:    movdqa %xmm1, %xmm4
730; SSE4-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
731; SSE4-NEXT:    movdqa %xmm0, %xmm2
732; SSE4-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
733; SSE4-NEXT:    movdqa %xmm2, %xmm6
734; SSE4-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
735; SSE4-NEXT:    movdqa %xmm6, %xmm7
736; SSE4-NEXT:    pcmpgtd %xmm4, %xmm7
737; SSE4-NEXT:    pcmpgtd %xmm6, %xmm4
738; SSE4-NEXT:    psubd %xmm7, %xmm4
739; SSE4-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
740; SSE4-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
741; SSE4-NEXT:    movdqa %xmm2, %xmm6
742; SSE4-NEXT:    pcmpgtd %xmm1, %xmm6
743; SSE4-NEXT:    pcmpgtd %xmm2, %xmm1
744; SSE4-NEXT:    psubd %xmm6, %xmm1
745; SSE4-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm5[8],xmm3[9],xmm5[9],xmm3[10],xmm5[10],xmm3[11],xmm5[11],xmm3[12],xmm5[12],xmm3[13],xmm5[13],xmm3[14],xmm5[14],xmm3[15],xmm5[15]
746; SSE4-NEXT:    movdqa %xmm3, %xmm2
747; SSE4-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
748; SSE4-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm5[8],xmm0[9],xmm5[9],xmm0[10],xmm5[10],xmm0[11],xmm5[11],xmm0[12],xmm5[12],xmm0[13],xmm5[13],xmm0[14],xmm5[14],xmm0[15],xmm5[15]
749; SSE4-NEXT:    movdqa %xmm0, %xmm6
750; SSE4-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
751; SSE4-NEXT:    movdqa %xmm6, %xmm7
752; SSE4-NEXT:    pcmpgtd %xmm2, %xmm7
753; SSE4-NEXT:    pcmpgtd %xmm6, %xmm2
754; SSE4-NEXT:    psubd %xmm7, %xmm2
755; SSE4-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
756; SSE4-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
757; SSE4-NEXT:    movdqa %xmm0, %xmm5
758; SSE4-NEXT:    pcmpgtd %xmm3, %xmm5
759; SSE4-NEXT:    pcmpgtd %xmm0, %xmm3
760; SSE4-NEXT:    psubd %xmm5, %xmm3
761; SSE4-NEXT:    movdqa %xmm4, %xmm0
762; SSE4-NEXT:    retq
763;
764; SSE2-LABEL: ucmp_wide_vec_result:
765; SSE2:       # %bb.0:
766; SSE2-NEXT:    movdqa %xmm0, %xmm4
767; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
768; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
769; SSE2-NEXT:    movdqa %xmm2, %xmm3
770; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
771; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
772; SSE2-NEXT:    psubd %xmm3, %xmm0
773; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
774; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
775; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,1,1]
776; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
777; SSE2-NEXT:    movdqa %xmm2, %xmm3
778; SSE2-NEXT:    pcmpgtd %xmm5, %xmm3
779; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
780; SSE2-NEXT:    psubd %xmm3, %xmm5
781; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
782; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
783; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
784; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
785; SSE2-NEXT:    movdqa %xmm3, %xmm6
786; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
787; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
788; SSE2-NEXT:    psubd %xmm6, %xmm2
789; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
790; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
791; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
792; SSE2-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
793; SSE2-NEXT:    movdqa %xmm1, %xmm4
794; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
795; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
796; SSE2-NEXT:    psubd %xmm4, %xmm3
797; SSE2-NEXT:    movdqa %xmm5, %xmm1
798; SSE2-NEXT:    retq
799;
800; AVX2-LABEL: ucmp_wide_vec_result:
801; AVX2:       # %bb.0:
802; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
803; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
804; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm3, %ymm4
805; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
806; AVX2-NEXT:    vpsubd %ymm4, %ymm2, %ymm2
807; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
808; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
809; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
810; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
811; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm3
812; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
813; AVX2-NEXT:    vpsubd %ymm3, %ymm0, %ymm1
814; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
815; AVX2-NEXT:    retq
816;
817; AVX512-LABEL: ucmp_wide_vec_result:
818; AVX512:       # %bb.0:
819; AVX512-NEXT:    vpcmpltub %xmm1, %xmm0, %k1
820; AVX512-NEXT:    vpcmpnleub %xmm1, %xmm0, %k2
821; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
822; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
823; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
824; AVX512-NEXT:    retq
825;
826; X86-LABEL: ucmp_wide_vec_result:
827; X86:       # %bb.0:
828; X86-NEXT:    pushl %ebp
829; X86-NEXT:    pushl %ebx
830; X86-NEXT:    pushl %edi
831; X86-NEXT:    pushl %esi
832; X86-NEXT:    subl $12, %esp
833; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
834; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
835; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
836; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
837; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
838; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
839; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
840; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
841; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %cl
842; X86-NEXT:    seta %cl
843; X86-NEXT:    sbbb $0, %cl
844; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
845; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
846; X86-NEXT:    seta %al
847; X86-NEXT:    sbbb $0, %al
848; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
849; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %bh
850; X86-NEXT:    seta %al
851; X86-NEXT:    sbbb $0, %al
852; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
853; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %bl
854; X86-NEXT:    seta %al
855; X86-NEXT:    sbbb $0, %al
856; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
857; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %dh
858; X86-NEXT:    seta %al
859; X86-NEXT:    sbbb $0, %al
860; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
861; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %ch
862; X86-NEXT:    seta %al
863; X86-NEXT:    sbbb $0, %al
864; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
865; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %ah
866; X86-NEXT:    seta %al
867; X86-NEXT:    sbbb $0, %al
868; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
869; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %dl
870; X86-NEXT:    seta %bl
871; X86-NEXT:    sbbb $0, %bl
872; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
873; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
874; X86-NEXT:    seta %al
875; X86-NEXT:    sbbb $0, %al
876; X86-NEXT:    movb %al, (%esp) # 1-byte Spill
877; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
878; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
879; X86-NEXT:    seta %bh
880; X86-NEXT:    sbbb $0, %bh
881; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
882; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
883; X86-NEXT:    seta %al
884; X86-NEXT:    sbbb $0, %al
885; X86-NEXT:    movsbl %al, %eax
886; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
887; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
888; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
889; X86-NEXT:    seta %al
890; X86-NEXT:    sbbb $0, %al
891; X86-NEXT:    movsbl %al, %edi
892; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
893; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
894; X86-NEXT:    seta %al
895; X86-NEXT:    sbbb $0, %al
896; X86-NEXT:    movsbl %al, %ebp
897; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
898; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
899; X86-NEXT:    seta %al
900; X86-NEXT:    sbbb $0, %al
901; X86-NEXT:    movsbl %al, %esi
902; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
903; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
904; X86-NEXT:    seta %al
905; X86-NEXT:    sbbb $0, %al
906; X86-NEXT:    movsbl %al, %edx
907; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
908; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
909; X86-NEXT:    seta %al
910; X86-NEXT:    sbbb $0, %al
911; X86-NEXT:    movsbl %al, %ecx
912; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
913; X86-NEXT:    movl %ecx, 60(%eax)
914; X86-NEXT:    movl %edx, 56(%eax)
915; X86-NEXT:    movl %esi, 52(%eax)
916; X86-NEXT:    movl %ebp, 48(%eax)
917; X86-NEXT:    movl %edi, 44(%eax)
918; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
919; X86-NEXT:    movl %ecx, 40(%eax)
920; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
921; X86-NEXT:    movsbl %bh, %ecx
922; X86-NEXT:    movl %ecx, 36(%eax)
923; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
924; X86-NEXT:    movsbl (%esp), %edx # 1-byte Folded Reload
925; X86-NEXT:    movl %edx, 32(%eax)
926; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
927; X86-NEXT:    movsbl %bl, %edi
928; X86-NEXT:    movl %edi, 28(%eax)
929; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
930; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
931; X86-NEXT:    movl %ebx, 24(%eax)
932; X86-NEXT:    movl %edi, 20(%eax)
933; X86-NEXT:    movl %edx, 16(%eax)
934; X86-NEXT:    movl %ecx, 12(%eax)
935; X86-NEXT:    movl %esi, 8(%eax)
936; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
937; X86-NEXT:    movl %ecx, 4(%eax)
938; X86-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
939; X86-NEXT:    movl %ecx, (%eax)
940; X86-NEXT:    addl $12, %esp
941; X86-NEXT:    popl %esi
942; X86-NEXT:    popl %edi
943; X86-NEXT:    popl %ebx
944; X86-NEXT:    popl %ebp
945; X86-NEXT:    retl $4
946  %1 = call <16 x i32> @llvm.ucmp(<16 x i8> %x, <16 x i8> %y)
947  ret <16 x i32> %1
948}
949
; ucmp_wide_vec_op - vector @llvm.ucmp whose operands (<16 x i32>, 512 bits)
; are wider than its result (<16 x i8>, 128 bits).
;
; What the recorded codegen below shows, by check prefix
;   - SSE4, SSE2, AVX2 and X86 prefixes - every lane is compared on its own
;     with a scalar cmpl / seta / sbbb $0 sequence, then the byte results are
;     reassembled (punpckl* merges under SSE4, pinsrb under SSE2, vpinsrb
;     under AVX2, byte stores through the pointer held in %eax under X86,
;     which returns with retl $4).
;   - AVX512 prefix - no per-lane scalar code; two mask compares (vpcmpltud
;     and vpcmpnleud) drive masked vmovdqu8 moves, giving 1 in lanes where
;     x is above y, all-ones in lanes where x is below y, and 0 elsewhere.
;
; REVIEW NOTE - going by the llc invocations at the top of the file, the SSE4
; prefix is checked with -mcpu=x86-64 and the SSE2 prefix with -mcpu=x86-64-v2,
; which is why the "SSE2" block is the one using pextrd/pinsrb. The two names
; look swapped; confirm before renaming, since they are shared file-wide.
;
; The assertions are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
950define <16 x i8> @ucmp_wide_vec_op(<16 x i32> %x, <16 x i32> %y) nounwind {
951; SSE4-LABEL: ucmp_wide_vec_op:
952; SSE4:       # %bb.0:
953; SSE4-NEXT:    pushq %rbp
954; SSE4-NEXT:    pushq %r15
955; SSE4-NEXT:    pushq %r14
956; SSE4-NEXT:    pushq %r13
957; SSE4-NEXT:    pushq %r12
958; SSE4-NEXT:    pushq %rbx
959; SSE4-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[3,3,3,3]
960; SSE4-NEXT:    movd %xmm8, %eax
961; SSE4-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[3,3,3,3]
962; SSE4-NEXT:    movd %xmm8, %ecx
963; SSE4-NEXT:    cmpl %eax, %ecx
964; SSE4-NEXT:    seta %al
965; SSE4-NEXT:    sbbb $0, %al
966; SSE4-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[2,3,2,3]
967; SSE4-NEXT:    movd %xmm8, %ecx
968; SSE4-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
969; SSE4-NEXT:    movd %xmm8, %edx
970; SSE4-NEXT:    cmpl %ecx, %edx
971; SSE4-NEXT:    seta %cl
972; SSE4-NEXT:    sbbb $0, %cl
973; SSE4-NEXT:    movd %xmm7, %edx
974; SSE4-NEXT:    movd %xmm3, %esi
975; SSE4-NEXT:    cmpl %edx, %esi
976; SSE4-NEXT:    seta %dl
977; SSE4-NEXT:    sbbb $0, %dl
978; SSE4-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,1,1]
979; SSE4-NEXT:    movd %xmm7, %esi
980; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1]
981; SSE4-NEXT:    movd %xmm3, %edi
982; SSE4-NEXT:    cmpl %esi, %edi
983; SSE4-NEXT:    seta %sil
984; SSE4-NEXT:    movzbl %al, %eax
985; SSE4-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
986; SSE4-NEXT:    sbbb $0, %sil
987; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[3,3,3,3]
988; SSE4-NEXT:    movd %xmm3, %edi
989; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3]
990; SSE4-NEXT:    movd %xmm3, %r8d
991; SSE4-NEXT:    cmpl %edi, %r8d
992; SSE4-NEXT:    seta %dil
993; SSE4-NEXT:    sbbb $0, %dil
994; SSE4-NEXT:    movzbl %cl, %eax
995; SSE4-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
996; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[2,3,2,3]
997; SSE4-NEXT:    movd %xmm3, %r8d
998; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
999; SSE4-NEXT:    movd %xmm3, %r9d
1000; SSE4-NEXT:    cmpl %r8d, %r9d
1001; SSE4-NEXT:    seta %r8b
1002; SSE4-NEXT:    movzbl %dl, %edx
1003; SSE4-NEXT:    sbbb $0, %r8b
1004; SSE4-NEXT:    movd %xmm6, %r9d
1005; SSE4-NEXT:    movd %xmm2, %r10d
1006; SSE4-NEXT:    cmpl %r9d, %r10d
1007; SSE4-NEXT:    seta %r9b
1008; SSE4-NEXT:    movzbl %sil, %esi
1009; SSE4-NEXT:    sbbb $0, %r9b
1010; SSE4-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,1,1]
1011; SSE4-NEXT:    movd %xmm3, %r10d
1012; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
1013; SSE4-NEXT:    movd %xmm2, %r11d
1014; SSE4-NEXT:    cmpl %r10d, %r11d
1015; SSE4-NEXT:    seta %r10b
1016; SSE4-NEXT:    sbbb $0, %r10b
1017; SSE4-NEXT:    movzbl %dil, %edi
1018; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[3,3,3,3]
1019; SSE4-NEXT:    movd %xmm2, %r11d
1020; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
1021; SSE4-NEXT:    movd %xmm2, %ebx
1022; SSE4-NEXT:    cmpl %r11d, %ebx
1023; SSE4-NEXT:    seta %r11b
1024; SSE4-NEXT:    movzbl %r8b, %r8d
1025; SSE4-NEXT:    sbbb $0, %r11b
1026; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[2,3,2,3]
1027; SSE4-NEXT:    movd %xmm2, %ebx
1028; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
1029; SSE4-NEXT:    movd %xmm2, %ebp
1030; SSE4-NEXT:    cmpl %ebx, %ebp
1031; SSE4-NEXT:    seta %bpl
1032; SSE4-NEXT:    sbbb $0, %bpl
1033; SSE4-NEXT:    movzbl %r9b, %r9d
1034; SSE4-NEXT:    movd %xmm5, %ebx
1035; SSE4-NEXT:    movd %xmm1, %r14d
1036; SSE4-NEXT:    cmpl %ebx, %r14d
1037; SSE4-NEXT:    seta %r14b
1038; SSE4-NEXT:    sbbb $0, %r14b
1039; SSE4-NEXT:    movzbl %r10b, %r10d
1040; SSE4-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,1,1]
1041; SSE4-NEXT:    movd %xmm2, %ebx
1042; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
1043; SSE4-NEXT:    movd %xmm1, %r15d
1044; SSE4-NEXT:    cmpl %ebx, %r15d
1045; SSE4-NEXT:    seta %bl
1046; SSE4-NEXT:    movzbl %r11b, %r11d
1047; SSE4-NEXT:    sbbb $0, %bl
1048; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
1049; SSE4-NEXT:    movd %xmm1, %r15d
1050; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
1051; SSE4-NEXT:    movd %xmm1, %r12d
1052; SSE4-NEXT:    cmpl %r15d, %r12d
1053; SSE4-NEXT:    seta %r12b
1054; SSE4-NEXT:    sbbb $0, %r12b
1055; SSE4-NEXT:    movzbl %bpl, %ebp
1056; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[2,3,2,3]
1057; SSE4-NEXT:    movd %xmm1, %r15d
1058; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1059; SSE4-NEXT:    movd %xmm1, %r13d
1060; SSE4-NEXT:    cmpl %r15d, %r13d
1061; SSE4-NEXT:    seta %r13b
1062; SSE4-NEXT:    movzbl %r14b, %r15d
1063; SSE4-NEXT:    sbbb $0, %r13b
1064; SSE4-NEXT:    movd %xmm4, %r14d
1065; SSE4-NEXT:    movd %xmm0, %eax
1066; SSE4-NEXT:    cmpl %r14d, %eax
1067; SSE4-NEXT:    seta %r14b
1068; SSE4-NEXT:    sbbb $0, %r14b
1069; SSE4-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,1,1]
1070; SSE4-NEXT:    movd %xmm1, %eax
1071; SSE4-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1072; SSE4-NEXT:    movd %xmm0, %ecx
1073; SSE4-NEXT:    cmpl %eax, %ecx
1074; SSE4-NEXT:    movzbl %bl, %eax
1075; SSE4-NEXT:    movzbl %r12b, %ecx
1076; SSE4-NEXT:    movzbl %r13b, %ebx
1077; SSE4-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1078; SSE4-NEXT:    # xmm0 = mem[0],zero,zero,zero
1079; SSE4-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Folded Reload
1080; SSE4-NEXT:    # xmm2 = mem[0],zero,zero,zero
1081; SSE4-NEXT:    movd %edx, %xmm3
1082; SSE4-NEXT:    movd %esi, %xmm4
1083; SSE4-NEXT:    movd %edi, %xmm5
1084; SSE4-NEXT:    movd %r8d, %xmm6
1085; SSE4-NEXT:    movd %r9d, %xmm1
1086; SSE4-NEXT:    movd %r10d, %xmm7
1087; SSE4-NEXT:    movd %r11d, %xmm8
1088; SSE4-NEXT:    movd %ebp, %xmm9
1089; SSE4-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1090; SSE4-NEXT:    movd %r15d, %xmm10
1091; SSE4-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1092; SSE4-NEXT:    movd %eax, %xmm0
1093; SSE4-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1094; SSE4-NEXT:    movd %ecx, %xmm2
1095; SSE4-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
1096; SSE4-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3],xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7]
1097; SSE4-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
1098; SSE4-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1099; SSE4-NEXT:    punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3],xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7]
1100; SSE4-NEXT:    punpcklbw {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3],xmm10[4],xmm0[4],xmm10[5],xmm0[5],xmm10[6],xmm0[6],xmm10[7],xmm0[7]
1101; SSE4-NEXT:    punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm9[0],xmm10[1],xmm9[1],xmm10[2],xmm9[2],xmm10[3],xmm9[3]
1102; SSE4-NEXT:    movd %ebx, %xmm3
1103; SSE4-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1104; SSE4-NEXT:    movzbl %r14b, %eax
1105; SSE4-NEXT:    seta %cl
1106; SSE4-NEXT:    sbbb $0, %cl
1107; SSE4-NEXT:    movd %eax, %xmm0
1108; SSE4-NEXT:    movzbl %cl, %eax
1109; SSE4-NEXT:    movd %eax, %xmm2
1110; SSE4-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1111; SSE4-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
1112; SSE4-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
1113; SSE4-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1114; SSE4-NEXT:    popq %rbx
1115; SSE4-NEXT:    popq %r12
1116; SSE4-NEXT:    popq %r13
1117; SSE4-NEXT:    popq %r14
1118; SSE4-NEXT:    popq %r15
1119; SSE4-NEXT:    popq %rbp
1120; SSE4-NEXT:    retq
1121;
1122; SSE2-LABEL: ucmp_wide_vec_op:
1123; SSE2:       # %bb.0:
1124; SSE2-NEXT:    pextrd $1, %xmm4, %eax
1125; SSE2-NEXT:    movdqa %xmm0, %xmm8
1126; SSE2-NEXT:    pextrd $1, %xmm0, %ecx
1127; SSE2-NEXT:    cmpl %eax, %ecx
1128; SSE2-NEXT:    seta %al
1129; SSE2-NEXT:    sbbb $0, %al
1130; SSE2-NEXT:    movzbl %al, %eax
1131; SSE2-NEXT:    movd %xmm4, %ecx
1132; SSE2-NEXT:    movd %xmm0, %edx
1133; SSE2-NEXT:    cmpl %ecx, %edx
1134; SSE2-NEXT:    seta %cl
1135; SSE2-NEXT:    sbbb $0, %cl
1136; SSE2-NEXT:    movzbl %cl, %ecx
1137; SSE2-NEXT:    movd %ecx, %xmm0
1138; SSE2-NEXT:    pinsrb $1, %eax, %xmm0
1139; SSE2-NEXT:    pextrd $2, %xmm4, %eax
1140; SSE2-NEXT:    pextrd $2, %xmm8, %ecx
1141; SSE2-NEXT:    cmpl %eax, %ecx
1142; SSE2-NEXT:    seta %al
1143; SSE2-NEXT:    sbbb $0, %al
1144; SSE2-NEXT:    movzbl %al, %eax
1145; SSE2-NEXT:    pinsrb $2, %eax, %xmm0
1146; SSE2-NEXT:    pextrd $3, %xmm4, %eax
1147; SSE2-NEXT:    pextrd $3, %xmm8, %ecx
1148; SSE2-NEXT:    cmpl %eax, %ecx
1149; SSE2-NEXT:    seta %al
1150; SSE2-NEXT:    sbbb $0, %al
1151; SSE2-NEXT:    movzbl %al, %eax
1152; SSE2-NEXT:    pinsrb $3, %eax, %xmm0
1153; SSE2-NEXT:    movd %xmm5, %eax
1154; SSE2-NEXT:    movd %xmm1, %ecx
1155; SSE2-NEXT:    cmpl %eax, %ecx
1156; SSE2-NEXT:    seta %al
1157; SSE2-NEXT:    sbbb $0, %al
1158; SSE2-NEXT:    movzbl %al, %eax
1159; SSE2-NEXT:    pinsrb $4, %eax, %xmm0
1160; SSE2-NEXT:    pextrd $1, %xmm5, %eax
1161; SSE2-NEXT:    pextrd $1, %xmm1, %ecx
1162; SSE2-NEXT:    cmpl %eax, %ecx
1163; SSE2-NEXT:    seta %al
1164; SSE2-NEXT:    sbbb $0, %al
1165; SSE2-NEXT:    movzbl %al, %eax
1166; SSE2-NEXT:    pinsrb $5, %eax, %xmm0
1167; SSE2-NEXT:    pextrd $2, %xmm5, %eax
1168; SSE2-NEXT:    pextrd $2, %xmm1, %ecx
1169; SSE2-NEXT:    cmpl %eax, %ecx
1170; SSE2-NEXT:    seta %al
1171; SSE2-NEXT:    sbbb $0, %al
1172; SSE2-NEXT:    movzbl %al, %eax
1173; SSE2-NEXT:    pinsrb $6, %eax, %xmm0
1174; SSE2-NEXT:    pextrd $3, %xmm5, %eax
1175; SSE2-NEXT:    pextrd $3, %xmm1, %ecx
1176; SSE2-NEXT:    cmpl %eax, %ecx
1177; SSE2-NEXT:    seta %al
1178; SSE2-NEXT:    sbbb $0, %al
1179; SSE2-NEXT:    movzbl %al, %eax
1180; SSE2-NEXT:    pinsrb $7, %eax, %xmm0
1181; SSE2-NEXT:    movd %xmm6, %eax
1182; SSE2-NEXT:    movd %xmm2, %ecx
1183; SSE2-NEXT:    cmpl %eax, %ecx
1184; SSE2-NEXT:    seta %al
1185; SSE2-NEXT:    sbbb $0, %al
1186; SSE2-NEXT:    movzbl %al, %eax
1187; SSE2-NEXT:    pinsrb $8, %eax, %xmm0
1188; SSE2-NEXT:    pextrd $1, %xmm6, %eax
1189; SSE2-NEXT:    pextrd $1, %xmm2, %ecx
1190; SSE2-NEXT:    cmpl %eax, %ecx
1191; SSE2-NEXT:    seta %al
1192; SSE2-NEXT:    sbbb $0, %al
1193; SSE2-NEXT:    movzbl %al, %eax
1194; SSE2-NEXT:    pinsrb $9, %eax, %xmm0
1195; SSE2-NEXT:    pextrd $2, %xmm6, %eax
1196; SSE2-NEXT:    pextrd $2, %xmm2, %ecx
1197; SSE2-NEXT:    cmpl %eax, %ecx
1198; SSE2-NEXT:    seta %al
1199; SSE2-NEXT:    sbbb $0, %al
1200; SSE2-NEXT:    movzbl %al, %eax
1201; SSE2-NEXT:    pinsrb $10, %eax, %xmm0
1202; SSE2-NEXT:    pextrd $3, %xmm6, %eax
1203; SSE2-NEXT:    pextrd $3, %xmm2, %ecx
1204; SSE2-NEXT:    cmpl %eax, %ecx
1205; SSE2-NEXT:    seta %al
1206; SSE2-NEXT:    sbbb $0, %al
1207; SSE2-NEXT:    movzbl %al, %eax
1208; SSE2-NEXT:    pinsrb $11, %eax, %xmm0
1209; SSE2-NEXT:    movd %xmm7, %eax
1210; SSE2-NEXT:    movd %xmm3, %ecx
1211; SSE2-NEXT:    cmpl %eax, %ecx
1212; SSE2-NEXT:    seta %al
1213; SSE2-NEXT:    sbbb $0, %al
1214; SSE2-NEXT:    movzbl %al, %eax
1215; SSE2-NEXT:    pinsrb $12, %eax, %xmm0
1216; SSE2-NEXT:    pextrd $1, %xmm7, %eax
1217; SSE2-NEXT:    pextrd $1, %xmm3, %ecx
1218; SSE2-NEXT:    cmpl %eax, %ecx
1219; SSE2-NEXT:    seta %al
1220; SSE2-NEXT:    sbbb $0, %al
1221; SSE2-NEXT:    movzbl %al, %eax
1222; SSE2-NEXT:    pinsrb $13, %eax, %xmm0
1223; SSE2-NEXT:    pextrd $2, %xmm7, %eax
1224; SSE2-NEXT:    pextrd $2, %xmm3, %ecx
1225; SSE2-NEXT:    cmpl %eax, %ecx
1226; SSE2-NEXT:    seta %al
1227; SSE2-NEXT:    sbbb $0, %al
1228; SSE2-NEXT:    movzbl %al, %eax
1229; SSE2-NEXT:    pinsrb $14, %eax, %xmm0
1230; SSE2-NEXT:    pextrd $3, %xmm7, %eax
1231; SSE2-NEXT:    pextrd $3, %xmm3, %ecx
1232; SSE2-NEXT:    cmpl %eax, %ecx
1233; SSE2-NEXT:    seta %al
1234; SSE2-NEXT:    sbbb $0, %al
1235; SSE2-NEXT:    movzbl %al, %eax
1236; SSE2-NEXT:    pinsrb $15, %eax, %xmm0
1237; SSE2-NEXT:    retq
1238;
1239; AVX2-LABEL: ucmp_wide_vec_op:
1240; AVX2:       # %bb.0:
1241; AVX2-NEXT:    vpextrd $1, %xmm2, %eax
1242; AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
1243; AVX2-NEXT:    cmpl %eax, %ecx
1244; AVX2-NEXT:    seta %al
1245; AVX2-NEXT:    sbbb $0, %al
1246; AVX2-NEXT:    vmovd %xmm2, %ecx
1247; AVX2-NEXT:    vmovd %xmm0, %edx
1248; AVX2-NEXT:    cmpl %ecx, %edx
1249; AVX2-NEXT:    seta %cl
1250; AVX2-NEXT:    sbbb $0, %cl
1251; AVX2-NEXT:    vmovd %ecx, %xmm4
1252; AVX2-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
1253; AVX2-NEXT:    vpextrd $2, %xmm2, %eax
1254; AVX2-NEXT:    vpextrd $2, %xmm0, %ecx
1255; AVX2-NEXT:    cmpl %eax, %ecx
1256; AVX2-NEXT:    seta %al
1257; AVX2-NEXT:    sbbb $0, %al
1258; AVX2-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
1259; AVX2-NEXT:    vpextrd $3, %xmm2, %eax
1260; AVX2-NEXT:    vpextrd $3, %xmm0, %ecx
1261; AVX2-NEXT:    cmpl %eax, %ecx
1262; AVX2-NEXT:    seta %al
1263; AVX2-NEXT:    sbbb $0, %al
1264; AVX2-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
1265; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm2
1266; AVX2-NEXT:    vmovd %xmm2, %eax
1267; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
1268; AVX2-NEXT:    vmovd %xmm0, %ecx
1269; AVX2-NEXT:    cmpl %eax, %ecx
1270; AVX2-NEXT:    seta %al
1271; AVX2-NEXT:    sbbb $0, %al
1272; AVX2-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
1273; AVX2-NEXT:    vpextrd $1, %xmm2, %eax
1274; AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
1275; AVX2-NEXT:    cmpl %eax, %ecx
1276; AVX2-NEXT:    seta %al
1277; AVX2-NEXT:    sbbb $0, %al
1278; AVX2-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
1279; AVX2-NEXT:    vpextrd $2, %xmm2, %eax
1280; AVX2-NEXT:    vpextrd $2, %xmm0, %ecx
1281; AVX2-NEXT:    cmpl %eax, %ecx
1282; AVX2-NEXT:    seta %al
1283; AVX2-NEXT:    sbbb $0, %al
1284; AVX2-NEXT:    vpinsrb $6, %eax, %xmm4, %xmm4
1285; AVX2-NEXT:    vpextrd $3, %xmm2, %eax
1286; AVX2-NEXT:    vpextrd $3, %xmm0, %ecx
1287; AVX2-NEXT:    cmpl %eax, %ecx
1288; AVX2-NEXT:    seta %al
1289; AVX2-NEXT:    sbbb $0, %al
1290; AVX2-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm0
1291; AVX2-NEXT:    vmovd %xmm3, %eax
1292; AVX2-NEXT:    vmovd %xmm1, %ecx
1293; AVX2-NEXT:    cmpl %eax, %ecx
1294; AVX2-NEXT:    seta %al
1295; AVX2-NEXT:    sbbb $0, %al
1296; AVX2-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
1297; AVX2-NEXT:    vpextrd $1, %xmm3, %eax
1298; AVX2-NEXT:    vpextrd $1, %xmm1, %ecx
1299; AVX2-NEXT:    cmpl %eax, %ecx
1300; AVX2-NEXT:    seta %al
1301; AVX2-NEXT:    sbbb $0, %al
1302; AVX2-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
1303; AVX2-NEXT:    vpextrd $2, %xmm3, %eax
1304; AVX2-NEXT:    vpextrd $2, %xmm1, %ecx
1305; AVX2-NEXT:    cmpl %eax, %ecx
1306; AVX2-NEXT:    seta %al
1307; AVX2-NEXT:    sbbb $0, %al
1308; AVX2-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
1309; AVX2-NEXT:    vpextrd $3, %xmm3, %eax
1310; AVX2-NEXT:    vpextrd $3, %xmm1, %ecx
1311; AVX2-NEXT:    cmpl %eax, %ecx
1312; AVX2-NEXT:    seta %al
1313; AVX2-NEXT:    sbbb $0, %al
1314; AVX2-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
1315; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm2
1316; AVX2-NEXT:    vmovd %xmm2, %eax
1317; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
1318; AVX2-NEXT:    vmovd %xmm1, %ecx
1319; AVX2-NEXT:    cmpl %eax, %ecx
1320; AVX2-NEXT:    seta %al
1321; AVX2-NEXT:    sbbb $0, %al
1322; AVX2-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
1323; AVX2-NEXT:    vpextrd $1, %xmm2, %eax
1324; AVX2-NEXT:    vpextrd $1, %xmm1, %ecx
1325; AVX2-NEXT:    cmpl %eax, %ecx
1326; AVX2-NEXT:    seta %al
1327; AVX2-NEXT:    sbbb $0, %al
1328; AVX2-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
1329; AVX2-NEXT:    vpextrd $2, %xmm2, %eax
1330; AVX2-NEXT:    vpextrd $2, %xmm1, %ecx
1331; AVX2-NEXT:    cmpl %eax, %ecx
1332; AVX2-NEXT:    seta %al
1333; AVX2-NEXT:    sbbb $0, %al
1334; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
1335; AVX2-NEXT:    vpextrd $3, %xmm2, %eax
1336; AVX2-NEXT:    vpextrd $3, %xmm1, %ecx
1337; AVX2-NEXT:    cmpl %eax, %ecx
1338; AVX2-NEXT:    seta %al
1339; AVX2-NEXT:    sbbb $0, %al
1340; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
1341; AVX2-NEXT:    vzeroupper
1342; AVX2-NEXT:    retq
1343;
1344; AVX512-LABEL: ucmp_wide_vec_op:
1345; AVX512:       # %bb.0:
1346; AVX512-NEXT:    vpcmpltud %zmm1, %zmm0, %k1
1347; AVX512-NEXT:    vpcmpnleud %zmm1, %zmm0, %k2
1348; AVX512-NEXT:    vmovdqu8 {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1349; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1350; AVX512-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1}
1351; AVX512-NEXT:    vzeroupper
1352; AVX512-NEXT:    retq
1353;
1354; X86-LABEL: ucmp_wide_vec_op:
1355; X86:       # %bb.0:
1356; X86-NEXT:    pushl %ebp
1357; X86-NEXT:    pushl %ebx
1358; X86-NEXT:    pushl %edi
1359; X86-NEXT:    pushl %esi
1360; X86-NEXT:    subl $12, %esp
1361; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1362; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
1363; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1364; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1365; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1366; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1367; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ebp
1368; X86-NEXT:    seta %al
1369; X86-NEXT:    sbbb $0, %al
1370; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1371; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ebx
1372; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1373; X86-NEXT:    seta %al
1374; X86-NEXT:    sbbb $0, %al
1375; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1376; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ebx
1377; X86-NEXT:    seta %al
1378; X86-NEXT:    sbbb $0, %al
1379; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1380; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
1381; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1382; X86-NEXT:    seta %al
1383; X86-NEXT:    sbbb $0, %al
1384; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1385; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
1386; X86-NEXT:    seta %al
1387; X86-NEXT:    sbbb $0, %al
1388; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1389; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
1390; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1391; X86-NEXT:    seta %al
1392; X86-NEXT:    sbbb $0, %al
1393; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1394; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
1395; X86-NEXT:    seta %al
1396; X86-NEXT:    sbbb $0, %al
1397; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1398; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edi
1399; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1400; X86-NEXT:    seta %al
1401; X86-NEXT:    sbbb $0, %al
1402; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1403; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
1404; X86-NEXT:    seta %al
1405; X86-NEXT:    sbbb $0, %al
1406; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1407; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
1408; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1409; X86-NEXT:    seta %al
1410; X86-NEXT:    sbbb $0, %al
1411; X86-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
1412; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
1413; X86-NEXT:    seta %bh
1414; X86-NEXT:    sbbb $0, %bh
1415; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1416; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1417; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1418; X86-NEXT:    seta %bl
1419; X86-NEXT:    sbbb $0, %bl
1420; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1421; X86-NEXT:    seta %dh
1422; X86-NEXT:    sbbb $0, %dh
1423; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1424; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1425; X86-NEXT:    seta %ch
1426; X86-NEXT:    sbbb $0, %ch
1427; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1428; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1429; X86-NEXT:    seta %dl
1430; X86-NEXT:    sbbb $0, %dl
1431; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1432; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
1433; X86-NEXT:    seta %cl
1434; X86-NEXT:    sbbb $0, %cl
1435; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1436; X86-NEXT:    movb %cl, 15(%eax)
1437; X86-NEXT:    movb %dl, 14(%eax)
1438; X86-NEXT:    movb %ch, 13(%eax)
1439; X86-NEXT:    movb %dh, 12(%eax)
1440; X86-NEXT:    movb %bl, 11(%eax)
1441; X86-NEXT:    movb %bh, 10(%eax)
1442; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1443; X86-NEXT:    movb %cl, 9(%eax)
1444; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1445; X86-NEXT:    movb %cl, 8(%eax)
1446; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1447; X86-NEXT:    movb %cl, 7(%eax)
1448; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1449; X86-NEXT:    movb %cl, 6(%eax)
1450; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1451; X86-NEXT:    movb %cl, 5(%eax)
1452; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1453; X86-NEXT:    movb %cl, 4(%eax)
1454; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1455; X86-NEXT:    movb %cl, 3(%eax)
1456; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1457; X86-NEXT:    movb %cl, 2(%eax)
1458; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1459; X86-NEXT:    movb %cl, 1(%eax)
1460; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
1461; X86-NEXT:    movb %cl, (%eax)
1462; X86-NEXT:    addl $12, %esp
1463; X86-NEXT:    popl %esi
1464; X86-NEXT:    popl %edi
1465; X86-NEXT:    popl %ebx
1466; X86-NEXT:    popl %ebp
1467; X86-NEXT:    retl $4
1468  %1 = call <16 x i8> @llvm.ucmp(<16 x i32> %x, <16 x i32> %y)
1469  ret <16 x i8> %1
1470}
1471
1472define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind {
1473; SSE4-LABEL: ucmp_uncommon_vectors:
1474; SSE4:       # %bb.0:
1475; SSE4-NEXT:    pushq %rbp
1476; SSE4-NEXT:    pushq %r15
1477; SSE4-NEXT:    pushq %r14
1478; SSE4-NEXT:    pushq %r13
1479; SSE4-NEXT:    pushq %r12
1480; SSE4-NEXT:    pushq %rbx
1481; SSE4-NEXT:    subq $120, %rsp
1482; SSE4-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1483; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1484; SSE4-NEXT:    andl $127, %eax
1485; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1486; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1487; SSE4-NEXT:    andl $127, %eax
1488; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1489; SSE4-NEXT:    andl $127, %edx
1490; SSE4-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1491; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1492; SSE4-NEXT:    andl $127, %eax
1493; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1494; SSE4-NEXT:    andl $127, %r8d
1495; SSE4-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1496; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1497; SSE4-NEXT:    andl $127, %eax
1498; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1499; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1500; SSE4-NEXT:    andl $127, %eax
1501; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1502; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1503; SSE4-NEXT:    andl $127, %eax
1504; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1505; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1506; SSE4-NEXT:    andl $127, %eax
1507; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1508; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1509; SSE4-NEXT:    andl $127, %eax
1510; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1511; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1512; SSE4-NEXT:    andl $127, %eax
1513; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1514; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1515; SSE4-NEXT:    andl $127, %eax
1516; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1517; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1518; SSE4-NEXT:    andl $127, %eax
1519; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1520; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1521; SSE4-NEXT:    andl $127, %eax
1522; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1523; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1524; SSE4-NEXT:    andl $127, %eax
1525; SSE4-NEXT:    movq %rax, (%rsp) # 8-byte Spill
1526; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1527; SSE4-NEXT:    andl $127, %eax
1528; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1529; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1530; SSE4-NEXT:    andl $127, %eax
1531; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1532; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1533; SSE4-NEXT:    andl $127, %eax
1534; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1535; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1536; SSE4-NEXT:    andl $127, %eax
1537; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1538; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1539; SSE4-NEXT:    andl $127, %eax
1540; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1541; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1542; SSE4-NEXT:    andl $127, %eax
1543; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1544; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1545; SSE4-NEXT:    andl $127, %eax
1546; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1547; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1548; SSE4-NEXT:    andl $127, %eax
1549; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1550; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1551; SSE4-NEXT:    andl $127, %eax
1552; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1553; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r10
1554; SSE4-NEXT:    andl $127, %r10d
1555; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1556; SSE4-NEXT:    andl $127, %eax
1557; SSE4-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1558; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1559; SSE4-NEXT:    andl $127, %ecx
1560; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r8
1561; SSE4-NEXT:    andl $127, %r8d
1562; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
1563; SSE4-NEXT:    andl $127, %ebx
1564; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
1565; SSE4-NEXT:    andl $127, %edx
1566; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r13
1567; SSE4-NEXT:    andl $127, %r13d
1568; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r11
1569; SSE4-NEXT:    andl $127, %r11d
1570; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r14
1571; SSE4-NEXT:    andl $127, %r14d
1572; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r12
1573; SSE4-NEXT:    andl $127, %r12d
1574; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1575; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
1576; SSE4-NEXT:    cmpq %rax, %rbp
1577; SSE4-NEXT:    movq %r12, %r15
1578; SSE4-NEXT:    sbbq %r14, %r15
1579; SSE4-NEXT:    setb %r15b
1580; SSE4-NEXT:    cmpq %rbp, %rax
1581; SSE4-NEXT:    sbbq %r12, %r14
1582; SSE4-NEXT:    sbbb $0, %r15b
1583; SSE4-NEXT:    movb %r15b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1584; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1585; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r14
1586; SSE4-NEXT:    cmpq %rax, %r14
1587; SSE4-NEXT:    movq %r11, %r15
1588; SSE4-NEXT:    sbbq %r13, %r15
1589; SSE4-NEXT:    setb %bpl
1590; SSE4-NEXT:    cmpq %r14, %rax
1591; SSE4-NEXT:    sbbq %r11, %r13
1592; SSE4-NEXT:    sbbb $0, %bpl
1593; SSE4-NEXT:    movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1594; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1595; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r11
1596; SSE4-NEXT:    cmpq %rax, %r11
1597; SSE4-NEXT:    movq %rdx, %r14
1598; SSE4-NEXT:    sbbq %rbx, %r14
1599; SSE4-NEXT:    setb %bpl
1600; SSE4-NEXT:    cmpq %r11, %rax
1601; SSE4-NEXT:    sbbq %rdx, %rbx
1602; SSE4-NEXT:    sbbb $0, %bpl
1603; SSE4-NEXT:    movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1604; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1605; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
1606; SSE4-NEXT:    cmpq %rax, %rdx
1607; SSE4-NEXT:    movq %r8, %r11
1608; SSE4-NEXT:    sbbq %rcx, %r11
1609; SSE4-NEXT:    setb %r11b
1610; SSE4-NEXT:    cmpq %rdx, %rax
1611; SSE4-NEXT:    sbbq %r8, %rcx
1612; SSE4-NEXT:    sbbb $0, %r11b
1613; SSE4-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1614; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1615; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1616; SSE4-NEXT:    cmpq %rax, %rcx
1617; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1618; SSE4-NEXT:    movq %r8, %rdx
1619; SSE4-NEXT:    sbbq %r10, %rdx
1620; SSE4-NEXT:    setb %dl
1621; SSE4-NEXT:    cmpq %rcx, %rax
1622; SSE4-NEXT:    sbbq %r8, %r10
1623; SSE4-NEXT:    sbbb $0, %dl
1624; SSE4-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1625; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1626; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1627; SSE4-NEXT:    cmpq %rax, %rcx
1628; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
1629; SSE4-NEXT:    movq %r11, %rdx
1630; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1631; SSE4-NEXT:    sbbq %r8, %rdx
1632; SSE4-NEXT:    setb %r10b
1633; SSE4-NEXT:    cmpq %rcx, %rax
1634; SSE4-NEXT:    sbbq %r11, %r8
1635; SSE4-NEXT:    sbbb $0, %r10b
1636; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1637; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1638; SSE4-NEXT:    cmpq %rax, %rcx
1639; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
1640; SSE4-NEXT:    movq %r11, %rdx
1641; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1642; SSE4-NEXT:    sbbq %r8, %rdx
1643; SSE4-NEXT:    setb %dl
1644; SSE4-NEXT:    cmpq %rcx, %rax
1645; SSE4-NEXT:    sbbq %r11, %r8
1646; SSE4-NEXT:    sbbb $0, %dl
1647; SSE4-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1648; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1649; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1650; SSE4-NEXT:    cmpq %rax, %rcx
1651; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
1652; SSE4-NEXT:    movq %r11, %rdx
1653; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1654; SSE4-NEXT:    sbbq %r8, %rdx
1655; SSE4-NEXT:    setb %bpl
1656; SSE4-NEXT:    cmpq %rcx, %rax
1657; SSE4-NEXT:    sbbq %r11, %r8
1658; SSE4-NEXT:    sbbb $0, %bpl
1659; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1660; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1661; SSE4-NEXT:    cmpq %rax, %rcx
1662; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
1663; SSE4-NEXT:    movq %r11, %rdx
1664; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1665; SSE4-NEXT:    sbbq %r8, %rdx
1666; SSE4-NEXT:    setb %dl
1667; SSE4-NEXT:    cmpq %rcx, %rax
1668; SSE4-NEXT:    sbbq %r11, %r8
1669; SSE4-NEXT:    sbbb $0, %dl
1670; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1671; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1672; SSE4-NEXT:    cmpq %rax, %rcx
1673; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
1674; SSE4-NEXT:    movq %r14, %r8
1675; SSE4-NEXT:    movq (%rsp), %rbx # 8-byte Reload
1676; SSE4-NEXT:    sbbq %rbx, %r8
1677; SSE4-NEXT:    setb %r11b
1678; SSE4-NEXT:    cmpq %rcx, %rax
1679; SSE4-NEXT:    sbbq %r14, %rbx
1680; SSE4-NEXT:    sbbb $0, %r11b
1681; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1682; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1683; SSE4-NEXT:    cmpq %rax, %rcx
1684; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
1685; SSE4-NEXT:    movq %r14, %rbx
1686; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1687; SSE4-NEXT:    sbbq %r8, %rbx
1688; SSE4-NEXT:    setb %bl
1689; SSE4-NEXT:    cmpq %rcx, %rax
1690; SSE4-NEXT:    sbbq %r14, %r8
1691; SSE4-NEXT:    sbbb $0, %bl
1692; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1693; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r14
1694; SSE4-NEXT:    cmpq %rax, %r14
1695; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
1696; SSE4-NEXT:    movq %r15, %rcx
1697; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1698; SSE4-NEXT:    sbbq %r8, %rcx
1699; SSE4-NEXT:    setb %cl
1700; SSE4-NEXT:    cmpq %r14, %rax
1701; SSE4-NEXT:    sbbq %r15, %r8
1702; SSE4-NEXT:    sbbb $0, %cl
1703; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1704; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r15
1705; SSE4-NEXT:    cmpq %rax, %r15
1706; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
1707; SSE4-NEXT:    movq %r12, %r14
1708; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1709; SSE4-NEXT:    sbbq %r8, %r14
1710; SSE4-NEXT:    setb %r14b
1711; SSE4-NEXT:    cmpq %r15, %rax
1712; SSE4-NEXT:    sbbq %r12, %r8
1713; SSE4-NEXT:    sbbb $0, %r14b
1714; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1715; SSE4-NEXT:    cmpq %r9, %rax
1716; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
1717; SSE4-NEXT:    movq %r12, %r15
1718; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1719; SSE4-NEXT:    sbbq %r8, %r15
1720; SSE4-NEXT:    setb %r15b
1721; SSE4-NEXT:    cmpq %rax, %r9
1722; SSE4-NEXT:    sbbq %r12, %r8
1723; SSE4-NEXT:    sbbb $0, %r15b
1724; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1725; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
1726; SSE4-NEXT:    cmpq %r12, %rax
1727; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
1728; SSE4-NEXT:    movq %r13, %r9
1729; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1730; SSE4-NEXT:    sbbq %r8, %r9
1731; SSE4-NEXT:    setb %r9b
1732; SSE4-NEXT:    cmpq %rax, %r12
1733; SSE4-NEXT:    sbbq %r13, %r8
1734; SSE4-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1735; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r12
1736; SSE4-NEXT:    sbbb $0, %r9b
1737; SSE4-NEXT:    cmpq %rsi, %r12
1738; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1739; SSE4-NEXT:    movq %r8, %rdi
1740; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1741; SSE4-NEXT:    sbbq %rax, %rdi
1742; SSE4-NEXT:    setb %dil
1743; SSE4-NEXT:    cmpq %r12, %rsi
1744; SSE4-NEXT:    sbbq %r8, %rax
1745; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r12
1746; SSE4-NEXT:    movq {{[0-9]+}}(%rsp), %r13
1747; SSE4-NEXT:    sbbb $0, %dil
1748; SSE4-NEXT:    cmpq %r12, %r13
1749; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
1750; SSE4-NEXT:    movq %r8, %rsi
1751; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1752; SSE4-NEXT:    sbbq %rax, %rsi
1753; SSE4-NEXT:    setb %sil
1754; SSE4-NEXT:    cmpq %r13, %r12
1755; SSE4-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
1756; SSE4-NEXT:    movd %r12d, %xmm1
1757; SSE4-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
1758; SSE4-NEXT:    movd %r12d, %xmm2
1759; SSE4-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
1760; SSE4-NEXT:    movd %r12d, %xmm3
1761; SSE4-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
1762; SSE4-NEXT:    movd %r12d, %xmm4
1763; SSE4-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
1764; SSE4-NEXT:    movd %r12d, %xmm5
1765; SSE4-NEXT:    movzbl %r10b, %r10d
1766; SSE4-NEXT:    movd %r10d, %xmm6
1767; SSE4-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 1-byte Folded Reload
1768; SSE4-NEXT:    movd %r10d, %xmm7
1769; SSE4-NEXT:    movzbl %bpl, %r10d
1770; SSE4-NEXT:    movd %r10d, %xmm0
1771; SSE4-NEXT:    movzbl %dl, %edx
1772; SSE4-NEXT:    movd %edx, %xmm8
1773; SSE4-NEXT:    movzbl %r11b, %edx
1774; SSE4-NEXT:    movd %edx, %xmm9
1775; SSE4-NEXT:    movzbl %bl, %edx
1776; SSE4-NEXT:    movd %edx, %xmm10
1777; SSE4-NEXT:    movzbl %cl, %ecx
1778; SSE4-NEXT:    movd %ecx, %xmm11
1779; SSE4-NEXT:    movzbl %r14b, %ecx
1780; SSE4-NEXT:    movd %ecx, %xmm12
1781; SSE4-NEXT:    movzbl %r15b, %ecx
1782; SSE4-NEXT:    movd %ecx, %xmm13
1783; SSE4-NEXT:    movzbl %r9b, %ecx
1784; SSE4-NEXT:    movd %ecx, %xmm14
1785; SSE4-NEXT:    movzbl %dil, %ecx
1786; SSE4-NEXT:    movd %ecx, %xmm15
1787; SSE4-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1788; SSE4-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
1789; SSE4-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
1790; SSE4-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
1791; SSE4-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
1792; SSE4-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
1793; SSE4-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1794; SSE4-NEXT:    punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3],xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7]
1795; SSE4-NEXT:    punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7]
1796; SSE4-NEXT:    punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm9[0],xmm11[1],xmm9[1],xmm11[2],xmm9[2],xmm11[3],xmm9[3]
1797; SSE4-NEXT:    punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3],xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7]
1798; SSE4-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
1799; SSE4-NEXT:    punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm13[0],xmm15[1],xmm13[1],xmm15[2],xmm13[2],xmm15[3],xmm13[3]
1800; SSE4-NEXT:    punpckldq {{.*#+}} xmm15 = xmm15[0],xmm11[0],xmm15[1],xmm11[1]
1801; SSE4-NEXT:    sbbq %r8, %rax
1802; SSE4-NEXT:    sbbb $0, %sil
1803; SSE4-NEXT:    punpcklqdq {{.*#+}} xmm15 = xmm15[0],xmm0[0]
1804; SSE4-NEXT:    movzbl %sil, %ecx
1805; SSE4-NEXT:    andl $3, %ecx
1806; SSE4-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1807; SSE4-NEXT:    movb %cl, 4(%rax)
1808; SSE4-NEXT:    movdqa %xmm15, -{{[0-9]+}}(%rsp)
1809; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
1810; SSE4-NEXT:    andl $3, %ecx
1811; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
1812; SSE4-NEXT:    andl $3, %edx
1813; SSE4-NEXT:    leaq (%rdx,%rcx,4), %rcx
1814; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
1815; SSE4-NEXT:    andl $3, %edx
1816; SSE4-NEXT:    shll $4, %edx
1817; SSE4-NEXT:    orq %rcx, %rdx
1818; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
1819; SSE4-NEXT:    andl $3, %ecx
1820; SSE4-NEXT:    shll $6, %ecx
1821; SSE4-NEXT:    orq %rdx, %rcx
1822; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
1823; SSE4-NEXT:    andl $3, %edx
1824; SSE4-NEXT:    shll $8, %edx
1825; SSE4-NEXT:    orq %rcx, %rdx
1826; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
1827; SSE4-NEXT:    andl $3, %ecx
1828; SSE4-NEXT:    shll $10, %ecx
1829; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
1830; SSE4-NEXT:    andl $3, %esi
1831; SSE4-NEXT:    shll $12, %esi
1832; SSE4-NEXT:    orq %rcx, %rsi
1833; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edi
1834; SSE4-NEXT:    andl $3, %edi
1835; SSE4-NEXT:    shll $14, %edi
1836; SSE4-NEXT:    orq %rsi, %rdi
1837; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
1838; SSE4-NEXT:    andl $3, %ecx
1839; SSE4-NEXT:    shll $16, %ecx
1840; SSE4-NEXT:    orq %rdi, %rcx
1841; SSE4-NEXT:    orq %rdx, %rcx
1842; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
1843; SSE4-NEXT:    andl $3, %edx
1844; SSE4-NEXT:    shll $18, %edx
1845; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
1846; SSE4-NEXT:    andl $3, %esi
1847; SSE4-NEXT:    shll $20, %esi
1848; SSE4-NEXT:    orq %rdx, %rsi
1849; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
1850; SSE4-NEXT:    andl $3, %edx
1851; SSE4-NEXT:    shll $22, %edx
1852; SSE4-NEXT:    orq %rsi, %rdx
1853; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
1854; SSE4-NEXT:    andl $3, %esi
1855; SSE4-NEXT:    shll $24, %esi
1856; SSE4-NEXT:    orq %rdx, %rsi
1857; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
1858; SSE4-NEXT:    andl $3, %edx
1859; SSE4-NEXT:    shlq $26, %rdx
1860; SSE4-NEXT:    orq %rsi, %rdx
1861; SSE4-NEXT:    orq %rcx, %rdx
1862; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
1863; SSE4-NEXT:    andl $3, %ecx
1864; SSE4-NEXT:    shlq $28, %rcx
1865; SSE4-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
1866; SSE4-NEXT:    andl $3, %esi
1867; SSE4-NEXT:    shlq $30, %rsi
1868; SSE4-NEXT:    orq %rcx, %rsi
1869; SSE4-NEXT:    orq %rdx, %rsi
1870; SSE4-NEXT:    movl %esi, (%rax)
1871; SSE4-NEXT:    addq $120, %rsp
1872; SSE4-NEXT:    popq %rbx
1873; SSE4-NEXT:    popq %r12
1874; SSE4-NEXT:    popq %r13
1875; SSE4-NEXT:    popq %r14
1876; SSE4-NEXT:    popq %r15
1877; SSE4-NEXT:    popq %rbp
1878; SSE4-NEXT:    retq
1879;
1880; SSE2-LABEL: ucmp_uncommon_vectors:
1881; SSE2:       # %bb.0:
1882; SSE2-NEXT:    pushq %rbp
1883; SSE2-NEXT:    pushq %r15
1884; SSE2-NEXT:    pushq %r14
1885; SSE2-NEXT:    pushq %r13
1886; SSE2-NEXT:    pushq %r12
1887; SSE2-NEXT:    pushq %rbx
1888; SSE2-NEXT:    subq $88, %rsp
1889; SSE2-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1890; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1891; SSE2-NEXT:    andl $127, %eax
1892; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1893; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1894; SSE2-NEXT:    andl $127, %eax
1895; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1896; SSE2-NEXT:    andl $127, %r8d
1897; SSE2-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1898; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1899; SSE2-NEXT:    andl $127, %eax
1900; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1901; SSE2-NEXT:    andl $127, %edx
1902; SSE2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1903; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1904; SSE2-NEXT:    andl $127, %eax
1905; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1906; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1907; SSE2-NEXT:    andl $127, %eax
1908; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1909; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1910; SSE2-NEXT:    andl $127, %eax
1911; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1912; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1913; SSE2-NEXT:    andl $127, %eax
1914; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1915; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1916; SSE2-NEXT:    andl $127, %eax
1917; SSE2-NEXT:    movq %rax, (%rsp) # 8-byte Spill
1918; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1919; SSE2-NEXT:    andl $127, %eax
1920; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1921; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1922; SSE2-NEXT:    andl $127, %eax
1923; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1924; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1925; SSE2-NEXT:    andl $127, %eax
1926; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1927; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1928; SSE2-NEXT:    andl $127, %eax
1929; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1930; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1931; SSE2-NEXT:    andl $127, %eax
1932; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1933; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1934; SSE2-NEXT:    andl $127, %eax
1935; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1936; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1937; SSE2-NEXT:    andl $127, %eax
1938; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1939; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1940; SSE2-NEXT:    andl $127, %eax
1941; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1942; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1943; SSE2-NEXT:    andl $127, %eax
1944; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1945; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1946; SSE2-NEXT:    andl $127, %eax
1947; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1948; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1949; SSE2-NEXT:    andl $127, %eax
1950; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1951; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1952; SSE2-NEXT:    andl $127, %eax
1953; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1954; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1955; SSE2-NEXT:    andl $127, %eax
1956; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1957; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1958; SSE2-NEXT:    andl $127, %eax
1959; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1960; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
1961; SSE2-NEXT:    andl $127, %ecx
1962; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1963; SSE2-NEXT:    andl $127, %eax
1964; SSE2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1965; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
1966; SSE2-NEXT:    andl $127, %ebx
1967; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
1968; SSE2-NEXT:    andl $127, %edx
1969; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r10
1970; SSE2-NEXT:    andl $127, %r10d
1971; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r14
1972; SSE2-NEXT:    andl $127, %r14d
1973; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
1974; SSE2-NEXT:    andl $127, %ebp
1975; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r13
1976; SSE2-NEXT:    andl $127, %r13d
1977; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r11
1978; SSE2-NEXT:    andl $127, %r11d
1979; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r15
1980; SSE2-NEXT:    andl $127, %r15d
1981; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1982; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r12
1983; SSE2-NEXT:    cmpq %rax, %r12
1984; SSE2-NEXT:    movq %r15, %r8
1985; SSE2-NEXT:    sbbq %r11, %r8
1986; SSE2-NEXT:    setb %r8b
1987; SSE2-NEXT:    cmpq %r12, %rax
1988; SSE2-NEXT:    sbbq %r15, %r11
1989; SSE2-NEXT:    sbbb $0, %r8b
1990; SSE2-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1991; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
1992; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r8
1993; SSE2-NEXT:    cmpq %rax, %r8
1994; SSE2-NEXT:    movq %r13, %r11
1995; SSE2-NEXT:    sbbq %rbp, %r11
1996; SSE2-NEXT:    setb %r11b
1997; SSE2-NEXT:    cmpq %r8, %rax
1998; SSE2-NEXT:    sbbq %r13, %rbp
1999; SSE2-NEXT:    sbbb $0, %r11b
2000; SSE2-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2001; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2002; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2003; SSE2-NEXT:    cmpq %rax, %r8
2004; SSE2-NEXT:    movq %r14, %r11
2005; SSE2-NEXT:    sbbq %r10, %r11
2006; SSE2-NEXT:    setb %r11b
2007; SSE2-NEXT:    cmpq %r8, %rax
2008; SSE2-NEXT:    sbbq %r14, %r10
2009; SSE2-NEXT:    sbbb $0, %r11b
2010; SSE2-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2011; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2012; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2013; SSE2-NEXT:    cmpq %rax, %r8
2014; SSE2-NEXT:    movq %rdx, %r10
2015; SSE2-NEXT:    sbbq %rbx, %r10
2016; SSE2-NEXT:    setb %r10b
2017; SSE2-NEXT:    cmpq %r8, %rax
2018; SSE2-NEXT:    sbbq %rdx, %rbx
2019; SSE2-NEXT:    sbbb $0, %r10b
2020; SSE2-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2021; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2022; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2023; SSE2-NEXT:    cmpq %rax, %rdx
2024; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2025; SSE2-NEXT:    movq %r10, %r8
2026; SSE2-NEXT:    sbbq %rcx, %r8
2027; SSE2-NEXT:    setb %r8b
2028; SSE2-NEXT:    cmpq %rdx, %rax
2029; SSE2-NEXT:    sbbq %r10, %rcx
2030; SSE2-NEXT:    sbbb $0, %r8b
2031; SSE2-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2032; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2033; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2034; SSE2-NEXT:    cmpq %rax, %rcx
2035; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2036; SSE2-NEXT:    movq %r10, %rdx
2037; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
2038; SSE2-NEXT:    sbbq %r8, %rdx
2039; SSE2-NEXT:    setb %dl
2040; SSE2-NEXT:    cmpq %rcx, %rax
2041; SSE2-NEXT:    sbbq %r10, %r8
2042; SSE2-NEXT:    sbbb $0, %dl
2043; SSE2-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2044; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2045; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2046; SSE2-NEXT:    cmpq %rax, %rcx
2047; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2048; SSE2-NEXT:    movq %r10, %rdx
2049; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
2050; SSE2-NEXT:    sbbq %r8, %rdx
2051; SSE2-NEXT:    setb %dl
2052; SSE2-NEXT:    cmpq %rcx, %rax
2053; SSE2-NEXT:    sbbq %r10, %r8
2054; SSE2-NEXT:    sbbb $0, %dl
2055; SSE2-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2056; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2057; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2058; SSE2-NEXT:    cmpq %rax, %rcx
2059; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2060; SSE2-NEXT:    movq %r11, %rdx
2061; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2062; SSE2-NEXT:    sbbq %r10, %rdx
2063; SSE2-NEXT:    setb %r8b
2064; SSE2-NEXT:    cmpq %rcx, %rax
2065; SSE2-NEXT:    sbbq %r11, %r10
2066; SSE2-NEXT:    sbbb $0, %r8b
2067; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2068; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2069; SSE2-NEXT:    cmpq %rax, %rcx
2070; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2071; SSE2-NEXT:    movq %rbx, %rdx
2072; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2073; SSE2-NEXT:    sbbq %r10, %rdx
2074; SSE2-NEXT:    setb %r11b
2075; SSE2-NEXT:    cmpq %rcx, %rax
2076; SSE2-NEXT:    sbbq %rbx, %r10
2077; SSE2-NEXT:    sbbb $0, %r11b
2078; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2079; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2080; SSE2-NEXT:    cmpq %rax, %rcx
2081; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2082; SSE2-NEXT:    movq %rbx, %rdx
2083; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2084; SSE2-NEXT:    sbbq %r10, %rdx
2085; SSE2-NEXT:    setb %dl
2086; SSE2-NEXT:    cmpq %rcx, %rax
2087; SSE2-NEXT:    sbbq %rbx, %r10
2088; SSE2-NEXT:    sbbb $0, %dl
2089; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2090; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2091; SSE2-NEXT:    cmpq %rax, %rcx
2092; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2093; SSE2-NEXT:    movq %r14, %r10
2094; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2095; SSE2-NEXT:    sbbq %rbx, %r10
2096; SSE2-NEXT:    setb %r10b
2097; SSE2-NEXT:    cmpq %rcx, %rax
2098; SSE2-NEXT:    sbbq %r14, %rbx
2099; SSE2-NEXT:    sbbb $0, %r10b
2100; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2101; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
2102; SSE2-NEXT:    cmpq %rax, %rbx
2103; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2104; SSE2-NEXT:    movq %r15, %rcx
2105; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2106; SSE2-NEXT:    sbbq %r14, %rcx
2107; SSE2-NEXT:    setb %cl
2108; SSE2-NEXT:    cmpq %rbx, %rax
2109; SSE2-NEXT:    sbbq %r15, %r14
2110; SSE2-NEXT:    sbbb $0, %cl
2111; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2112; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r14
2113; SSE2-NEXT:    cmpq %rax, %r14
2114; SSE2-NEXT:    movq (%rsp), %r12 # 8-byte Reload
2115; SSE2-NEXT:    movq %r12, %rbx
2116; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2117; SSE2-NEXT:    sbbq %r15, %rbx
2118; SSE2-NEXT:    setb %bl
2119; SSE2-NEXT:    cmpq %r14, %rax
2120; SSE2-NEXT:    sbbq %r12, %r15
2121; SSE2-NEXT:    sbbb $0, %bl
2122; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2123; SSE2-NEXT:    cmpq %r9, %rax
2124; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
2125; SSE2-NEXT:    movq %r12, %r14
2126; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2127; SSE2-NEXT:    sbbq %r15, %r14
2128; SSE2-NEXT:    setb %bpl
2129; SSE2-NEXT:    cmpq %rax, %r9
2130; SSE2-NEXT:    sbbq %r12, %r15
2131; SSE2-NEXT:    sbbb $0, %bpl
2132; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2133; SSE2-NEXT:    cmpq %rsi, %rax
2134; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2135; SSE2-NEXT:    movq %r15, %r9
2136; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2137; SSE2-NEXT:    sbbq %r14, %r9
2138; SSE2-NEXT:    setb %r9b
2139; SSE2-NEXT:    cmpq %rax, %rsi
2140; SSE2-NEXT:    sbbq %r15, %r14
2141; SSE2-NEXT:    movq %rdi, %rax
2142; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
2143; SSE2-NEXT:    sbbb $0, %r9b
2144; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2145; SSE2-NEXT:    cmpq %r15, %rsi
2146; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
2147; SSE2-NEXT:    movq %r12, %rdi
2148; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2149; SSE2-NEXT:    sbbq %r14, %rdi
2150; SSE2-NEXT:    setb %dil
2151; SSE2-NEXT:    cmpq %rsi, %r15
2152; SSE2-NEXT:    sbbq %r12, %r14
2153; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
2154; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r14
2155; SSE2-NEXT:    sbbb $0, %dil
2156; SSE2-NEXT:    cmpq %rsi, %r14
2157; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
2158; SSE2-NEXT:    movq %r13, %r15
2159; SSE2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
2160; SSE2-NEXT:    sbbq %r12, %r15
2161; SSE2-NEXT:    setb %r15b
2162; SSE2-NEXT:    cmpq %r14, %rsi
2163; SSE2-NEXT:    sbbq %r13, %r12
2164; SSE2-NEXT:    sbbb $0, %r15b
2165; SSE2-NEXT:    movzbl %r15b, %esi
2166; SSE2-NEXT:    andl $3, %esi
2167; SSE2-NEXT:    movb %sil, 4(%rax)
2168; SSE2-NEXT:    movzbl %dil, %esi
2169; SSE2-NEXT:    movzbl %r9b, %edi
2170; SSE2-NEXT:    andl $3, %esi
2171; SSE2-NEXT:    andl $3, %edi
2172; SSE2-NEXT:    leaq (%rdi,%rsi,4), %rsi
2173; SSE2-NEXT:    movzbl %bpl, %edi
2174; SSE2-NEXT:    andl $3, %edi
2175; SSE2-NEXT:    shll $4, %edi
2176; SSE2-NEXT:    orq %rsi, %rdi
2177; SSE2-NEXT:    movzbl %bl, %r9d
2178; SSE2-NEXT:    andl $3, %r9d
2179; SSE2-NEXT:    shll $6, %r9d
2180; SSE2-NEXT:    orq %rdi, %r9
2181; SSE2-NEXT:    movzbl %cl, %esi
2182; SSE2-NEXT:    andl $3, %esi
2183; SSE2-NEXT:    shll $8, %esi
2184; SSE2-NEXT:    orq %r9, %rsi
2185; SSE2-NEXT:    movzbl %dl, %ecx
2186; SSE2-NEXT:    movzbl %r10b, %edx
2187; SSE2-NEXT:    andl $3, %edx
2188; SSE2-NEXT:    shll $10, %edx
2189; SSE2-NEXT:    andl $3, %ecx
2190; SSE2-NEXT:    shll $12, %ecx
2191; SSE2-NEXT:    orq %rdx, %rcx
2192; SSE2-NEXT:    movzbl %r11b, %edx
2193; SSE2-NEXT:    andl $3, %edx
2194; SSE2-NEXT:    shll $14, %edx
2195; SSE2-NEXT:    orq %rcx, %rdx
2196; SSE2-NEXT:    movzbl %r8b, %ecx
2197; SSE2-NEXT:    andl $3, %ecx
2198; SSE2-NEXT:    shll $16, %ecx
2199; SSE2-NEXT:    orq %rdx, %rcx
2200; SSE2-NEXT:    orq %rsi, %rcx
2201; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2202; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
2203; SSE2-NEXT:    andl $3, %esi
2204; SSE2-NEXT:    shll $18, %esi
2205; SSE2-NEXT:    andl $3, %edx
2206; SSE2-NEXT:    shll $20, %edx
2207; SSE2-NEXT:    orq %rsi, %rdx
2208; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
2209; SSE2-NEXT:    andl $3, %esi
2210; SSE2-NEXT:    shll $22, %esi
2211; SSE2-NEXT:    orq %rdx, %rsi
2212; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2213; SSE2-NEXT:    andl $3, %edx
2214; SSE2-NEXT:    shll $24, %edx
2215; SSE2-NEXT:    orq %rsi, %rdx
2216; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
2217; SSE2-NEXT:    andl $3, %esi
2218; SSE2-NEXT:    shlq $26, %rsi
2219; SSE2-NEXT:    orq %rdx, %rsi
2220; SSE2-NEXT:    orq %rcx, %rsi
2221; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
2222; SSE2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2223; SSE2-NEXT:    andl $3, %edx
2224; SSE2-NEXT:    shlq $28, %rdx
2225; SSE2-NEXT:    andl $3, %ecx
2226; SSE2-NEXT:    shlq $30, %rcx
2227; SSE2-NEXT:    orq %rdx, %rcx
2228; SSE2-NEXT:    orq %rsi, %rcx
2229; SSE2-NEXT:    movl %ecx, (%rax)
2230; SSE2-NEXT:    addq $88, %rsp
2231; SSE2-NEXT:    popq %rbx
2232; SSE2-NEXT:    popq %r12
2233; SSE2-NEXT:    popq %r13
2234; SSE2-NEXT:    popq %r14
2235; SSE2-NEXT:    popq %r15
2236; SSE2-NEXT:    popq %rbp
2237; SSE2-NEXT:    retq
2238;
2239; AVX2-LABEL: ucmp_uncommon_vectors:
2240; AVX2:       # %bb.0:
2241; AVX2-NEXT:    pushq %rbp
2242; AVX2-NEXT:    pushq %r15
2243; AVX2-NEXT:    pushq %r14
2244; AVX2-NEXT:    pushq %r13
2245; AVX2-NEXT:    pushq %r12
2246; AVX2-NEXT:    pushq %rbx
2247; AVX2-NEXT:    subq $88, %rsp
2248; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2249; AVX2-NEXT:    andl $127, %eax
2250; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2251; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2252; AVX2-NEXT:    andl $127, %eax
2253; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2254; AVX2-NEXT:    andl $127, %r8d
2255; AVX2-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2256; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2257; AVX2-NEXT:    andl $127, %eax
2258; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2259; AVX2-NEXT:    andl $127, %edx
2260; AVX2-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2261; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2262; AVX2-NEXT:    andl $127, %eax
2263; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2264; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2265; AVX2-NEXT:    andl $127, %eax
2266; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2267; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2268; AVX2-NEXT:    andl $127, %eax
2269; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2270; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2271; AVX2-NEXT:    andl $127, %eax
2272; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2273; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2274; AVX2-NEXT:    andl $127, %eax
2275; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2276; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2277; AVX2-NEXT:    andl $127, %eax
2278; AVX2-NEXT:    movq %rax, (%rsp) # 8-byte Spill
2279; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2280; AVX2-NEXT:    andl $127, %eax
2281; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2282; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2283; AVX2-NEXT:    andl $127, %eax
2284; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2285; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2286; AVX2-NEXT:    andl $127, %eax
2287; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2288; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2289; AVX2-NEXT:    andl $127, %eax
2290; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2291; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2292; AVX2-NEXT:    andl $127, %eax
2293; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2294; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2295; AVX2-NEXT:    andl $127, %eax
2296; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2297; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2298; AVX2-NEXT:    andl $127, %eax
2299; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2300; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2301; AVX2-NEXT:    andl $127, %eax
2302; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2303; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2304; AVX2-NEXT:    andl $127, %eax
2305; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2306; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2307; AVX2-NEXT:    andl $127, %eax
2308; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2309; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2310; AVX2-NEXT:    andl $127, %eax
2311; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2312; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2313; AVX2-NEXT:    andl $127, %eax
2314; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2315; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2316; AVX2-NEXT:    andl $127, %eax
2317; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2318; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2319; AVX2-NEXT:    andl $127, %eax
2320; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2321; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2322; AVX2-NEXT:    andl $127, %eax
2323; AVX2-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2324; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r15
2325; AVX2-NEXT:    andl $127, %r15d
2326; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2327; AVX2-NEXT:    andl $127, %eax
2328; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r14
2329; AVX2-NEXT:    andl $127, %r14d
2330; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2331; AVX2-NEXT:    andl $127, %edx
2332; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
2333; AVX2-NEXT:    andl $127, %ebp
2334; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2335; AVX2-NEXT:    andl $127, %r8d
2336; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r12
2337; AVX2-NEXT:    andl $127, %r12d
2338; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r13
2339; AVX2-NEXT:    andl $127, %r13d
2340; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
2341; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2342; AVX2-NEXT:    cmpq %rbx, %r11
2343; AVX2-NEXT:    movq %r13, %r10
2344; AVX2-NEXT:    sbbq %r12, %r10
2345; AVX2-NEXT:    setb %r10b
2346; AVX2-NEXT:    cmpq %r11, %rbx
2347; AVX2-NEXT:    sbbq %r13, %r12
2348; AVX2-NEXT:    sbbb $0, %r10b
2349; AVX2-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2350; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r10
2351; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2352; AVX2-NEXT:    cmpq %r10, %r11
2353; AVX2-NEXT:    movq %r8, %rbx
2354; AVX2-NEXT:    sbbq %rbp, %rbx
2355; AVX2-NEXT:    setb %bl
2356; AVX2-NEXT:    cmpq %r11, %r10
2357; AVX2-NEXT:    sbbq %r8, %rbp
2358; AVX2-NEXT:    sbbb $0, %bl
2359; AVX2-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2360; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2361; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r10
2362; AVX2-NEXT:    cmpq %r8, %r10
2363; AVX2-NEXT:    movq %rdx, %r11
2364; AVX2-NEXT:    sbbq %r14, %r11
2365; AVX2-NEXT:    setb %r11b
2366; AVX2-NEXT:    cmpq %r10, %r8
2367; AVX2-NEXT:    sbbq %rdx, %r14
2368; AVX2-NEXT:    sbbb $0, %r11b
2369; AVX2-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2370; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2371; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2372; AVX2-NEXT:    cmpq %rdx, %r8
2373; AVX2-NEXT:    movq %rax, %r10
2374; AVX2-NEXT:    sbbq %r15, %r10
2375; AVX2-NEXT:    setb %r10b
2376; AVX2-NEXT:    cmpq %r8, %rdx
2377; AVX2-NEXT:    sbbq %rax, %r15
2378; AVX2-NEXT:    sbbb $0, %r10b
2379; AVX2-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2380; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2381; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2382; AVX2-NEXT:    cmpq %rax, %rdx
2383; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2384; AVX2-NEXT:    movq %r11, %r8
2385; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2386; AVX2-NEXT:    sbbq %r10, %r8
2387; AVX2-NEXT:    setb %r8b
2388; AVX2-NEXT:    cmpq %rdx, %rax
2389; AVX2-NEXT:    sbbq %r11, %r10
2390; AVX2-NEXT:    sbbb $0, %r8b
2391; AVX2-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2392; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2393; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2394; AVX2-NEXT:    cmpq %rax, %rdx
2395; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2396; AVX2-NEXT:    movq %r11, %r8
2397; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2398; AVX2-NEXT:    sbbq %r10, %r8
2399; AVX2-NEXT:    setb %r8b
2400; AVX2-NEXT:    cmpq %rdx, %rax
2401; AVX2-NEXT:    sbbq %r11, %r10
2402; AVX2-NEXT:    sbbb $0, %r8b
2403; AVX2-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2404; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2405; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2406; AVX2-NEXT:    cmpq %rax, %rdx
2407; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2408; AVX2-NEXT:    movq %r11, %r8
2409; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2410; AVX2-NEXT:    sbbq %r10, %r8
2411; AVX2-NEXT:    setb %r8b
2412; AVX2-NEXT:    cmpq %rdx, %rax
2413; AVX2-NEXT:    sbbq %r11, %r10
2414; AVX2-NEXT:    sbbb $0, %r8b
2415; AVX2-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2416; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2417; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2418; AVX2-NEXT:    cmpq %rax, %rdx
2419; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2420; AVX2-NEXT:    movq %r11, %r8
2421; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2422; AVX2-NEXT:    sbbq %r10, %r8
2423; AVX2-NEXT:    setb %r12b
2424; AVX2-NEXT:    cmpq %rdx, %rax
2425; AVX2-NEXT:    sbbq %r11, %r10
2426; AVX2-NEXT:    sbbb $0, %r12b
2427; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2428; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2429; AVX2-NEXT:    cmpq %rax, %rdx
2430; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2431; AVX2-NEXT:    movq %r11, %r8
2432; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2433; AVX2-NEXT:    sbbq %r10, %r8
2434; AVX2-NEXT:    setb %r8b
2435; AVX2-NEXT:    cmpq %rdx, %rax
2436; AVX2-NEXT:    sbbq %r11, %r10
2437; AVX2-NEXT:    sbbb $0, %r8b
2438; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2439; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r10
2440; AVX2-NEXT:    cmpq %rax, %r10
2441; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2442; AVX2-NEXT:    movq %rbx, %rdx
2443; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2444; AVX2-NEXT:    sbbq %r11, %rdx
2445; AVX2-NEXT:    setb %dl
2446; AVX2-NEXT:    cmpq %r10, %rax
2447; AVX2-NEXT:    sbbq %rbx, %r11
2448; AVX2-NEXT:    sbbb $0, %dl
2449; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2450; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2451; AVX2-NEXT:    cmpq %rax, %r11
2452; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2453; AVX2-NEXT:    movq %r14, %r10
2454; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2455; AVX2-NEXT:    sbbq %rbx, %r10
2456; AVX2-NEXT:    setb %r10b
2457; AVX2-NEXT:    cmpq %r11, %rax
2458; AVX2-NEXT:    sbbq %r14, %rbx
2459; AVX2-NEXT:    sbbb $0, %r10b
2460; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2461; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
2462; AVX2-NEXT:    cmpq %rax, %rbx
2463; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2464; AVX2-NEXT:    movq %r15, %r11
2465; AVX2-NEXT:    movq (%rsp), %r14 # 8-byte Reload
2466; AVX2-NEXT:    sbbq %r14, %r11
2467; AVX2-NEXT:    setb %r11b
2468; AVX2-NEXT:    cmpq %rbx, %rax
2469; AVX2-NEXT:    sbbq %r15, %r14
2470; AVX2-NEXT:    sbbb $0, %r11b
2471; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2472; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %r14
2473; AVX2-NEXT:    cmpq %rax, %r14
2474; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
2475; AVX2-NEXT:    movq %r13, %rbx
2476; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2477; AVX2-NEXT:    sbbq %r15, %rbx
2478; AVX2-NEXT:    setb %bl
2479; AVX2-NEXT:    cmpq %r14, %rax
2480; AVX2-NEXT:    sbbq %r13, %r15
2481; AVX2-NEXT:    sbbb $0, %bl
2482; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2483; AVX2-NEXT:    cmpq %r9, %rax
2484; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
2485; AVX2-NEXT:    movq %r13, %r14
2486; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2487; AVX2-NEXT:    sbbq %r15, %r14
2488; AVX2-NEXT:    setb %bpl
2489; AVX2-NEXT:    cmpq %rax, %r9
2490; AVX2-NEXT:    sbbq %r13, %r15
2491; AVX2-NEXT:    sbbb $0, %bpl
2492; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2493; AVX2-NEXT:    cmpq %rsi, %rax
2494; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2495; AVX2-NEXT:    movq %r15, %r9
2496; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2497; AVX2-NEXT:    sbbq %r14, %r9
2498; AVX2-NEXT:    setb %r9b
2499; AVX2-NEXT:    cmpq %rax, %rsi
2500; AVX2-NEXT:    sbbq %r15, %r14
2501; AVX2-NEXT:    sbbb $0, %r9b
2502; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2503; AVX2-NEXT:    cmpq %rcx, %rax
2504; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2505; AVX2-NEXT:    movq %r15, %rsi
2506; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2507; AVX2-NEXT:    sbbq %r14, %rsi
2508; AVX2-NEXT:    setb %sil
2509; AVX2-NEXT:    cmpq %rax, %rcx
2510; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2511; AVX2-NEXT:    sbbq %r15, %r14
2512; AVX2-NEXT:    sbbb $0, %sil
2513; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2514; AVX2-NEXT:    cmpq %rax, %rcx
2515; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
2516; AVX2-NEXT:    movq %r13, %r14
2517; AVX2-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2518; AVX2-NEXT:    sbbq %r15, %r14
2519; AVX2-NEXT:    setb %r14b
2520; AVX2-NEXT:    cmpq %rcx, %rax
2521; AVX2-NEXT:    sbbq %r13, %r15
2522; AVX2-NEXT:    movq %rdi, %rax
2523; AVX2-NEXT:    sbbb $0, %r14b
2524; AVX2-NEXT:    movzbl %r14b, %ecx
2525; AVX2-NEXT:    andl $3, %ecx
2526; AVX2-NEXT:    movb %cl, 4(%rdi)
2527; AVX2-NEXT:    movzbl %sil, %ecx
2528; AVX2-NEXT:    andl $3, %ecx
2529; AVX2-NEXT:    movzbl %r9b, %esi
2530; AVX2-NEXT:    andl $3, %esi
2531; AVX2-NEXT:    leaq (%rsi,%rcx,4), %rcx
2532; AVX2-NEXT:    movzbl %bpl, %esi
2533; AVX2-NEXT:    andl $3, %esi
2534; AVX2-NEXT:    shll $4, %esi
2535; AVX2-NEXT:    orq %rcx, %rsi
2536; AVX2-NEXT:    movzbl %bl, %ecx
2537; AVX2-NEXT:    andl $3, %ecx
2538; AVX2-NEXT:    shll $6, %ecx
2539; AVX2-NEXT:    orq %rsi, %rcx
2540; AVX2-NEXT:    movzbl %r11b, %esi
2541; AVX2-NEXT:    andl $3, %esi
2542; AVX2-NEXT:    shll $8, %esi
2543; AVX2-NEXT:    orq %rcx, %rsi
2544; AVX2-NEXT:    movzbl %r10b, %ecx
2545; AVX2-NEXT:    andl $3, %ecx
2546; AVX2-NEXT:    shll $10, %ecx
2547; AVX2-NEXT:    movzbl %dl, %edx
2548; AVX2-NEXT:    andl $3, %edx
2549; AVX2-NEXT:    shll $12, %edx
2550; AVX2-NEXT:    orq %rcx, %rdx
2551; AVX2-NEXT:    movzbl %r8b, %edi
2552; AVX2-NEXT:    andl $3, %edi
2553; AVX2-NEXT:    shll $14, %edi
2554; AVX2-NEXT:    orq %rdx, %rdi
2555; AVX2-NEXT:    movzbl %r12b, %ecx
2556; AVX2-NEXT:    andl $3, %ecx
2557; AVX2-NEXT:    shll $16, %ecx
2558; AVX2-NEXT:    orq %rdi, %rcx
2559; AVX2-NEXT:    orq %rsi, %rcx
2560; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2561; AVX2-NEXT:    andl $3, %edx
2562; AVX2-NEXT:    shll $18, %edx
2563; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
2564; AVX2-NEXT:    andl $3, %esi
2565; AVX2-NEXT:    shll $20, %esi
2566; AVX2-NEXT:    orq %rdx, %rsi
2567; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2568; AVX2-NEXT:    andl $3, %edx
2569; AVX2-NEXT:    shll $22, %edx
2570; AVX2-NEXT:    orq %rsi, %rdx
2571; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
2572; AVX2-NEXT:    andl $3, %esi
2573; AVX2-NEXT:    shll $24, %esi
2574; AVX2-NEXT:    orq %rdx, %rsi
2575; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2576; AVX2-NEXT:    andl $3, %edx
2577; AVX2-NEXT:    shlq $26, %rdx
2578; AVX2-NEXT:    orq %rsi, %rdx
2579; AVX2-NEXT:    orq %rcx, %rdx
2580; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
2581; AVX2-NEXT:    andl $3, %ecx
2582; AVX2-NEXT:    shlq $28, %rcx
2583; AVX2-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
2584; AVX2-NEXT:    andl $3, %esi
2585; AVX2-NEXT:    shlq $30, %rsi
2586; AVX2-NEXT:    orq %rcx, %rsi
2587; AVX2-NEXT:    orq %rdx, %rsi
2588; AVX2-NEXT:    movl %esi, (%rax)
2589; AVX2-NEXT:    addq $88, %rsp
2590; AVX2-NEXT:    popq %rbx
2591; AVX2-NEXT:    popq %r12
2592; AVX2-NEXT:    popq %r13
2593; AVX2-NEXT:    popq %r14
2594; AVX2-NEXT:    popq %r15
2595; AVX2-NEXT:    popq %rbp
2596; AVX2-NEXT:    retq
2597;
2598; AVX512-LABEL: ucmp_uncommon_vectors:
2599; AVX512:       # %bb.0:
2600; AVX512-NEXT:    pushq %rbp
2601; AVX512-NEXT:    pushq %r15
2602; AVX512-NEXT:    pushq %r14
2603; AVX512-NEXT:    pushq %r13
2604; AVX512-NEXT:    pushq %r12
2605; AVX512-NEXT:    pushq %rbx
2606; AVX512-NEXT:    subq $88, %rsp
2607; AVX512-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2608; AVX512-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2609; AVX512-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2610; AVX512-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2611; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2612; AVX512-NEXT:    andl $127, %eax
2613; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2614; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2615; AVX512-NEXT:    andl $127, %eax
2616; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2617; AVX512-NEXT:    andl $127, %r8d
2618; AVX512-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2619; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2620; AVX512-NEXT:    andl $127, %eax
2621; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2622; AVX512-NEXT:    andl $127, %edx
2623; AVX512-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2624; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2625; AVX512-NEXT:    andl $127, %eax
2626; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2627; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2628; AVX512-NEXT:    andl $127, %eax
2629; AVX512-NEXT:    movq %rax, (%rsp) # 8-byte Spill
2630; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2631; AVX512-NEXT:    andl $127, %eax
2632; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2633; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2634; AVX512-NEXT:    andl $127, %eax
2635; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2636; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2637; AVX512-NEXT:    andl $127, %eax
2638; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2639; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2640; AVX512-NEXT:    andl $127, %eax
2641; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2642; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2643; AVX512-NEXT:    andl $127, %eax
2644; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2645; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2646; AVX512-NEXT:    andl $127, %eax
2647; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2648; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2649; AVX512-NEXT:    andl $127, %eax
2650; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2651; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2652; AVX512-NEXT:    andl $127, %eax
2653; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2654; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2655; AVX512-NEXT:    andl $127, %eax
2656; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2657; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2658; AVX512-NEXT:    andl $127, %eax
2659; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2660; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2661; AVX512-NEXT:    andl $127, %eax
2662; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2663; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2664; AVX512-NEXT:    andl $127, %eax
2665; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2666; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2667; AVX512-NEXT:    andl $127, %eax
2668; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2669; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2670; AVX512-NEXT:    andl $127, %eax
2671; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2672; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2673; AVX512-NEXT:    andl $127, %eax
2674; AVX512-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2675; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
2676; AVX512-NEXT:    andl $127, %ebp
2677; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r12
2678; AVX512-NEXT:    andl $127, %r12d
2679; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r13
2680; AVX512-NEXT:    andl $127, %r13d
2681; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r15
2682; AVX512-NEXT:    andl $127, %r15d
2683; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
2684; AVX512-NEXT:    andl $127, %r10d
2685; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
2686; AVX512-NEXT:    andl $127, %ebx
2687; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2688; AVX512-NEXT:    andl $127, %r8d
2689; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r9
2690; AVX512-NEXT:    andl $127, %r9d
2691; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
2692; AVX512-NEXT:    andl $127, %esi
2693; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
2694; AVX512-NEXT:    andl $127, %edi
2695; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2696; AVX512-NEXT:    andl $127, %eax
2697; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2698; AVX512-NEXT:    andl $127, %edx
2699; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r14
2700; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2701; AVX512-NEXT:    cmpq %r14, %r11
2702; AVX512-NEXT:    movq %rdx, %rcx
2703; AVX512-NEXT:    sbbq %rax, %rcx
2704; AVX512-NEXT:    setb %cl
2705; AVX512-NEXT:    cmpq %r11, %r14
2706; AVX512-NEXT:    sbbq %rdx, %rax
2707; AVX512-NEXT:    sbbb $0, %cl
2708; AVX512-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2709; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2710; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2711; AVX512-NEXT:    cmpq %rax, %rcx
2712; AVX512-NEXT:    movq %rdi, %rdx
2713; AVX512-NEXT:    sbbq %rsi, %rdx
2714; AVX512-NEXT:    setb %dl
2715; AVX512-NEXT:    cmpq %rcx, %rax
2716; AVX512-NEXT:    sbbq %rdi, %rsi
2717; AVX512-NEXT:    sbbb $0, %dl
2718; AVX512-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2719; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2720; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2721; AVX512-NEXT:    cmpq %rax, %rcx
2722; AVX512-NEXT:    movq %r9, %rdx
2723; AVX512-NEXT:    sbbq %r8, %rdx
2724; AVX512-NEXT:    setb %dl
2725; AVX512-NEXT:    cmpq %rcx, %rax
2726; AVX512-NEXT:    sbbq %r9, %r8
2727; AVX512-NEXT:    sbbb $0, %dl
2728; AVX512-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2729; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2730; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2731; AVX512-NEXT:    cmpq %rax, %rcx
2732; AVX512-NEXT:    movq %rbx, %rdx
2733; AVX512-NEXT:    sbbq %r10, %rdx
2734; AVX512-NEXT:    setb %dl
2735; AVX512-NEXT:    cmpq %rcx, %rax
2736; AVX512-NEXT:    sbbq %rbx, %r10
2737; AVX512-NEXT:    sbbb $0, %dl
2738; AVX512-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2739; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2740; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2741; AVX512-NEXT:    cmpq %rax, %rcx
2742; AVX512-NEXT:    movq %r15, %rdx
2743; AVX512-NEXT:    sbbq %r13, %rdx
2744; AVX512-NEXT:    setb %dl
2745; AVX512-NEXT:    cmpq %rcx, %rax
2746; AVX512-NEXT:    sbbq %r15, %r13
2747; AVX512-NEXT:    sbbb $0, %dl
2748; AVX512-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2749; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2750; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2751; AVX512-NEXT:    cmpq %rax, %rcx
2752; AVX512-NEXT:    movq %r12, %rdx
2753; AVX512-NEXT:    sbbq %rbp, %rdx
2754; AVX512-NEXT:    setb %dl
2755; AVX512-NEXT:    cmpq %rcx, %rax
2756; AVX512-NEXT:    sbbq %r12, %rbp
2757; AVX512-NEXT:    sbbb $0, %dl
2758; AVX512-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
2759; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2760; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2761; AVX512-NEXT:    cmpq %rax, %rcx
2762; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
2763; AVX512-NEXT:    movq %rdi, %rdx
2764; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
2765; AVX512-NEXT:    sbbq %rsi, %rdx
2766; AVX512-NEXT:    setb %r13b
2767; AVX512-NEXT:    cmpq %rcx, %rax
2768; AVX512-NEXT:    sbbq %rdi, %rsi
2769; AVX512-NEXT:    sbbb $0, %r13b
2770; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
2771; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2772; AVX512-NEXT:    cmpq %rax, %rcx
2773; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
2774; AVX512-NEXT:    movq %rdi, %rdx
2775; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
2776; AVX512-NEXT:    sbbq %rsi, %rdx
2777; AVX512-NEXT:    setb %bpl
2778; AVX512-NEXT:    cmpq %rcx, %rax
2779; AVX512-NEXT:    sbbq %rdi, %rsi
2780; AVX512-NEXT:    sbbb $0, %bpl
2781; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
2782; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2783; AVX512-NEXT:    cmpq %rcx, %rdx
2784; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
2785; AVX512-NEXT:    movq %rdi, %rax
2786; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
2787; AVX512-NEXT:    sbbq %rsi, %rax
2788; AVX512-NEXT:    setb %r9b
2789; AVX512-NEXT:    cmpq %rdx, %rcx
2790; AVX512-NEXT:    sbbq %rdi, %rsi
2791; AVX512-NEXT:    sbbb $0, %r9b
2792; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
2793; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
2794; AVX512-NEXT:    cmpq %rdx, %rsi
2795; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
2796; AVX512-NEXT:    movq %rdi, %rcx
2797; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2798; AVX512-NEXT:    sbbq %rax, %rcx
2799; AVX512-NEXT:    setb %cl
2800; AVX512-NEXT:    cmpq %rsi, %rdx
2801; AVX512-NEXT:    sbbq %rdi, %rax
2802; AVX512-NEXT:    sbbb $0, %cl
2803; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
2804; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
2805; AVX512-NEXT:    cmpq %rsi, %rdi
2806; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
2807; AVX512-NEXT:    movq %r8, %rdx
2808; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2809; AVX512-NEXT:    sbbq %rax, %rdx
2810; AVX512-NEXT:    setb %dl
2811; AVX512-NEXT:    cmpq %rdi, %rsi
2812; AVX512-NEXT:    sbbq %r8, %rax
2813; AVX512-NEXT:    sbbb $0, %dl
2814; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
2815; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2816; AVX512-NEXT:    cmpq %rdi, %r8
2817; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
2818; AVX512-NEXT:    movq %r10, %rsi
2819; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2820; AVX512-NEXT:    sbbq %rax, %rsi
2821; AVX512-NEXT:    setb %sil
2822; AVX512-NEXT:    cmpq %r8, %rdi
2823; AVX512-NEXT:    sbbq %r10, %rax
2824; AVX512-NEXT:    sbbb $0, %sil
2825; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r8
2826; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
2827; AVX512-NEXT:    cmpq %r8, %r10
2828; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
2829; AVX512-NEXT:    movq %r11, %rdi
2830; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2831; AVX512-NEXT:    sbbq %rax, %rdi
2832; AVX512-NEXT:    setb %dil
2833; AVX512-NEXT:    cmpq %r10, %r8
2834; AVX512-NEXT:    sbbq %r11, %rax
2835; AVX512-NEXT:    sbbb $0, %dil
2836; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
2837; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2838; AVX512-NEXT:    cmpq %rax, %r10
2839; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2840; AVX512-NEXT:    movq %rbx, %r8
2841; AVX512-NEXT:    movq (%rsp), %r11 # 8-byte Reload
2842; AVX512-NEXT:    sbbq %r11, %r8
2843; AVX512-NEXT:    setb %r8b
2844; AVX512-NEXT:    cmpq %r10, %rax
2845; AVX512-NEXT:    sbbq %rbx, %r11
2846; AVX512-NEXT:    sbbb $0, %r8b
2847; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2848; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
2849; AVX512-NEXT:    cmpq %rbx, %r11
2850; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2851; AVX512-NEXT:    movq %r14, %r10
2852; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2853; AVX512-NEXT:    sbbq %rax, %r10
2854; AVX512-NEXT:    setb %r10b
2855; AVX512-NEXT:    cmpq %r11, %rbx
2856; AVX512-NEXT:    sbbq %r14, %rax
2857; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2858; AVX512-NEXT:    sbbb $0, %r10b
2859; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
2860; AVX512-NEXT:    cmpq %r15, %r11
2861; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2862; AVX512-NEXT:    movq %rax, %rbx
2863; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2864; AVX512-NEXT:    sbbq %r14, %rbx
2865; AVX512-NEXT:    setb %bl
2866; AVX512-NEXT:    cmpq %r11, %r15
2867; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
2868; AVX512-NEXT:    sbbq %rax, %r14
2869; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r14
2870; AVX512-NEXT:    sbbb $0, %bl
2871; AVX512-NEXT:    cmpq %r11, %r14
2872; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
2873; AVX512-NEXT:    movq %rax, %r15
2874; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
2875; AVX512-NEXT:    sbbq %r12, %r15
2876; AVX512-NEXT:    setb %r15b
2877; AVX512-NEXT:    cmpq %r14, %r11
2878; AVX512-NEXT:    sbbq %rax, %r12
2879; AVX512-NEXT:    sbbb $0, %r15b
2880; AVX512-NEXT:    movzbl %r15b, %r11d
2881; AVX512-NEXT:    andl $3, %r11d
2882; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
2883; AVX512-NEXT:    movb %r11b, 4(%r14)
2884; AVX512-NEXT:    movzbl %bl, %r11d
2885; AVX512-NEXT:    andl $3, %r11d
2886; AVX512-NEXT:    movzbl %r10b, %r10d
2887; AVX512-NEXT:    andl $3, %r10d
2888; AVX512-NEXT:    leaq (%r10,%r11,4), %r10
2889; AVX512-NEXT:    movzbl %r8b, %r8d
2890; AVX512-NEXT:    andl $3, %r8d
2891; AVX512-NEXT:    shll $4, %r8d
2892; AVX512-NEXT:    orq %r10, %r8
2893; AVX512-NEXT:    movzbl %dil, %edi
2894; AVX512-NEXT:    andl $3, %edi
2895; AVX512-NEXT:    shll $6, %edi
2896; AVX512-NEXT:    orq %r8, %rdi
2897; AVX512-NEXT:    movzbl %sil, %esi
2898; AVX512-NEXT:    andl $3, %esi
2899; AVX512-NEXT:    shll $8, %esi
2900; AVX512-NEXT:    orq %rdi, %rsi
2901; AVX512-NEXT:    movzbl %dl, %edx
2902; AVX512-NEXT:    andl $3, %edx
2903; AVX512-NEXT:    shll $10, %edx
2904; AVX512-NEXT:    movzbl %cl, %ecx
2905; AVX512-NEXT:    andl $3, %ecx
2906; AVX512-NEXT:    shll $12, %ecx
2907; AVX512-NEXT:    orq %rdx, %rcx
2908; AVX512-NEXT:    movzbl %r9b, %edx
2909; AVX512-NEXT:    andl $3, %edx
2910; AVX512-NEXT:    shll $14, %edx
2911; AVX512-NEXT:    orq %rcx, %rdx
2912; AVX512-NEXT:    movzbl %bpl, %eax
2913; AVX512-NEXT:    andl $3, %eax
2914; AVX512-NEXT:    shll $16, %eax
2915; AVX512-NEXT:    orq %rdx, %rax
2916; AVX512-NEXT:    orq %rsi, %rax
2917; AVX512-NEXT:    movzbl %r13b, %ecx
2918; AVX512-NEXT:    andl $3, %ecx
2919; AVX512-NEXT:    shll $18, %ecx
2920; AVX512-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2921; AVX512-NEXT:    andl $3, %edx
2922; AVX512-NEXT:    shll $20, %edx
2923; AVX512-NEXT:    orq %rcx, %rdx
2924; AVX512-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
2925; AVX512-NEXT:    andl $3, %ecx
2926; AVX512-NEXT:    shll $22, %ecx
2927; AVX512-NEXT:    orq %rdx, %rcx
2928; AVX512-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2929; AVX512-NEXT:    andl $3, %edx
2930; AVX512-NEXT:    shll $24, %edx
2931; AVX512-NEXT:    orq %rcx, %rdx
2932; AVX512-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
2933; AVX512-NEXT:    andl $3, %ecx
2934; AVX512-NEXT:    shlq $26, %rcx
2935; AVX512-NEXT:    orq %rdx, %rcx
2936; AVX512-NEXT:    orq %rax, %rcx
2937; AVX512-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
2938; AVX512-NEXT:    andl $3, %eax
2939; AVX512-NEXT:    shlq $28, %rax
2940; AVX512-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
2941; AVX512-NEXT:    andl $3, %edx
2942; AVX512-NEXT:    shlq $30, %rdx
2943; AVX512-NEXT:    orq %rax, %rdx
2944; AVX512-NEXT:    orq %rcx, %rdx
2945; AVX512-NEXT:    movq %r14, %rax
2946; AVX512-NEXT:    movl %edx, (%r14)
2947; AVX512-NEXT:    addq $88, %rsp
2948; AVX512-NEXT:    popq %rbx
2949; AVX512-NEXT:    popq %r12
2950; AVX512-NEXT:    popq %r13
2951; AVX512-NEXT:    popq %r14
2952; AVX512-NEXT:    popq %r15
2953; AVX512-NEXT:    popq %rbp
2954; AVX512-NEXT:    retq
2955;
2956; X86-LABEL: ucmp_uncommon_vectors:
2957; X86:       # %bb.0:
2958; X86-NEXT:    pushl %ebp
2959; X86-NEXT:    pushl %ebx
2960; X86-NEXT:    pushl %edi
2961; X86-NEXT:    pushl %esi
2962; X86-NEXT:    subl $132, %esp
2963; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2964; X86-NEXT:    andl $127, %eax
2965; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2966; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2967; X86-NEXT:    andl $127, %eax
2968; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2969; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2970; X86-NEXT:    andl $127, %eax
2971; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2972; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2973; X86-NEXT:    andl $127, %eax
2974; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2975; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2976; X86-NEXT:    andl $127, %eax
2977; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2978; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2979; X86-NEXT:    andl $127, %eax
2980; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2981; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2982; X86-NEXT:    andl $127, %eax
2983; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2984; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2985; X86-NEXT:    andl $127, %eax
2986; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2987; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2988; X86-NEXT:    andl $127, %eax
2989; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2990; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2991; X86-NEXT:    andl $127, %eax
2992; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2993; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2994; X86-NEXT:    andl $127, %eax
2995; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2996; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2997; X86-NEXT:    andl $127, %eax
2998; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2999; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3000; X86-NEXT:    andl $127, %eax
3001; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3002; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3003; X86-NEXT:    andl $127, %eax
3004; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3005; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3006; X86-NEXT:    andl $127, %eax
3007; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3008; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3009; X86-NEXT:    andl $127, %eax
3010; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3011; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3012; X86-NEXT:    andl $127, %eax
3013; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3014; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3015; X86-NEXT:    andl $127, %eax
3016; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3017; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3018; X86-NEXT:    andl $127, %eax
3019; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3020; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3021; X86-NEXT:    andl $127, %eax
3022; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3023; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3024; X86-NEXT:    andl $127, %eax
3025; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3026; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3027; X86-NEXT:    andl $127, %eax
3028; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3029; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3030; X86-NEXT:    andl $127, %eax
3031; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3032; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3033; X86-NEXT:    andl $127, %eax
3034; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3035; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3036; X86-NEXT:    andl $127, %eax
3037; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3038; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3039; X86-NEXT:    andl $127, %eax
3040; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3041; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3042; X86-NEXT:    andl $127, %eax
3043; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3044; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3045; X86-NEXT:    andl $127, %eax
3046; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3047; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3048; X86-NEXT:    andl $127, %eax
3049; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3050; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3051; X86-NEXT:    andl $127, %eax
3052; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3053; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
3054; X86-NEXT:    andl $127, %ebp
3055; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3056; X86-NEXT:    andl $127, %eax
3057; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3058; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3059; X86-NEXT:    andl $127, %edx
3060; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3061; X86-NEXT:    andl $127, %edi
3062; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3063; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
3064; X86-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
3065; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3066; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
3067; X86-NEXT:    movl %ebx, %esi
3068; X86-NEXT:    sbbl %eax, %esi
3069; X86-NEXT:    movl %edi, %esi
3070; X86-NEXT:    sbbl %edx, %esi
3071; X86-NEXT:    movl $0, %esi
3072; X86-NEXT:    sbbl %esi, %esi
3073; X86-NEXT:    setb %cl
3074; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3075; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
3076; X86-NEXT:    sbbl %ebx, %eax
3077; X86-NEXT:    sbbl %edi, %edx
3078; X86-NEXT:    movl $0, %eax
3079; X86-NEXT:    sbbl %eax, %eax
3080; X86-NEXT:    sbbb $0, %cl
3081; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3082; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3083; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
3084; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3085; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3086; X86-NEXT:    movl %esi, %edi
3087; X86-NEXT:    sbbl %edx, %edi
3088; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3089; X86-NEXT:    movl %eax, %edi
3090; X86-NEXT:    sbbl %ebp, %edi
3091; X86-NEXT:    movl $0, %edi
3092; X86-NEXT:    sbbl %edi, %edi
3093; X86-NEXT:    setb %bl
3094; X86-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
3095; X86-NEXT:    sbbl %esi, %edx
3096; X86-NEXT:    sbbl %eax, %ebp
3097; X86-NEXT:    movl $0, %eax
3098; X86-NEXT:    sbbl %eax, %eax
3099; X86-NEXT:    sbbb $0, %bl
3100; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3101; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3102; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
3103; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3104; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3105; X86-NEXT:    movl %esi, %edi
3106; X86-NEXT:    sbbl %edx, %edi
3107; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3108; X86-NEXT:    movl %eax, %edi
3109; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3110; X86-NEXT:    sbbl %ebp, %edi
3111; X86-NEXT:    movl $0, %edi
3112; X86-NEXT:    sbbl %edi, %edi
3113; X86-NEXT:    setb %bl
3114; X86-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
3115; X86-NEXT:    sbbl %esi, %edx
3116; X86-NEXT:    sbbl %eax, %ebp
3117; X86-NEXT:    movl $0, %eax
3118; X86-NEXT:    sbbl %eax, %eax
3119; X86-NEXT:    sbbb $0, %bl
3120; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3121; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3122; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
3123; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3124; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3125; X86-NEXT:    movl %esi, %edi
3126; X86-NEXT:    sbbl %edx, %edi
3127; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3128; X86-NEXT:    movl %eax, %edi
3129; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3130; X86-NEXT:    sbbl %ebp, %edi
3131; X86-NEXT:    movl $0, %edi
3132; X86-NEXT:    sbbl %edi, %edi
3133; X86-NEXT:    setb %bl
3134; X86-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
3135; X86-NEXT:    sbbl %esi, %edx
3136; X86-NEXT:    sbbl %eax, %ebp
3137; X86-NEXT:    movl $0, %eax
3138; X86-NEXT:    sbbl %eax, %eax
3139; X86-NEXT:    sbbb $0, %bl
3140; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3141; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3142; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
3143; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3144; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3145; X86-NEXT:    movl %esi, %edi
3146; X86-NEXT:    sbbl %edx, %edi
3147; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3148; X86-NEXT:    movl %eax, %edi
3149; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3150; X86-NEXT:    sbbl %ebp, %edi
3151; X86-NEXT:    movl $0, %edi
3152; X86-NEXT:    sbbl %edi, %edi
3153; X86-NEXT:    setb %bl
3154; X86-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
3155; X86-NEXT:    sbbl %esi, %edx
3156; X86-NEXT:    sbbl %eax, %ebp
3157; X86-NEXT:    movl $0, %eax
3158; X86-NEXT:    sbbl %eax, %eax
3159; X86-NEXT:    sbbb $0, %bl
3160; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3161; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3162; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
3163; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3164; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3165; X86-NEXT:    movl %esi, %edi
3166; X86-NEXT:    sbbl %edx, %edi
3167; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3168; X86-NEXT:    movl %eax, %edi
3169; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3170; X86-NEXT:    sbbl %ebp, %edi
3171; X86-NEXT:    movl $0, %edi
3172; X86-NEXT:    sbbl %edi, %edi
3173; X86-NEXT:    setb %bl
3174; X86-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
3175; X86-NEXT:    sbbl %esi, %edx
3176; X86-NEXT:    sbbl %eax, %ebp
3177; X86-NEXT:    movl $0, %eax
3178; X86-NEXT:    sbbl %eax, %eax
3179; X86-NEXT:    sbbb $0, %bl
3180; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3181; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3182; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
3183; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3184; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3185; X86-NEXT:    movl %esi, %edi
3186; X86-NEXT:    sbbl %edx, %edi
3187; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3188; X86-NEXT:    movl %eax, %edi
3189; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3190; X86-NEXT:    sbbl %ebp, %edi
3191; X86-NEXT:    movl $0, %edi
3192; X86-NEXT:    sbbl %edi, %edi
3193; X86-NEXT:    setb %bl
3194; X86-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
3195; X86-NEXT:    sbbl %esi, %edx
3196; X86-NEXT:    sbbl %eax, %ebp
3197; X86-NEXT:    movl $0, %eax
3198; X86-NEXT:    sbbl %eax, %eax
3199; X86-NEXT:    sbbb $0, %bl
3200; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3201; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3202; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
3203; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3204; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3205; X86-NEXT:    movl %esi, %edi
3206; X86-NEXT:    sbbl %edx, %edi
3207; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3208; X86-NEXT:    movl %ebp, %edi
3209; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
3210; X86-NEXT:    sbbl %ebx, %edi
3211; X86-NEXT:    movl $0, %edi
3212; X86-NEXT:    sbbl %edi, %edi
3213; X86-NEXT:    setb %cl
3214; X86-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
3215; X86-NEXT:    sbbl %esi, %edx
3216; X86-NEXT:    sbbl %ebp, %ebx
3217; X86-NEXT:    movl $0, %eax
3218; X86-NEXT:    sbbl %eax, %eax
3219; X86-NEXT:    sbbb $0, %cl
3220; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3221; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3222; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
3223; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3224; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3225; X86-NEXT:    movl %esi, %edi
3226; X86-NEXT:    sbbl %edx, %edi
3227; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3228; X86-NEXT:    movl %ebp, %edi
3229; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
3230; X86-NEXT:    sbbl %ebx, %edi
3231; X86-NEXT:    movl $0, %edi
3232; X86-NEXT:    sbbl %edi, %edi
3233; X86-NEXT:    setb %cl
3234; X86-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
3235; X86-NEXT:    sbbl %esi, %edx
3236; X86-NEXT:    sbbl %ebp, %ebx
3237; X86-NEXT:    movl $0, %eax
3238; X86-NEXT:    sbbl %eax, %eax
3239; X86-NEXT:    sbbb $0, %cl
3240; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3241; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3242; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
3243; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
3244; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3245; X86-NEXT:    movl %esi, %edi
3246; X86-NEXT:    sbbl %edx, %edi
3247; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3248; X86-NEXT:    movl %ebp, %edi
3249; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
3250; X86-NEXT:    sbbl %ebx, %edi
3251; X86-NEXT:    movl $0, %edi
3252; X86-NEXT:    sbbl %edi, %edi
3253; X86-NEXT:    setb %cl
3254; X86-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
3255; X86-NEXT:    sbbl %esi, %edx
3256; X86-NEXT:    sbbl %ebp, %ebx
3257; X86-NEXT:    movl $0, %eax
3258; X86-NEXT:    sbbl %eax, %eax
3259; X86-NEXT:    sbbb $0, %cl
3260; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3261; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3262; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3263; X86-NEXT:    cmpl %eax, %ecx
3264; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3265; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3266; X86-NEXT:    movl %edi, %edx
3267; X86-NEXT:    sbbl %esi, %edx
3268; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3269; X86-NEXT:    movl %ebp, %edx
3270; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
3271; X86-NEXT:    sbbl %ebx, %edx
3272; X86-NEXT:    movl $0, %edx
3273; X86-NEXT:    sbbl %edx, %edx
3274; X86-NEXT:    setb %dl
3275; X86-NEXT:    cmpl %ecx, %eax
3276; X86-NEXT:    sbbl %edi, %esi
3277; X86-NEXT:    sbbl %ebp, %ebx
3278; X86-NEXT:    movl $0, %eax
3279; X86-NEXT:    sbbl %eax, %eax
3280; X86-NEXT:    sbbb $0, %dl
3281; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3282; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3283; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3284; X86-NEXT:    cmpl %eax, %ecx
3285; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3286; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3287; X86-NEXT:    movl %edi, %ebx
3288; X86-NEXT:    sbbl %esi, %ebx
3289; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3290; X86-NEXT:    movl %ebp, %ebx
3291; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
3292; X86-NEXT:    sbbl %edx, %ebx
3293; X86-NEXT:    movl $0, %ebx
3294; X86-NEXT:    sbbl %ebx, %ebx
3295; X86-NEXT:    setb %bl
3296; X86-NEXT:    cmpl %ecx, %eax
3297; X86-NEXT:    sbbl %edi, %esi
3298; X86-NEXT:    sbbl %ebp, %edx
3299; X86-NEXT:    movl $0, %eax
3300; X86-NEXT:    sbbl %eax, %eax
3301; X86-NEXT:    sbbb $0, %bl
3302; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3303; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3304; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3305; X86-NEXT:    cmpl %eax, %ecx
3306; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3307; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3308; X86-NEXT:    movl %edi, %ebx
3309; X86-NEXT:    sbbl %esi, %ebx
3310; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
3311; X86-NEXT:    movl %ebp, %ebx
3312; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
3313; X86-NEXT:    sbbl %edx, %ebx
3314; X86-NEXT:    movl $0, %ebx
3315; X86-NEXT:    sbbl %ebx, %ebx
3316; X86-NEXT:    setb %bl
3317; X86-NEXT:    cmpl %ecx, %eax
3318; X86-NEXT:    sbbl %edi, %esi
3319; X86-NEXT:    sbbl %ebp, %edx
3320; X86-NEXT:    movl $0, %eax
3321; X86-NEXT:    sbbl %eax, %eax
3322; X86-NEXT:    sbbb $0, %bl
3323; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3324; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3325; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
3326; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3327; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3328; X86-NEXT:    movl %edi, %ebp
3329; X86-NEXT:    sbbl %esi, %ebp
3330; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
3331; X86-NEXT:    movl %ebx, %ebp
3332; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
3333; X86-NEXT:    sbbl %edx, %ebp
3334; X86-NEXT:    movl $0, %ebp
3335; X86-NEXT:    sbbl %ebp, %ebp
3336; X86-NEXT:    setb %cl
3337; X86-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
3338; X86-NEXT:    sbbl %edi, %esi
3339; X86-NEXT:    sbbl %ebx, %edx
3340; X86-NEXT:    movl $0, %eax
3341; X86-NEXT:    sbbl %eax, %eax
3342; X86-NEXT:    sbbb $0, %cl
3343; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
3344; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3345; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
3346; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3347; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
3348; X86-NEXT:    movl %ebp, %eax
3349; X86-NEXT:    sbbl %edi, %eax
3350; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3351; X86-NEXT:    movl %ecx, %eax
3352; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
3353; X86-NEXT:    sbbl %edx, %eax
3354; X86-NEXT:    movl $0, %eax
3355; X86-NEXT:    sbbl %eax, %eax
3356; X86-NEXT:    setb %bl
3357; X86-NEXT:    cmpl %esi, {{[0-9]+}}(%esp)
3358; X86-NEXT:    sbbl %ebp, %edi
3359; X86-NEXT:    sbbl %ecx, %edx
3360; X86-NEXT:    movl $0, %ecx
3361; X86-NEXT:    sbbl %ecx, %ecx
3362; X86-NEXT:    sbbb $0, %bl
3363; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3364; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %esi
3365; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3366; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3367; X86-NEXT:    movl %ecx, %ebp
3368; X86-NEXT:    sbbl %edi, %ebp
3369; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3370; X86-NEXT:    movl %eax, %ebp
3371; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
3372; X86-NEXT:    sbbl %edx, %ebp
3373; X86-NEXT:    movl $0, %ebp
3374; X86-NEXT:    sbbl %ebp, %ebp
3375; X86-NEXT:    setb %bh
3376; X86-NEXT:    cmpl %esi, {{[0-9]+}}(%esp)
3377; X86-NEXT:    sbbl %ecx, %edi
3378; X86-NEXT:    sbbl %eax, %edx
3379; X86-NEXT:    movl $0, %ecx
3380; X86-NEXT:    sbbl %ecx, %ecx
3381; X86-NEXT:    sbbb $0, %bh
3382; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
3383; X86-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
3384; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
3385; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3386; X86-NEXT:    movl %esi, %ebp
3387; X86-NEXT:    sbbl %edi, %ebp
3388; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
3389; X86-NEXT:    movl %edx, %ebp
3390; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3391; X86-NEXT:    sbbl %eax, %ebp
3392; X86-NEXT:    movl $0, %ebp
3393; X86-NEXT:    sbbl %ebp, %ebp
3394; X86-NEXT:    setb %cl
3395; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
3396; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ebp
3397; X86-NEXT:    sbbl %esi, %edi
3398; X86-NEXT:    sbbl %edx, %eax
3399; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3400; X86-NEXT:    sbbl %eax, %eax
3401; X86-NEXT:    sbbb $0, %cl
3402; X86-NEXT:    movzbl %cl, %ecx
3403; X86-NEXT:    andl $3, %ecx
3404; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
3405; X86-NEXT:    movb %cl, 4(%edi)
3406; X86-NEXT:    movzbl %bh, %ebp
3407; X86-NEXT:    movzbl %bl, %ecx
3408; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
3409; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
3410; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
3411; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
3412; X86-NEXT:    andl $3, %ebp
3413; X86-NEXT:    andl $3, %ecx
3414; X86-NEXT:    leal (%ecx,%ebp,4), %ecx
3415; X86-NEXT:    andl $3, %eax
3416; X86-NEXT:    shll $4, %eax
3417; X86-NEXT:    orl %ecx, %eax
3418; X86-NEXT:    andl $3, %ebx
3419; X86-NEXT:    shll $6, %ebx
3420; X86-NEXT:    orl %eax, %ebx
3421; X86-NEXT:    andl $3, %esi
3422; X86-NEXT:    shll $8, %esi
3423; X86-NEXT:    orl %ebx, %esi
3424; X86-NEXT:    andl $3, %edx
3425; X86-NEXT:    shll $10, %edx
3426; X86-NEXT:    orl %esi, %edx
3427; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
3428; X86-NEXT:    andl $3, %eax
3429; X86-NEXT:    shll $12, %eax
3430; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
3431; X86-NEXT:    andl $3, %ecx
3432; X86-NEXT:    shll $14, %ecx
3433; X86-NEXT:    orl %eax, %ecx
3434; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
3435; X86-NEXT:    andl $3, %eax
3436; X86-NEXT:    shll $16, %eax
3437; X86-NEXT:    orl %ecx, %eax
3438; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
3439; X86-NEXT:    andl $3, %esi
3440; X86-NEXT:    shll $18, %esi
3441; X86-NEXT:    orl %eax, %esi
3442; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
3443; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
3444; X86-NEXT:    andl $3, %eax
3445; X86-NEXT:    shll $20, %eax
3446; X86-NEXT:    orl %esi, %eax
3447; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
3448; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
3449; X86-NEXT:    orl %edx, %eax
3450; X86-NEXT:    andl $3, %ecx
3451; X86-NEXT:    shll $22, %ecx
3452; X86-NEXT:    andl $3, %esi
3453; X86-NEXT:    shll $24, %esi
3454; X86-NEXT:    orl %ecx, %esi
3455; X86-NEXT:    andl $3, %ebx
3456; X86-NEXT:    shll $26, %ebx
3457; X86-NEXT:    orl %esi, %ebx
3458; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
3459; X86-NEXT:    andl $3, %ecx
3460; X86-NEXT:    shll $28, %ecx
3461; X86-NEXT:    orl %ebx, %ecx
3462; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
3463; X86-NEXT:    shll $30, %edx
3464; X86-NEXT:    orl %ecx, %edx
3465; X86-NEXT:    orl %eax, %edx
3466; X86-NEXT:    movl %edx, (%edi)
3467; X86-NEXT:    movl %edi, %eax
3468; X86-NEXT:    addl $132, %esp
3469; X86-NEXT:    popl %esi
3470; X86-NEXT:    popl %edi
3471; X86-NEXT:    popl %ebx
3472; X86-NEXT:    popl %ebp
3473; X86-NEXT:    retl $4
3474  %1 = call <17 x i2> @llvm.ucmp(<17 x i71> %x, <17 x i71> %y)
3475  ret <17 x i2> %1
3476}
3477