xref: /llvm-project/llvm/test/CodeGen/X86/sshl_sat_vec.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
4; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
5
6declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
7declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
8declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
9declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)
10
; Test: signed saturating shift-left intrinsic @llvm.sshl.sat.v2i64 lowered for
; three targets (see RUN lines above): generic x86-64 SSE2 (X64), x86-64 AVX2
; (X64-AVX2), and i686 with cmov (X86). The CHECK bodies below were generated
; by utils/update_llc_test_checks.py — do not hand-edit them; regenerate with
; that script after any codegen change.
; NOTE(review): every line in this copy carries a fused leading decimal number
; (e.g. "11define", "12; X64-LABEL:") that appears to be cross-reference line
; numbering merged into the text during extraction — as-is this is not valid
; IR; confirm against the upstream llvm-project test before relying on it.
11define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
12; X64-LABEL: vec_v2i64:
13; X64:       # %bb.0:
14; X64-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
15; X64-NEXT:    movdqa %xmm2, %xmm3
16; X64-NEXT:    psrlq %xmm1, %xmm3
17; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
18; X64-NEXT:    movdqa %xmm2, %xmm5
19; X64-NEXT:    psrlq %xmm4, %xmm5
20; X64-NEXT:    movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
21; X64-NEXT:    movdqa %xmm0, %xmm6
22; X64-NEXT:    psllq %xmm1, %xmm6
23; X64-NEXT:    movdqa %xmm0, %xmm3
24; X64-NEXT:    psllq %xmm4, %xmm3
25; X64-NEXT:    movdqa %xmm3, %xmm7
26; X64-NEXT:    movsd {{.*#+}} xmm3 = xmm6[0],xmm3[1]
27; X64-NEXT:    psrlq %xmm1, %xmm6
28; X64-NEXT:    psrlq %xmm4, %xmm7
29; X64-NEXT:    movsd {{.*#+}} xmm7 = xmm6[0],xmm7[1]
30; X64-NEXT:    xorpd %xmm5, %xmm7
31; X64-NEXT:    psubq %xmm5, %xmm7
32; X64-NEXT:    pcmpeqd %xmm0, %xmm7
33; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm7[1,0,3,2]
34; X64-NEXT:    pand %xmm7, %xmm1
35; X64-NEXT:    andpd %xmm1, %xmm3
36; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
37; X64-NEXT:    pand %xmm2, %xmm0
38; X64-NEXT:    pxor %xmm5, %xmm5
39; X64-NEXT:    pcmpgtd %xmm4, %xmm5
40; X64-NEXT:    por %xmm2, %xmm5
41; X64-NEXT:    pcmpeqd %xmm2, %xmm2
42; X64-NEXT:    pxor %xmm5, %xmm2
43; X64-NEXT:    por %xmm0, %xmm2
44; X64-NEXT:    pandn %xmm2, %xmm1
45; X64-NEXT:    por %xmm3, %xmm1
46; X64-NEXT:    movdqa %xmm1, %xmm0
47; X64-NEXT:    retq
48;
49; X64-AVX2-LABEL: vec_v2i64:
50; X64-AVX2:       # %bb.0:
51; X64-AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
52; X64-AVX2-NEXT:    # xmm2 = mem[0,0]
53; X64-AVX2-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807]
54; X64-AVX2-NEXT:    # xmm3 = mem[0,0]
55; X64-AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm3
56; X64-AVX2-NEXT:    vpsrlvq %xmm1, %xmm2, %xmm2
57; X64-AVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm4
58; X64-AVX2-NEXT:    vpsrlvq %xmm1, %xmm4, %xmm1
59; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
60; X64-AVX2-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
61; X64-AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
62; X64-AVX2-NEXT:    vblendvpd %xmm0, %xmm4, %xmm3, %xmm0
63; X64-AVX2-NEXT:    retq
64;
65; X86-LABEL: vec_v2i64:
66; X86:       # %bb.0:
67; X86-NEXT:    pushl %ebp
68; X86-NEXT:    pushl %ebx
69; X86-NEXT:    pushl %edi
70; X86-NEXT:    pushl %esi
71; X86-NEXT:    subl $20, %esp
72; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
73; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
74; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
75; X86-NEXT:    movl %edx, %eax
76; X86-NEXT:    shll %cl, %eax
77; X86-NEXT:    shldl %cl, %edx, %edi
78; X86-NEXT:    xorl %edx, %edx
79; X86-NEXT:    testb $32, %cl
80; X86-NEXT:    cmovnel %eax, %edi
81; X86-NEXT:    cmovnel %edx, %eax
82; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
83; X86-NEXT:    movl %edi, %ebx
84; X86-NEXT:    sarl %cl, %ebx
85; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
86; X86-NEXT:    movl %edi, %eax
87; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
88; X86-NEXT:    sarl $31, %eax
89; X86-NEXT:    testb $32, %cl
90; X86-NEXT:    cmovel %ebx, %eax
91; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
92; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
93; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
94; X86-NEXT:    movl %esi, %eax
95; X86-NEXT:    movb %ch, %cl
96; X86-NEXT:    shll %cl, %eax
97; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
98; X86-NEXT:    movl %ebp, %ebx
99; X86-NEXT:    shldl %cl, %esi, %ebx
100; X86-NEXT:    testb $32, %ch
101; X86-NEXT:    cmovnel %eax, %ebx
102; X86-NEXT:    cmovnel %edx, %eax
103; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
104; X86-NEXT:    movl %ebx, %esi
105; X86-NEXT:    sarl %cl, %esi
106; X86-NEXT:    movl %ebx, %edx
107; X86-NEXT:    sarl $31, %edx
108; X86-NEXT:    testb $32, %ch
109; X86-NEXT:    cmovel %esi, %edx
110; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
111; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
112; X86-NEXT:    shrdl %cl, %edi, %eax
113; X86-NEXT:    testb $32, %cl
114; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
115; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
116; X86-NEXT:    movb %ch, %cl
117; X86-NEXT:    shrdl %cl, %ebx, %edi
118; X86-NEXT:    testb $32, %ch
119; X86-NEXT:    cmovnel %esi, %edi
120; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
121; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
122; X86-NEXT:    xorl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
123; X86-NEXT:    sarl $31, %esi
124; X86-NEXT:    movl %esi, %ecx
125; X86-NEXT:    xorl $2147483647, %ecx # imm = 0x7FFFFFFF
126; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
127; X86-NEXT:    notl %esi
128; X86-NEXT:    cmovel (%esp), %esi # 4-byte Folded Reload
129; X86-NEXT:    movl %esi, (%esp) # 4-byte Spill
130; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
131; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edi
132; X86-NEXT:    xorl %ebp, %edx
133; X86-NEXT:    sarl $31, %ebp
134; X86-NEXT:    movl %ebp, %esi
135; X86-NEXT:    xorl $2147483647, %esi # imm = 0x7FFFFFFF
136; X86-NEXT:    orl %edx, %edi
137; X86-NEXT:    notl %ebp
138; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
139; X86-NEXT:    cmovel %ebx, %esi
140; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
141; X86-NEXT:    movl %esi, 12(%eax)
142; X86-NEXT:    movl %ebp, 8(%eax)
143; X86-NEXT:    movl %ecx, 4(%eax)
144; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
145; X86-NEXT:    movl %ecx, (%eax)
146; X86-NEXT:    addl $20, %esp
147; X86-NEXT:    popl %esi
148; X86-NEXT:    popl %edi
149; X86-NEXT:    popl %ebx
150; X86-NEXT:    popl %ebp
151; X86-NEXT:    retl $4
152  %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
153  ret <2 x i64> %tmp
154}
155
; Test: @llvm.sshl.sat.v4i32 lowering. SSE2 (X64) emulates the variable
; vector shift via pslld/cvttps2dq exponent tricks and pmuludq; AVX2 uses
; vpsllvd/vpsravd with vblendvps selecting the saturated bound; i686 (X86)
; falls back to four scalar shift/compare/cmov sequences.
; Autogenerated CHECK lines — regenerate with update_llc_test_checks.py
; rather than editing by hand.
; NOTE(review): leading decimal prefixes on each line look like extraction
; artifacts (xref line numbers fused into the text) — confirm upstream.
156define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
157; X64-LABEL: vec_v4i32:
158; X64:       # %bb.0:
159; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
160; X64-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7]
161; X64-NEXT:    pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7]
162; X64-NEXT:    pslld $23, %xmm1
163; X64-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
164; X64-NEXT:    cvttps2dq %xmm1, %xmm6
165; X64-NEXT:    movdqa %xmm0, %xmm1
166; X64-NEXT:    pmuludq %xmm6, %xmm1
167; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
168; X64-NEXT:    pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
169; X64-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
170; X64-NEXT:    pmuludq %xmm7, %xmm6
171; X64-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
172; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1]
173; X64-NEXT:    pshuflw {{.*#+}} xmm6 = xmm3[2,3,3,3,4,5,6,7]
174; X64-NEXT:    movdqa %xmm2, %xmm7
175; X64-NEXT:    psrad %xmm6, %xmm7
176; X64-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
177; X64-NEXT:    movdqa %xmm1, %xmm6
178; X64-NEXT:    psrad %xmm3, %xmm6
179; X64-NEXT:    punpckhqdq {{.*#+}} xmm6 = xmm6[1],xmm7[1]
180; X64-NEXT:    movdqa %xmm2, %xmm3
181; X64-NEXT:    psrad %xmm4, %xmm3
182; X64-NEXT:    psrad %xmm5, %xmm1
183; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
184; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],xmm6[0,3]
185; X64-NEXT:    pcmpeqd %xmm0, %xmm1
186; X64-NEXT:    pand %xmm1, %xmm2
187; X64-NEXT:    pxor %xmm3, %xmm3
188; X64-NEXT:    pcmpgtd %xmm0, %xmm3
189; X64-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
190; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
191; X64-NEXT:    por %xmm3, %xmm0
192; X64-NEXT:    pandn %xmm0, %xmm1
193; X64-NEXT:    por %xmm2, %xmm1
194; X64-NEXT:    movdqa %xmm1, %xmm0
195; X64-NEXT:    retq
196;
197; X64-AVX2-LABEL: vec_v4i32:
198; X64-AVX2:       # %bb.0:
199; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
200; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
201; X64-AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm2
202; X64-AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3
203; X64-AVX2-NEXT:    vpsravd %xmm1, %xmm3, %xmm1
204; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
205; X64-AVX2-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
206; X64-AVX2-NEXT:    retq
207;
208; X86-LABEL: vec_v4i32:
209; X86:       # %bb.0:
210; X86-NEXT:    pushl %ebp
211; X86-NEXT:    pushl %ebx
212; X86-NEXT:    pushl %edi
213; X86-NEXT:    pushl %esi
214; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
215; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
216; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
217; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
218; X86-NEXT:    movl %edi, %edx
219; X86-NEXT:    shll %cl, %edx
220; X86-NEXT:    movl %edx, %ebp
221; X86-NEXT:    sarl %cl, %ebp
222; X86-NEXT:    xorl %ebx, %ebx
223; X86-NEXT:    testl %edi, %edi
224; X86-NEXT:    sets %bl
225; X86-NEXT:    addl $2147483647, %ebx # imm = 0x7FFFFFFF
226; X86-NEXT:    cmpl %ebp, %edi
227; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
228; X86-NEXT:    cmovel %edx, %ebx
229; X86-NEXT:    movl %edi, %ebp
230; X86-NEXT:    movb %ch, %cl
231; X86-NEXT:    shll %cl, %ebp
232; X86-NEXT:    movl %ebp, %eax
233; X86-NEXT:    sarl %cl, %eax
234; X86-NEXT:    xorl %edx, %edx
235; X86-NEXT:    testl %edi, %edi
236; X86-NEXT:    sets %dl
237; X86-NEXT:    addl $2147483647, %edx # imm = 0x7FFFFFFF
238; X86-NEXT:    cmpl %eax, %edi
239; X86-NEXT:    cmovel %ebp, %edx
240; X86-NEXT:    movl %esi, %edi
241; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
242; X86-NEXT:    shll %cl, %edi
243; X86-NEXT:    movl %edi, %ebp
244; X86-NEXT:    sarl %cl, %ebp
245; X86-NEXT:    xorl %eax, %eax
246; X86-NEXT:    testl %esi, %esi
247; X86-NEXT:    sets %al
248; X86-NEXT:    addl $2147483647, %eax # imm = 0x7FFFFFFF
249; X86-NEXT:    cmpl %ebp, %esi
250; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
251; X86-NEXT:    cmovel %edi, %eax
252; X86-NEXT:    movl %esi, %edi
253; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
254; X86-NEXT:    shll %cl, %edi
255; X86-NEXT:    movl %edi, %ebp
256; X86-NEXT:    sarl %cl, %ebp
257; X86-NEXT:    xorl %ecx, %ecx
258; X86-NEXT:    testl %esi, %esi
259; X86-NEXT:    sets %cl
260; X86-NEXT:    addl $2147483647, %ecx # imm = 0x7FFFFFFF
261; X86-NEXT:    cmpl %ebp, %esi
262; X86-NEXT:    cmovel %edi, %ecx
263; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
264; X86-NEXT:    movl %ecx, 12(%esi)
265; X86-NEXT:    movl %eax, 8(%esi)
266; X86-NEXT:    movl %edx, 4(%esi)
267; X86-NEXT:    movl %ebx, (%esi)
268; X86-NEXT:    movl %esi, %eax
269; X86-NEXT:    popl %esi
270; X86-NEXT:    popl %edi
271; X86-NEXT:    popl %ebx
272; X86-NEXT:    popl %ebp
273; X86-NEXT:    retl $4
274  %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
275  ret <4 x i32> %tmp
276}
277
; Test: @llvm.sshl.sat.v8i16 lowering. SSE2 (X64) builds per-lane shift
; amounts via the pslld $23 / cvttps2dq exponent trick, multiplies with
; pmullw, then does a 4-step variable arithmetic right shift (psraw by
; 8/4/2/1 selected by sign-bit masks); AVX2 widens to 32-bit lanes and uses
; vpsllvd/vpsravd; i686 (X86) does eight scalar 16-bit shift/compare/cmov
; sequences. Autogenerated CHECK lines — regenerate with
; update_llc_test_checks.py rather than editing by hand.
; NOTE(review): leading decimal prefixes on each line look like extraction
; artifacts (xref line numbers fused into the text) — confirm upstream.
278define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
279; X64-LABEL: vec_v8i16:
280; X64:       # %bb.0:
281; X64-NEXT:    movdqa %xmm1, %xmm2
282; X64-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
283; X64-NEXT:    pslld $23, %xmm2
284; X64-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
285; X64-NEXT:    paddd %xmm3, %xmm2
286; X64-NEXT:    cvttps2dq %xmm2, %xmm2
287; X64-NEXT:    pslld $16, %xmm2
288; X64-NEXT:    psrad $16, %xmm2
289; X64-NEXT:    movdqa %xmm1, %xmm4
290; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3]
291; X64-NEXT:    pslld $23, %xmm4
292; X64-NEXT:    paddd %xmm3, %xmm4
293; X64-NEXT:    cvttps2dq %xmm4, %xmm3
294; X64-NEXT:    pslld $16, %xmm3
295; X64-NEXT:    psrad $16, %xmm3
296; X64-NEXT:    packssdw %xmm2, %xmm3
297; X64-NEXT:    pmullw %xmm0, %xmm3
298; X64-NEXT:    psllw $12, %xmm1
299; X64-NEXT:    movdqa %xmm1, %xmm2
300; X64-NEXT:    psraw $15, %xmm2
301; X64-NEXT:    movdqa %xmm3, %xmm4
302; X64-NEXT:    psraw $8, %xmm4
303; X64-NEXT:    pand %xmm2, %xmm4
304; X64-NEXT:    pandn %xmm3, %xmm2
305; X64-NEXT:    por %xmm4, %xmm2
306; X64-NEXT:    paddw %xmm1, %xmm1
307; X64-NEXT:    movdqa %xmm1, %xmm4
308; X64-NEXT:    psraw $15, %xmm4
309; X64-NEXT:    movdqa %xmm4, %xmm5
310; X64-NEXT:    pandn %xmm2, %xmm5
311; X64-NEXT:    psraw $4, %xmm2
312; X64-NEXT:    pand %xmm4, %xmm2
313; X64-NEXT:    por %xmm5, %xmm2
314; X64-NEXT:    paddw %xmm1, %xmm1
315; X64-NEXT:    movdqa %xmm1, %xmm4
316; X64-NEXT:    psraw $15, %xmm4
317; X64-NEXT:    movdqa %xmm4, %xmm5
318; X64-NEXT:    pandn %xmm2, %xmm5
319; X64-NEXT:    psraw $2, %xmm2
320; X64-NEXT:    pand %xmm4, %xmm2
321; X64-NEXT:    por %xmm5, %xmm2
322; X64-NEXT:    paddw %xmm1, %xmm1
323; X64-NEXT:    psraw $15, %xmm1
324; X64-NEXT:    movdqa %xmm1, %xmm4
325; X64-NEXT:    pandn %xmm2, %xmm4
326; X64-NEXT:    psraw $1, %xmm2
327; X64-NEXT:    pand %xmm1, %xmm2
328; X64-NEXT:    por %xmm4, %xmm2
329; X64-NEXT:    pcmpeqw %xmm0, %xmm2
330; X64-NEXT:    pand %xmm2, %xmm3
331; X64-NEXT:    pxor %xmm1, %xmm1
332; X64-NEXT:    pcmpgtw %xmm0, %xmm1
333; X64-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
334; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
335; X64-NEXT:    por %xmm1, %xmm0
336; X64-NEXT:    pandn %xmm0, %xmm2
337; X64-NEXT:    por %xmm3, %xmm2
338; X64-NEXT:    movdqa %xmm2, %xmm0
339; X64-NEXT:    retq
340;
341; X64-AVX2-LABEL: vec_v8i16:
342; X64-AVX2:       # %bb.0:
343; X64-AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
344; X64-AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
345; X64-AVX2-NEXT:    vpsllvd %ymm1, %ymm2, %ymm2
346; X64-AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
347; X64-AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
348; X64-AVX2-NEXT:    vpmovsxwd %xmm2, %ymm3
349; X64-AVX2-NEXT:    vpsravd %ymm1, %ymm3, %ymm1
350; X64-AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
351; X64-AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
352; X64-AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1
353; X64-AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
354; X64-AVX2-NEXT:    vpcmpgtw %xmm0, %xmm3, %xmm0
355; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
356; X64-AVX2-NEXT:    vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
357; X64-AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
358; X64-AVX2-NEXT:    vzeroupper
359; X64-AVX2-NEXT:    retq
360;
361; X86-LABEL: vec_v8i16:
362; X86:       # %bb.0:
363; X86-NEXT:    pushl %ebp
364; X86-NEXT:    pushl %ebx
365; X86-NEXT:    pushl %edi
366; X86-NEXT:    pushl %esi
367; X86-NEXT:    subl $16, %esp
368; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
369; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
370; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
371; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
372; X86-NEXT:    movl %edi, %ebx
373; X86-NEXT:    shll %cl, %ebx
374; X86-NEXT:    movswl %bx, %ebp
375; X86-NEXT:    sarl %cl, %ebp
376; X86-NEXT:    xorl %ecx, %ecx
377; X86-NEXT:    testw %di, %di
378; X86-NEXT:    sets %cl
379; X86-NEXT:    addl $32767, %ecx # imm = 0x7FFF
380; X86-NEXT:    cmpw %bp, %di
381; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
382; X86-NEXT:    cmovel %ebx, %ecx
383; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
384; X86-NEXT:    movl %esi, %edi
385; X86-NEXT:    movl %eax, %ecx
386; X86-NEXT:    shll %cl, %edi
387; X86-NEXT:    movswl %di, %ebx
388; X86-NEXT:    sarl %cl, %ebx
389; X86-NEXT:    xorl %eax, %eax
390; X86-NEXT:    testw %si, %si
391; X86-NEXT:    sets %al
392; X86-NEXT:    addl $32767, %eax # imm = 0x7FFF
393; X86-NEXT:    cmpw %bx, %si
394; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
395; X86-NEXT:    cmovel %edi, %eax
396; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
397; X86-NEXT:    movl %edx, %esi
398; X86-NEXT:    shll %cl, %esi
399; X86-NEXT:    movswl %si, %edi
400; X86-NEXT:    sarl %cl, %edi
401; X86-NEXT:    xorl %eax, %eax
402; X86-NEXT:    testw %dx, %dx
403; X86-NEXT:    sets %al
404; X86-NEXT:    addl $32767, %eax # imm = 0x7FFF
405; X86-NEXT:    cmpw %di, %dx
406; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
407; X86-NEXT:    cmovel %esi, %eax
408; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
409; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
410; X86-NEXT:    movl %eax, %edx
411; X86-NEXT:    shll %cl, %edx
412; X86-NEXT:    movswl %dx, %esi
413; X86-NEXT:    sarl %cl, %esi
414; X86-NEXT:    xorl %ebx, %ebx
415; X86-NEXT:    testw %ax, %ax
416; X86-NEXT:    sets %bl
417; X86-NEXT:    addl $32767, %ebx # imm = 0x7FFF
418; X86-NEXT:    cmpw %si, %ax
419; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
420; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
421; X86-NEXT:    cmovel %edx, %ebx
422; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
423; X86-NEXT:    movl %eax, %edx
424; X86-NEXT:    shll %cl, %edx
425; X86-NEXT:    movswl %dx, %esi
426; X86-NEXT:    sarl %cl, %esi
427; X86-NEXT:    xorl %ecx, %ecx
428; X86-NEXT:    testw %ax, %ax
429; X86-NEXT:    sets %cl
430; X86-NEXT:    addl $32767, %ecx # imm = 0x7FFF
431; X86-NEXT:    cmpw %si, %ax
432; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
433; X86-NEXT:    cmovel %edx, %ecx
434; X86-NEXT:    movl %ecx, %ebp
435; X86-NEXT:    movl %eax, %edx
436; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
437; X86-NEXT:    shll %cl, %edx
438; X86-NEXT:    movswl %dx, %esi
439; X86-NEXT:    sarl %cl, %esi
440; X86-NEXT:    xorl %ebx, %ebx
441; X86-NEXT:    testw %ax, %ax
442; X86-NEXT:    sets %bl
443; X86-NEXT:    addl $32767, %ebx # imm = 0x7FFF
444; X86-NEXT:    cmpw %si, %ax
445; X86-NEXT:    cmovel %edx, %ebx
446; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
447; X86-NEXT:    movl %eax, %esi
448; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
449; X86-NEXT:    shll %cl, %esi
450; X86-NEXT:    movswl %si, %edi
451; X86-NEXT:    sarl %cl, %edi
452; X86-NEXT:    xorl %edx, %edx
453; X86-NEXT:    testw %ax, %ax
454; X86-NEXT:    sets %dl
455; X86-NEXT:    addl $32767, %edx # imm = 0x7FFF
456; X86-NEXT:    cmpw %di, %ax
457; X86-NEXT:    cmovel %esi, %edx
458; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
459; X86-NEXT:    movl %eax, %esi
460; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
461; X86-NEXT:    shll %cl, %esi
462; X86-NEXT:    movswl %si, %edi
463; X86-NEXT:    sarl %cl, %edi
464; X86-NEXT:    xorl %ecx, %ecx
465; X86-NEXT:    testw %ax, %ax
466; X86-NEXT:    sets %cl
467; X86-NEXT:    addl $32767, %ecx # imm = 0x7FFF
468; X86-NEXT:    cmpw %di, %ax
469; X86-NEXT:    cmovel %esi, %ecx
470; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
471; X86-NEXT:    movw %cx, 14(%eax)
472; X86-NEXT:    movw %dx, 12(%eax)
473; X86-NEXT:    movw %bx, 10(%eax)
474; X86-NEXT:    movw %bp, 8(%eax)
475; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
476; X86-NEXT:    movw %cx, 6(%eax)
477; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
478; X86-NEXT:    movw %cx, 4(%eax)
479; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
480; X86-NEXT:    movw %cx, 2(%eax)
481; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
482; X86-NEXT:    movw %cx, (%eax)
483; X86-NEXT:    addl $16, %esp
484; X86-NEXT:    popl %esi
485; X86-NEXT:    popl %edi
486; X86-NEXT:    popl %ebx
487; X86-NEXT:    popl %ebp
488; X86-NEXT:    retl $4
489  %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
490  ret <8 x i16> %tmp
491}
492
493define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
494; X64-LABEL: vec_v16i8:
495; X64:       # %bb.0:
496; X64-NEXT:    psllw $5, %xmm1
497; X64-NEXT:    pxor %xmm3, %xmm3
498; X64-NEXT:    pxor %xmm4, %xmm4
499; X64-NEXT:    pcmpgtb %xmm1, %xmm4
500; X64-NEXT:    movdqa %xmm0, %xmm2
501; X64-NEXT:    psllw $4, %xmm2
502; X64-NEXT:    pand %xmm4, %xmm2
503; X64-NEXT:    pandn %xmm0, %xmm4
504; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
505; X64-NEXT:    por %xmm4, %xmm2
506; X64-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
507; X64-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
508; X64-NEXT:    paddb %xmm1, %xmm1
509; X64-NEXT:    pxor %xmm6, %xmm6
510; X64-NEXT:    pcmpgtb %xmm1, %xmm6
511; X64-NEXT:    movdqa %xmm6, %xmm7
512; X64-NEXT:    pandn %xmm2, %xmm7
513; X64-NEXT:    psllw $2, %xmm2
514; X64-NEXT:    pand %xmm6, %xmm2
515; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
516; X64-NEXT:    por %xmm7, %xmm2
517; X64-NEXT:    paddb %xmm1, %xmm1
518; X64-NEXT:    pxor %xmm6, %xmm6
519; X64-NEXT:    pcmpgtb %xmm1, %xmm6
520; X64-NEXT:    movdqa %xmm6, %xmm1
521; X64-NEXT:    pandn %xmm2, %xmm1
522; X64-NEXT:    paddb %xmm2, %xmm2
523; X64-NEXT:    pand %xmm6, %xmm2
524; X64-NEXT:    por %xmm1, %xmm2
525; X64-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm2[8],xmm6[9],xmm2[9],xmm6[10],xmm2[10],xmm6[11],xmm2[11],xmm6[12],xmm2[12],xmm6[13],xmm2[13],xmm6[14],xmm2[14],xmm6[15],xmm2[15]
526; X64-NEXT:    pxor %xmm1, %xmm1
527; X64-NEXT:    pcmpgtw %xmm4, %xmm1
528; X64-NEXT:    movdqa %xmm1, %xmm7
529; X64-NEXT:    pandn %xmm6, %xmm7
530; X64-NEXT:    psraw $4, %xmm6
531; X64-NEXT:    pand %xmm1, %xmm6
532; X64-NEXT:    por %xmm7, %xmm6
533; X64-NEXT:    paddw %xmm4, %xmm4
534; X64-NEXT:    pxor %xmm1, %xmm1
535; X64-NEXT:    pcmpgtw %xmm4, %xmm1
536; X64-NEXT:    movdqa %xmm1, %xmm7
537; X64-NEXT:    pandn %xmm6, %xmm7
538; X64-NEXT:    psraw $2, %xmm6
539; X64-NEXT:    pand %xmm1, %xmm6
540; X64-NEXT:    por %xmm7, %xmm6
541; X64-NEXT:    paddw %xmm4, %xmm4
542; X64-NEXT:    pxor %xmm1, %xmm1
543; X64-NEXT:    pcmpgtw %xmm4, %xmm1
544; X64-NEXT:    movdqa %xmm1, %xmm4
545; X64-NEXT:    pandn %xmm6, %xmm4
546; X64-NEXT:    psraw $1, %xmm6
547; X64-NEXT:    pand %xmm1, %xmm6
548; X64-NEXT:    por %xmm4, %xmm6
549; X64-NEXT:    psrlw $8, %xmm6
550; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
551; X64-NEXT:    pxor %xmm4, %xmm4
552; X64-NEXT:    pcmpgtw %xmm5, %xmm4
553; X64-NEXT:    movdqa %xmm4, %xmm7
554; X64-NEXT:    pandn %xmm1, %xmm7
555; X64-NEXT:    psraw $4, %xmm1
556; X64-NEXT:    pand %xmm4, %xmm1
557; X64-NEXT:    por %xmm7, %xmm1
558; X64-NEXT:    paddw %xmm5, %xmm5
559; X64-NEXT:    pxor %xmm4, %xmm4
560; X64-NEXT:    pcmpgtw %xmm5, %xmm4
561; X64-NEXT:    movdqa %xmm4, %xmm7
562; X64-NEXT:    pandn %xmm1, %xmm7
563; X64-NEXT:    psraw $2, %xmm1
564; X64-NEXT:    pand %xmm4, %xmm1
565; X64-NEXT:    por %xmm7, %xmm1
566; X64-NEXT:    paddw %xmm5, %xmm5
567; X64-NEXT:    pxor %xmm4, %xmm4
568; X64-NEXT:    pcmpgtw %xmm5, %xmm4
569; X64-NEXT:    movdqa %xmm4, %xmm5
570; X64-NEXT:    pandn %xmm1, %xmm5
571; X64-NEXT:    psraw $1, %xmm1
572; X64-NEXT:    pand %xmm4, %xmm1
573; X64-NEXT:    por %xmm5, %xmm1
574; X64-NEXT:    psrlw $8, %xmm1
575; X64-NEXT:    packuswb %xmm6, %xmm1
576; X64-NEXT:    pcmpeqb %xmm0, %xmm1
577; X64-NEXT:    pand %xmm1, %xmm2
578; X64-NEXT:    pcmpgtb %xmm0, %xmm3
579; X64-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
580; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
581; X64-NEXT:    por %xmm3, %xmm0
582; X64-NEXT:    pandn %xmm0, %xmm1
583; X64-NEXT:    por %xmm2, %xmm1
584; X64-NEXT:    movdqa %xmm1, %xmm0
585; X64-NEXT:    retq
586;
587; X64-AVX2-LABEL: vec_v16i8:
588; X64-AVX2:       # %bb.0:
589; X64-AVX2-NEXT:    vpsllw $5, %xmm1, %xmm1
590; X64-AVX2-NEXT:    vpsllw $4, %xmm0, %xmm2
591; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
592; X64-AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm2
593; X64-AVX2-NEXT:    vpsllw $2, %xmm2, %xmm3
594; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
595; X64-AVX2-NEXT:    vpaddb %xmm1, %xmm1, %xmm4
596; X64-AVX2-NEXT:    vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
597; X64-AVX2-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
598; X64-AVX2-NEXT:    vpaddb %xmm4, %xmm4, %xmm4
599; X64-AVX2-NEXT:    vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
600; X64-AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
601; X64-AVX2-NEXT:    vpsraw $4, %xmm3, %xmm4
602; X64-AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
603; X64-AVX2-NEXT:    vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
604; X64-AVX2-NEXT:    vpsraw $2, %xmm3, %xmm4
605; X64-AVX2-NEXT:    vpaddw %xmm5, %xmm5, %xmm5
606; X64-AVX2-NEXT:    vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
607; X64-AVX2-NEXT:    vpsraw $1, %xmm3, %xmm4
608; X64-AVX2-NEXT:    vpaddw %xmm5, %xmm5, %xmm5
609; X64-AVX2-NEXT:    vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
610; X64-AVX2-NEXT:    vpsrlw $8, %xmm3, %xmm3
611; X64-AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
612; X64-AVX2-NEXT:    vpsraw $4, %xmm4, %xmm5
613; X64-AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
614; X64-AVX2-NEXT:    vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
615; X64-AVX2-NEXT:    vpsraw $2, %xmm4, %xmm5
616; X64-AVX2-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
617; X64-AVX2-NEXT:    vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
618; X64-AVX2-NEXT:    vpsraw $1, %xmm4, %xmm5
619; X64-AVX2-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
620; X64-AVX2-NEXT:    vpblendvb %xmm1, %xmm5, %xmm4, %xmm1
621; X64-AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
622; X64-AVX2-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
623; X64-AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1
624; X64-AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
625; X64-AVX2-NEXT:    vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
626; X64-AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
627; X64-AVX2-NEXT:    retq
628;
629; X86-LABEL: vec_v16i8:
630; X86:       # %bb.0:
631; X86-NEXT:    pushl %ebp
632; X86-NEXT:    pushl %ebx
633; X86-NEXT:    pushl %edi
634; X86-NEXT:    pushl %esi
635; X86-NEXT:    subl $44, %esp
636; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
637; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
638; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
639; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
640; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
641; X86-NEXT:    movb %ch, %bh
642; X86-NEXT:    shlb %cl, %bh
643; X86-NEXT:    movzbl %bh, %esi
644; X86-NEXT:    sarb %cl, %bh
645; X86-NEXT:    xorl %eax, %eax
646; X86-NEXT:    testb %ch, %ch
647; X86-NEXT:    sets %al
648; X86-NEXT:    addl $127, %eax
649; X86-NEXT:    cmpb %bh, %ch
650; X86-NEXT:    cmovel %esi, %eax
651; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
652; X86-NEXT:    movl %ebx, %eax
653; X86-NEXT:    movl %edx, %ecx
654; X86-NEXT:    shlb %cl, %al
655; X86-NEXT:    movzbl %al, %esi
656; X86-NEXT:    sarb %cl, %al
657; X86-NEXT:    xorl %ecx, %ecx
658; X86-NEXT:    testb %bl, %bl
659; X86-NEXT:    sets %cl
660; X86-NEXT:    addl $127, %ecx
661; X86-NEXT:    cmpb %al, %bl
662; X86-NEXT:    cmovel %esi, %ecx
663; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
664; X86-NEXT:    movb %dh, %al
665; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
666; X86-NEXT:    shlb %cl, %al
667; X86-NEXT:    movzbl %al, %esi
668; X86-NEXT:    sarb %cl, %al
669; X86-NEXT:    xorl %ecx, %ecx
670; X86-NEXT:    testb %dh, %dh
671; X86-NEXT:    sets %cl
672; X86-NEXT:    addl $127, %ecx
673; X86-NEXT:    cmpb %al, %dh
674; X86-NEXT:    cmovel %esi, %ecx
675; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
676; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
677; X86-NEXT:    movb %ah, %al
678; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
679; X86-NEXT:    shlb %cl, %al
680; X86-NEXT:    movzbl %al, %esi
681; X86-NEXT:    sarb %cl, %al
682; X86-NEXT:    xorl %edx, %edx
683; X86-NEXT:    testb %ah, %ah
684; X86-NEXT:    sets %dl
685; X86-NEXT:    addl $127, %edx
686; X86-NEXT:    cmpb %al, %ah
687; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
688; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
689; X86-NEXT:    cmovel %esi, %edx
690; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
691; X86-NEXT:    movl %eax, %edx
692; X86-NEXT:    shlb %cl, %dl
693; X86-NEXT:    movzbl %dl, %esi
694; X86-NEXT:    sarb %cl, %dl
695; X86-NEXT:    xorl %ecx, %ecx
696; X86-NEXT:    testb %al, %al
697; X86-NEXT:    sets %cl
698; X86-NEXT:    addl $127, %ecx
699; X86-NEXT:    cmpb %dl, %al
700; X86-NEXT:    cmovel %esi, %ecx
701; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
702; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
703; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
704; X86-NEXT:    movl %eax, %edx
705; X86-NEXT:    shlb %cl, %dl
706; X86-NEXT:    movzbl %dl, %esi
707; X86-NEXT:    sarb %cl, %dl
708; X86-NEXT:    xorl %ecx, %ecx
709; X86-NEXT:    testb %al, %al
710; X86-NEXT:    sets %cl
711; X86-NEXT:    addl $127, %ecx
712; X86-NEXT:    cmpb %dl, %al
713; X86-NEXT:    cmovel %esi, %ecx
714; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
715; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
716; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
717; X86-NEXT:    movl %eax, %edx
718; X86-NEXT:    shlb %cl, %dl
719; X86-NEXT:    movzbl %dl, %esi
720; X86-NEXT:    sarb %cl, %dl
721; X86-NEXT:    xorl %ecx, %ecx
722; X86-NEXT:    testb %al, %al
723; X86-NEXT:    sets %cl
724; X86-NEXT:    addl $127, %ecx
725; X86-NEXT:    cmpb %dl, %al
726; X86-NEXT:    cmovel %esi, %ecx
727; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
728; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
729; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
730; X86-NEXT:    movl %eax, %edx
731; X86-NEXT:    shlb %cl, %dl
732; X86-NEXT:    movzbl %dl, %esi
733; X86-NEXT:    sarb %cl, %dl
734; X86-NEXT:    xorl %ecx, %ecx
735; X86-NEXT:    testb %al, %al
736; X86-NEXT:    sets %cl
737; X86-NEXT:    addl $127, %ecx
738; X86-NEXT:    cmpb %dl, %al
739; X86-NEXT:    cmovel %esi, %ecx
740; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
741; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
742; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
743; X86-NEXT:    movl %eax, %edx
744; X86-NEXT:    shlb %cl, %dl
745; X86-NEXT:    movzbl %dl, %esi
746; X86-NEXT:    sarb %cl, %dl
747; X86-NEXT:    xorl %ecx, %ecx
748; X86-NEXT:    testb %al, %al
749; X86-NEXT:    sets %cl
750; X86-NEXT:    addl $127, %ecx
751; X86-NEXT:    cmpb %dl, %al
752; X86-NEXT:    cmovel %esi, %ecx
753; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
754; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
755; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
756; X86-NEXT:    movl %eax, %edx
757; X86-NEXT:    shlb %cl, %dl
758; X86-NEXT:    movzbl %dl, %esi
759; X86-NEXT:    sarb %cl, %dl
760; X86-NEXT:    xorl %ecx, %ecx
761; X86-NEXT:    testb %al, %al
762; X86-NEXT:    sets %cl
763; X86-NEXT:    addl $127, %ecx
764; X86-NEXT:    cmpb %dl, %al
765; X86-NEXT:    cmovel %esi, %ecx
766; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
767; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
768; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
769; X86-NEXT:    movl %eax, %edx
770; X86-NEXT:    shlb %cl, %dl
771; X86-NEXT:    movzbl %dl, %esi
772; X86-NEXT:    sarb %cl, %dl
773; X86-NEXT:    xorl %ecx, %ecx
774; X86-NEXT:    testb %al, %al
775; X86-NEXT:    sets %cl
776; X86-NEXT:    addl $127, %ecx
777; X86-NEXT:    cmpb %dl, %al
778; X86-NEXT:    cmovel %esi, %ecx
779; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
780; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
781; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
782; X86-NEXT:    movl %eax, %edx
783; X86-NEXT:    shlb %cl, %dl
784; X86-NEXT:    movzbl %dl, %esi
785; X86-NEXT:    sarb %cl, %dl
786; X86-NEXT:    xorl %ecx, %ecx
787; X86-NEXT:    testb %al, %al
788; X86-NEXT:    sets %cl
789; X86-NEXT:    addl $127, %ecx
790; X86-NEXT:    cmpb %dl, %al
791; X86-NEXT:    cmovel %esi, %ecx
792; X86-NEXT:    movl %ecx, %ebp
793; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
794; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
795; X86-NEXT:    movl %eax, %edx
796; X86-NEXT:    shlb %cl, %dl
797; X86-NEXT:    movzbl %dl, %esi
798; X86-NEXT:    sarb %cl, %dl
799; X86-NEXT:    xorl %ecx, %ecx
800; X86-NEXT:    testb %al, %al
801; X86-NEXT:    sets %cl
802; X86-NEXT:    addl $127, %ecx
803; X86-NEXT:    cmpb %dl, %al
804; X86-NEXT:    cmovel %esi, %ecx
805; X86-NEXT:    movl %ecx, %edi
806; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
807; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
808; X86-NEXT:    movl %eax, %edx
809; X86-NEXT:    shlb %cl, %dl
810; X86-NEXT:    movzbl %dl, %esi
811; X86-NEXT:    sarb %cl, %dl
812; X86-NEXT:    xorl %ebx, %ebx
813; X86-NEXT:    testb %al, %al
814; X86-NEXT:    sets %bl
815; X86-NEXT:    addl $127, %ebx
816; X86-NEXT:    cmpb %dl, %al
817; X86-NEXT:    cmovel %esi, %ebx
818; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
819; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
820; X86-NEXT:    movb %al, %ah
821; X86-NEXT:    shlb %cl, %ah
822; X86-NEXT:    movzbl %ah, %esi
823; X86-NEXT:    sarb %cl, %ah
824; X86-NEXT:    xorl %edx, %edx
825; X86-NEXT:    testb %al, %al
826; X86-NEXT:    sets %dl
827; X86-NEXT:    addl $127, %edx
828; X86-NEXT:    cmpb %ah, %al
829; X86-NEXT:    cmovel %esi, %edx
830; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
831; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
832; X86-NEXT:    movb %al, %ah
833; X86-NEXT:    shlb %cl, %ah
834; X86-NEXT:    movzbl %ah, %esi
835; X86-NEXT:    sarb %cl, %ah
836; X86-NEXT:    xorl %ecx, %ecx
837; X86-NEXT:    testb %al, %al
838; X86-NEXT:    sets %cl
839; X86-NEXT:    addl $127, %ecx
840; X86-NEXT:    cmpb %ah, %al
841; X86-NEXT:    cmovel %esi, %ecx
842; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
843; X86-NEXT:    movb %cl, 15(%eax)
844; X86-NEXT:    movb %dl, 14(%eax)
845; X86-NEXT:    movb %bl, 13(%eax)
846; X86-NEXT:    movl %edi, %ecx
847; X86-NEXT:    movb %cl, 12(%eax)
848; X86-NEXT:    movl %ebp, %ecx
849; X86-NEXT:    movb %cl, 11(%eax)
850; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
851; X86-NEXT:    movb %cl, 10(%eax)
852; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
853; X86-NEXT:    movb %cl, 9(%eax)
854; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
855; X86-NEXT:    movb %cl, 8(%eax)
856; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
857; X86-NEXT:    movb %cl, 7(%eax)
858; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
859; X86-NEXT:    movb %cl, 6(%eax)
860; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
861; X86-NEXT:    movb %cl, 5(%eax)
862; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
863; X86-NEXT:    movb %cl, 4(%eax)
864; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
865; X86-NEXT:    movb %cl, 3(%eax)
866; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
867; X86-NEXT:    movb %cl, 2(%eax)
868; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
869; X86-NEXT:    movb %cl, 1(%eax)
870; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
871; X86-NEXT:    movb %cl, (%eax)
872; X86-NEXT:    addl $44, %esp
873; X86-NEXT:    popl %esi
874; X86-NEXT:    popl %edi
875; X86-NEXT:    popl %ebx
876; X86-NEXT:    popl %ebp
877; X86-NEXT:    retl $4
878  %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
879  ret <16 x i8> %tmp
880}
881