xref: /llvm-project/llvm/test/CodeGen/X86/vector-trunc-ssat.ll (revision f0b3b6d15b2c0ee2cff2dd31dc075adb5d9a4ff7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
14; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
15; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
16; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX
17
18;
19; Signed saturation truncation to vXi32
20;
21
; Test: signed-saturating truncation <2 x i64> -> <2 x i32>. The IR clamps
; %a0 into [INT32_MIN, INT32_MAX] with icmp+select pairs before truncating,
; so targets with vpmovsqd (AVX512) can lower the whole pattern to one
; saturating-truncate instruction, while SSE/AVX targets emit the expanded
; compare/blend sequence.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
22define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) {
23; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i32:
24; SSE2-SSSE3:       # %bb.0:
25; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
26; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
27; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
28; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
29; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm4
30; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
31; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
32; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
33; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
35; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
36; SSE2-SSSE3-NEXT:    por %xmm2, %xmm3
37; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
38; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
39; SSE2-SSSE3-NEXT:    por %xmm0, %xmm3
40; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm1
41; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
42; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm2
43; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
44; SSE2-SSSE3-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
45; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
46; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
47; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
48; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
49; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm3
50; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
51; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
52; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
53; SSE2-SSSE3-NEXT:    retq
54;
55; SSE41-LABEL: trunc_ssat_v2i64_v2i32:
56; SSE41:       # %bb.0:
57; SSE41-NEXT:    movdqa %xmm0, %xmm1
58; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [2147483647,2147483647]
59; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
60; SSE41-NEXT:    pxor %xmm3, %xmm0
61; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0]
62; SSE41-NEXT:    movdqa %xmm0, %xmm5
63; SSE41-NEXT:    pcmpeqd %xmm4, %xmm5
64; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
65; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
66; SSE41-NEXT:    pand %xmm5, %xmm0
67; SSE41-NEXT:    por %xmm4, %xmm0
68; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
69; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
70; SSE41-NEXT:    pxor %xmm2, %xmm3
71; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm0 = [0,4294967295,0,4294967295]
72; SSE41-NEXT:    movdqa %xmm3, %xmm4
73; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
74; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
75; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
76; SSE41-NEXT:    pand %xmm4, %xmm0
77; SSE41-NEXT:    por %xmm3, %xmm0
78; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
79; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
80; SSE41-NEXT:    retq
81;
82; AVX1-LABEL: trunc_ssat_v2i64_v2i32:
83; AVX1:       # %bb.0:
84; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [2147483647,2147483647]
85; AVX1-NEXT:    # xmm1 = mem[0,0]
86; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
87; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
88; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
89; AVX1-NEXT:    # xmm1 = mem[0,0]
90; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
91; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
92; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
93; AVX1-NEXT:    retq
94;
95; AVX2-LABEL: trunc_ssat_v2i64_v2i32:
96; AVX2:       # %bb.0:
97; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
98; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
99; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
100; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
101; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
102; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
103; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
104; AVX2-NEXT:    retq
105;
106; AVX512F-LABEL: trunc_ssat_v2i64_v2i32:
107; AVX512F:       # %bb.0:
108; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
109; AVX512F-NEXT:    vpmovsqd %zmm0, %ymm0
110; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
111; AVX512F-NEXT:    vzeroupper
112; AVX512F-NEXT:    retq
113;
114; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32:
115; AVX512VL:       # %bb.0:
116; AVX512VL-NEXT:    vpmovsqd %xmm0, %xmm0
117; AVX512VL-NEXT:    retq
118;
119; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32:
120; AVX512BW:       # %bb.0:
121; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
122; AVX512BW-NEXT:    vpmovsqd %zmm0, %ymm0
123; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
124; AVX512BW-NEXT:    vzeroupper
125; AVX512BW-NEXT:    retq
126;
127; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32:
128; AVX512BWVL:       # %bb.0:
129; AVX512BWVL-NEXT:    vpmovsqd %xmm0, %xmm0
130; AVX512BWVL-NEXT:    retq
131;
132; SKX-LABEL: trunc_ssat_v2i64_v2i32:
133; SKX:       # %bb.0:
134; SKX-NEXT:    vpmovsqd %xmm0, %xmm0
135; SKX-NEXT:    retq
; Clamp to INT32_MAX (2147483647) from above, then to INT32_MIN
; (-2147483648) from below, then truncate each lane to i32.
136  %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647>
137  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647>
138  %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648>
139  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648>
140  %5 = trunc <2 x i64> %4 to <2 x i32>
141  ret <2 x i32> %5
142}
143
; Test: same signed-saturating <2 x i64> -> <2 x i32> clamp-and-truncate as
; above, but the result is stored to memory (%p1) instead of returned. This
; exercises the truncstore path: AVX512VL/SKX can fold the store into
; vpmovsqd with a memory destination, while SSE/AVX targets compute in
; registers and store with movq/vmovlpd.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py.
144define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) {
145; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i32_store:
146; SSE2-SSSE3:       # %bb.0:
147; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
148; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
149; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
150; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
151; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm4
152; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
153; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
154; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
155; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
156; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
157; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
158; SSE2-SSSE3-NEXT:    por %xmm2, %xmm3
159; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
160; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
161; SSE2-SSSE3-NEXT:    por %xmm0, %xmm3
162; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm1
163; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
164; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm2
165; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
166; SSE2-SSSE3-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
167; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
168; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
169; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
170; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
171; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm3
172; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
173; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
174; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
175; SSE2-SSSE3-NEXT:    movq %xmm0, (%rdi)
176; SSE2-SSSE3-NEXT:    retq
177;
178; SSE41-LABEL: trunc_ssat_v2i64_v2i32_store:
179; SSE41:       # %bb.0:
180; SSE41-NEXT:    movdqa %xmm0, %xmm1
181; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [2147483647,2147483647]
182; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
183; SSE41-NEXT:    pxor %xmm3, %xmm0
184; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0]
185; SSE41-NEXT:    movdqa %xmm0, %xmm5
186; SSE41-NEXT:    pcmpeqd %xmm4, %xmm5
187; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
188; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
189; SSE41-NEXT:    pand %xmm5, %xmm0
190; SSE41-NEXT:    por %xmm4, %xmm0
191; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
192; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
193; SSE41-NEXT:    pxor %xmm2, %xmm3
194; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm0 = [0,4294967295,0,4294967295]
195; SSE41-NEXT:    movdqa %xmm3, %xmm4
196; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
197; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
198; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
199; SSE41-NEXT:    pand %xmm4, %xmm0
200; SSE41-NEXT:    por %xmm3, %xmm0
201; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
202; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
203; SSE41-NEXT:    movq %xmm0, (%rdi)
204; SSE41-NEXT:    retq
205;
206; AVX1-LABEL: trunc_ssat_v2i64_v2i32_store:
207; AVX1:       # %bb.0:
208; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [2147483647,2147483647]
209; AVX1-NEXT:    # xmm1 = mem[0,0]
210; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
211; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
212; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
213; AVX1-NEXT:    # xmm1 = mem[0,0]
214; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
215; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
216; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
217; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
218; AVX1-NEXT:    retq
219;
220; AVX2-LABEL: trunc_ssat_v2i64_v2i32_store:
221; AVX2:       # %bb.0:
222; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
223; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
224; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
225; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
226; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
227; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
228; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
229; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
230; AVX2-NEXT:    retq
231;
232; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store:
233; AVX512F:       # %bb.0:
234; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
235; AVX512F-NEXT:    vpmovsqd %zmm0, %ymm0
236; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
237; AVX512F-NEXT:    vzeroupper
238; AVX512F-NEXT:    retq
239;
240; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32_store:
241; AVX512VL:       # %bb.0:
242; AVX512VL-NEXT:    vpmovsqd %xmm0, (%rdi)
243; AVX512VL-NEXT:    retq
244;
245; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32_store:
246; AVX512BW:       # %bb.0:
247; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
248; AVX512BW-NEXT:    vpmovsqd %zmm0, %ymm0
249; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
250; AVX512BW-NEXT:    vzeroupper
251; AVX512BW-NEXT:    retq
252;
253; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32_store:
254; AVX512BWVL:       # %bb.0:
255; AVX512BWVL-NEXT:    vpmovsqd %xmm0, (%rdi)
256; AVX512BWVL-NEXT:    retq
257;
258; SKX-LABEL: trunc_ssat_v2i64_v2i32_store:
259; SKX:       # %bb.0:
260; SKX-NEXT:    vpmovsqd %xmm0, (%rdi)
261; SKX-NEXT:    retq
; Clamp to [INT32_MIN, INT32_MAX], truncate to i32 lanes, store to %p1.
262  %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647>
263  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647>
264  %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648>
265  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648>
266  %5 = trunc <2 x i64> %4 to <2 x i32>
267  store <2 x i32> %5, ptr %p1
268  ret void
269}
270
; Test: signed-saturating truncation <4 x i64> -> <4 x i32>. The 256-bit
; input means AVX targets split/extract 128-bit halves (AVX1) or operate on
; ymm then shuffle down (AVX2 variants); AVX512VL/SKX lower to a single
; vpmovsqd ymm->xmm. The AVX2-SLOW vs AVX2-FAST prefixes cover the
; fast-variable-shuffle attribute combinations from the RUN lines.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py.
271define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
272; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i32:
273; SSE2-SSSE3:       # %bb.0:
274; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483647,2147483647]
275; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
276; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm4
277; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm4
278; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
279; SSE2-SSSE3-NEXT:    pxor %xmm6, %xmm6
280; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
281; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [4294967295,4294967295]
282; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm8
283; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm8
284; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm8[0,0,2,2]
285; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
286; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
287; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
288; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
289; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
290; SSE2-SSSE3-NEXT:    por %xmm5, %xmm0
291; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm4
292; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm4
293; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
294; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
295; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm7
296; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
297; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
298; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
299; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
300; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
301; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
302; SSE2-SSSE3-NEXT:    por %xmm1, %xmm5
303; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
304; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
305; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm3
306; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
307; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
308; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
309; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744069414584320,18446744069414584320]
310; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm3
311; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2]
312; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm8
313; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
314; SSE2-SSSE3-NEXT:    por %xmm8, %xmm3
315; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm5
316; SSE2-SSSE3-NEXT:    pandn %xmm1, %xmm3
317; SSE2-SSSE3-NEXT:    por %xmm5, %xmm3
318; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm2
319; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
320; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
321; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm2
322; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
323; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm5
324; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
325; SSE2-SSSE3-NEXT:    por %xmm5, %xmm2
326; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
327; SSE2-SSSE3-NEXT:    pandn %xmm1, %xmm2
328; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
329; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
330; SSE2-SSSE3-NEXT:    retq
331;
332; SSE41-LABEL: trunc_ssat_v4i64_v4i32:
333; SSE41:       # %bb.0:
334; SSE41-NEXT:    movdqa %xmm0, %xmm2
335; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [2147483647,2147483647]
336; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
337; SSE41-NEXT:    movdqa %xmm0, %xmm5
338; SSE41-NEXT:    pxor %xmm3, %xmm5
339; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0]
340; SSE41-NEXT:    movdqa %xmm6, %xmm7
341; SSE41-NEXT:    pcmpgtd %xmm5, %xmm7
342; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
343; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
344; SSE41-NEXT:    pand %xmm5, %xmm0
345; SSE41-NEXT:    por %xmm7, %xmm0
346; SSE41-NEXT:    movapd %xmm4, %xmm5
347; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
348; SSE41-NEXT:    movdqa %xmm1, %xmm0
349; SSE41-NEXT:    pxor %xmm3, %xmm0
350; SSE41-NEXT:    movdqa %xmm0, %xmm2
351; SSE41-NEXT:    pcmpeqd %xmm6, %xmm2
352; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
353; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
354; SSE41-NEXT:    pand %xmm2, %xmm0
355; SSE41-NEXT:    por %xmm6, %xmm0
356; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
357; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
358; SSE41-NEXT:    movapd %xmm4, %xmm2
359; SSE41-NEXT:    xorpd %xmm3, %xmm2
360; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm6 = [0,4294967295,0,4294967295]
361; SSE41-NEXT:    movapd %xmm2, %xmm7
362; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
363; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
364; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
365; SSE41-NEXT:    pand %xmm7, %xmm0
366; SSE41-NEXT:    por %xmm2, %xmm0
367; SSE41-NEXT:    movapd %xmm1, %xmm2
368; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
369; SSE41-NEXT:    xorpd %xmm5, %xmm3
370; SSE41-NEXT:    movapd %xmm3, %xmm4
371; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
372; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
373; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
374; SSE41-NEXT:    pand %xmm4, %xmm0
375; SSE41-NEXT:    por %xmm3, %xmm0
376; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
377; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
378; SSE41-NEXT:    movaps %xmm1, %xmm0
379; SSE41-NEXT:    retq
380;
381; AVX1-LABEL: trunc_ssat_v4i64_v4i32:
382; AVX1:       # %bb.0:
383; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [2147483647,2147483647]
384; AVX1-NEXT:    # xmm1 = mem[0,0]
385; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
386; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm2
387; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
388; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
389; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
390; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
391; AVX1-NEXT:    # xmm1 = mem[0,0]
392; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
393; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
394; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
395; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm1, %xmm1
396; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
397; AVX1-NEXT:    vzeroupper
398; AVX1-NEXT:    retq
399;
400; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i32:
401; AVX2-SLOW:       # %bb.0:
402; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
403; AVX2-SLOW-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
404; AVX2-SLOW-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
405; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
406; AVX2-SLOW-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
407; AVX2-SLOW-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
408; AVX2-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm1
409; AVX2-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
410; AVX2-SLOW-NEXT:    vzeroupper
411; AVX2-SLOW-NEXT:    retq
412;
413; AVX2-FAST-ALL-LABEL: trunc_ssat_v4i64_v4i32:
414; AVX2-FAST-ALL:       # %bb.0:
415; AVX2-FAST-ALL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
416; AVX2-FAST-ALL-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
417; AVX2-FAST-ALL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
418; AVX2-FAST-ALL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
419; AVX2-FAST-ALL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
420; AVX2-FAST-ALL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
421; AVX2-FAST-ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
422; AVX2-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
423; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
424; AVX2-FAST-ALL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
425; AVX2-FAST-ALL-NEXT:    vzeroupper
426; AVX2-FAST-ALL-NEXT:    retq
427;
428; AVX2-FAST-PERLANE-LABEL: trunc_ssat_v4i64_v4i32:
429; AVX2-FAST-PERLANE:       # %bb.0:
430; AVX2-FAST-PERLANE-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
431; AVX2-FAST-PERLANE-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
432; AVX2-FAST-PERLANE-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
433; AVX2-FAST-PERLANE-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
434; AVX2-FAST-PERLANE-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
435; AVX2-FAST-PERLANE-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
436; AVX2-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm0, %xmm1
437; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
438; AVX2-FAST-PERLANE-NEXT:    vzeroupper
439; AVX2-FAST-PERLANE-NEXT:    retq
440;
441; AVX512F-LABEL: trunc_ssat_v4i64_v4i32:
442; AVX512F:       # %bb.0:
443; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
444; AVX512F-NEXT:    vpmovsqd %zmm0, %ymm0
445; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
446; AVX512F-NEXT:    vzeroupper
447; AVX512F-NEXT:    retq
448;
449; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32:
450; AVX512VL:       # %bb.0:
451; AVX512VL-NEXT:    vpmovsqd %ymm0, %xmm0
452; AVX512VL-NEXT:    vzeroupper
453; AVX512VL-NEXT:    retq
454;
455; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32:
456; AVX512BW:       # %bb.0:
457; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
458; AVX512BW-NEXT:    vpmovsqd %zmm0, %ymm0
459; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
460; AVX512BW-NEXT:    vzeroupper
461; AVX512BW-NEXT:    retq
462;
463; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32:
464; AVX512BWVL:       # %bb.0:
465; AVX512BWVL-NEXT:    vpmovsqd %ymm0, %xmm0
466; AVX512BWVL-NEXT:    vzeroupper
467; AVX512BWVL-NEXT:    retq
468;
469; SKX-LABEL: trunc_ssat_v4i64_v4i32:
470; SKX:       # %bb.0:
471; SKX-NEXT:    vpmovsqd %ymm0, %xmm0
472; SKX-NEXT:    vzeroupper
473; SKX-NEXT:    retq
; Clamp each of the four i64 lanes to [INT32_MIN, INT32_MAX], then truncate.
474  %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
475  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
476  %3 = icmp sgt <4 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
477  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
478  %5 = trunc <4 x i64> %4 to <4 x i32>
479  ret <4 x i32> %5
480}
481
482
483define <8 x i32> @trunc_ssat_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="256" {
484; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i32:
485; SSE2-SSSE3:       # %bb.0:
486; SSE2-SSSE3-NEXT:    movdqa (%rdi), %xmm3
487; SSE2-SSSE3-NEXT:    movdqa 16(%rdi), %xmm5
488; SSE2-SSSE3-NEXT:    movdqa 32(%rdi), %xmm7
489; SSE2-SSSE3-NEXT:    movdqa 48(%rdi), %xmm1
490; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647]
491; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
492; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
493; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm2
494; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3]
495; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm8
496; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
497; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm6 = [4294967295,4294967295]
498; SSE2-SSSE3-NEXT:    movdqa %xmm6, %xmm10
499; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm10
500; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
501; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
502; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3]
503; SSE2-SSSE3-NEXT:    por %xmm11, %xmm2
504; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
505; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm2
506; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
507; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
508; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm3
509; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3]
510; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
511; SSE2-SSSE3-NEXT:    movdqa %xmm6, %xmm10
512; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm10
513; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
514; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
515; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm10[1,1,3,3]
516; SSE2-SSSE3-NEXT:    por %xmm11, %xmm3
517; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm5
518; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm3
519; SSE2-SSSE3-NEXT:    por %xmm5, %xmm3
520; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm5
521; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm5
522; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3]
523; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
524; SSE2-SSSE3-NEXT:    movdqa %xmm6, %xmm10
525; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm10
526; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
527; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
528; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3]
529; SSE2-SSSE3-NEXT:    por %xmm11, %xmm5
530; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm7
531; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
532; SSE2-SSSE3-NEXT:    por %xmm7, %xmm5
533; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm7
534; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm7
535; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3]
536; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
537; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm6
538; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
539; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm7
540; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
541; SSE2-SSSE3-NEXT:    por %xmm7, %xmm8
542; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm1
543; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm8
544; SSE2-SSSE3-NEXT:    por %xmm1, %xmm8
545; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968]
546; SSE2-SSSE3-NEXT:    movdqa %xmm8, %xmm1
547; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm1
548; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm1[1,1,3,3]
549; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
550; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm9
551; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744069414584320,18446744069414584320]
552; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
553; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2]
554; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm10
555; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm1[1,1,3,3]
556; SSE2-SSSE3-NEXT:    por %xmm10, %xmm9
557; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm8
558; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm9
559; SSE2-SSSE3-NEXT:    por %xmm8, %xmm9
560; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
561; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm1
562; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm1[1,1,3,3]
563; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm8
564; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
565; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2]
566; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm10
567; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
568; SSE2-SSSE3-NEXT:    por %xmm10, %xmm1
569; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
570; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm1
571; SSE2-SSSE3-NEXT:    por %xmm5, %xmm1
572; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm9[0,2]
573; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm5
574; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm5
575; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3]
576; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm8
577; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
578; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
579; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm9
580; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
581; SSE2-SSSE3-NEXT:    por %xmm9, %xmm5
582; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
583; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
584; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
585; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
586; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
587; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm3
588; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm0
589; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2]
590; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm6
591; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
592; SSE2-SSSE3-NEXT:    por %xmm6, %xmm0
593; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm2
594; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm0
595; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
596; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
597; SSE2-SSSE3-NEXT:    retq
598;
599; SSE41-LABEL: trunc_ssat_v8i64_v8i32:
600; SSE41:       # %bb.0:
601; SSE41-NEXT:    movdqa (%rdi), %xmm5
602; SSE41-NEXT:    movdqa 16(%rdi), %xmm8
603; SSE41-NEXT:    movdqa 32(%rdi), %xmm7
604; SSE41-NEXT:    movdqa 48(%rdi), %xmm2
605; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [2147483647,2147483647]
606; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
607; SSE41-NEXT:    movdqa %xmm5, %xmm4
608; SSE41-NEXT:    pxor %xmm3, %xmm4
609; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0]
610; SSE41-NEXT:    movdqa %xmm6, %xmm9
611; SSE41-NEXT:    pcmpgtd %xmm4, %xmm9
612; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
613; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
614; SSE41-NEXT:    pand %xmm4, %xmm0
615; SSE41-NEXT:    por %xmm9, %xmm0
616; SSE41-NEXT:    movapd %xmm1, %xmm4
617; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm4
618; SSE41-NEXT:    movdqa %xmm8, %xmm5
619; SSE41-NEXT:    pxor %xmm3, %xmm5
620; SSE41-NEXT:    movdqa %xmm6, %xmm9
621; SSE41-NEXT:    pcmpgtd %xmm5, %xmm9
622; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
623; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
624; SSE41-NEXT:    pand %xmm5, %xmm0
625; SSE41-NEXT:    por %xmm9, %xmm0
626; SSE41-NEXT:    movapd %xmm1, %xmm5
627; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm5
628; SSE41-NEXT:    movdqa %xmm7, %xmm8
629; SSE41-NEXT:    pxor %xmm3, %xmm8
630; SSE41-NEXT:    movdqa %xmm6, %xmm9
631; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
632; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
633; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
634; SSE41-NEXT:    pand %xmm8, %xmm0
635; SSE41-NEXT:    por %xmm9, %xmm0
636; SSE41-NEXT:    movapd %xmm1, %xmm8
637; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm8
638; SSE41-NEXT:    movdqa %xmm2, %xmm0
639; SSE41-NEXT:    pxor %xmm3, %xmm0
640; SSE41-NEXT:    movdqa %xmm0, %xmm7
641; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
642; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
643; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
644; SSE41-NEXT:    pand %xmm7, %xmm0
645; SSE41-NEXT:    por %xmm6, %xmm0
646; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
647; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
648; SSE41-NEXT:    movapd %xmm1, %xmm7
649; SSE41-NEXT:    xorpd %xmm3, %xmm7
650; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm6 = [0,4294967295,0,4294967295]
651; SSE41-NEXT:    movapd %xmm7, %xmm9
652; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
653; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
654; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
655; SSE41-NEXT:    pand %xmm9, %xmm0
656; SSE41-NEXT:    por %xmm7, %xmm0
657; SSE41-NEXT:    movapd %xmm2, %xmm7
658; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm7
659; SSE41-NEXT:    movapd %xmm8, %xmm1
660; SSE41-NEXT:    xorpd %xmm3, %xmm1
661; SSE41-NEXT:    movapd %xmm1, %xmm9
662; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
663; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
664; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
665; SSE41-NEXT:    pand %xmm9, %xmm0
666; SSE41-NEXT:    por %xmm1, %xmm0
667; SSE41-NEXT:    movapd %xmm2, %xmm1
668; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm1
669; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm7[0,2]
670; SSE41-NEXT:    movapd %xmm5, %xmm7
671; SSE41-NEXT:    xorpd %xmm3, %xmm7
672; SSE41-NEXT:    movapd %xmm7, %xmm8
673; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
674; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
675; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
676; SSE41-NEXT:    pand %xmm8, %xmm0
677; SSE41-NEXT:    por %xmm7, %xmm0
678; SSE41-NEXT:    movapd %xmm2, %xmm7
679; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm7
680; SSE41-NEXT:    xorpd %xmm4, %xmm3
681; SSE41-NEXT:    movapd %xmm3, %xmm5
682; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
683; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
684; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
685; SSE41-NEXT:    pand %xmm5, %xmm0
686; SSE41-NEXT:    por %xmm3, %xmm0
687; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
688; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm7[0,2]
689; SSE41-NEXT:    movaps %xmm2, %xmm0
690; SSE41-NEXT:    retq
691;
692; AVX1-LABEL: trunc_ssat_v8i64_v8i32:
693; AVX1:       # %bb.0:
694; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
695; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
696; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
697; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
698; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [2147483647,2147483647]
699; AVX1-NEXT:    # xmm4 = mem[0,0]
700; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
701; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
702; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
703; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
704; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
705; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
706; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm5
707; AVX1-NEXT:    vblendvpd %xmm5, %xmm2, %xmm4, %xmm2
708; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968]
709; AVX1-NEXT:    # xmm4 = mem[0,0]
710; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm5
711; AVX1-NEXT:    vblendvpd %xmm5, %xmm2, %xmm4, %xmm2
712; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
713; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
714; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm5
715; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
716; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm5
717; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
718; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
719; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
720; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
721; AVX1-NEXT:    retq
722;
723; AVX2-SLOW-LABEL: trunc_ssat_v8i64_v8i32:
724; AVX2-SLOW:       # %bb.0:
725; AVX2-SLOW-NEXT:    vmovdqa (%rdi), %ymm0
726; AVX2-SLOW-NEXT:    vmovdqa 32(%rdi), %ymm1
727; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
728; AVX2-SLOW-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
729; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
730; AVX2-SLOW-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
731; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
732; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
733; AVX2-SLOW-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
734; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
735; AVX2-SLOW-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
736; AVX2-SLOW-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
737; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
738; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
739; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
740; AVX2-SLOW-NEXT:    retq
741;
742; AVX2-FAST-ALL-LABEL: trunc_ssat_v8i64_v8i32:
743; AVX2-FAST-ALL:       # %bb.0:
744; AVX2-FAST-ALL-NEXT:    vmovdqa (%rdi), %ymm0
745; AVX2-FAST-ALL-NEXT:    vmovdqa 32(%rdi), %ymm1
746; AVX2-FAST-ALL-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
747; AVX2-FAST-ALL-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
748; AVX2-FAST-ALL-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
749; AVX2-FAST-ALL-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
750; AVX2-FAST-ALL-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
751; AVX2-FAST-ALL-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
752; AVX2-FAST-ALL-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
753; AVX2-FAST-ALL-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
754; AVX2-FAST-ALL-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
755; AVX2-FAST-ALL-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
756; AVX2-FAST-ALL-NEXT:    vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
757; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
758; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
759; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
760; AVX2-FAST-ALL-NEXT:    retq
761;
762; AVX2-FAST-PERLANE-LABEL: trunc_ssat_v8i64_v8i32:
763; AVX2-FAST-PERLANE:       # %bb.0:
764; AVX2-FAST-PERLANE-NEXT:    vmovdqa (%rdi), %ymm0
765; AVX2-FAST-PERLANE-NEXT:    vmovdqa 32(%rdi), %ymm1
766; AVX2-FAST-PERLANE-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
767; AVX2-FAST-PERLANE-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
768; AVX2-FAST-PERLANE-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
769; AVX2-FAST-PERLANE-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
770; AVX2-FAST-PERLANE-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
771; AVX2-FAST-PERLANE-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
772; AVX2-FAST-PERLANE-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
773; AVX2-FAST-PERLANE-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
774; AVX2-FAST-PERLANE-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
775; AVX2-FAST-PERLANE-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
776; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
777; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
778; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
779; AVX2-FAST-PERLANE-NEXT:    retq
780;
781; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
782; AVX512:       # %bb.0:
783; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
784; AVX512-NEXT:    vpmovsqd %zmm0, %ymm0
785; AVX512-NEXT:    retq
786;
787; SKX-LABEL: trunc_ssat_v8i64_v8i32:
788; SKX:       # %bb.0:
789; SKX-NEXT:    vmovdqa (%rdi), %ymm0
790; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
791; SKX-NEXT:    vpmovsqd %ymm0, %xmm0
792; SKX-NEXT:    vpmovsqd %ymm1, %xmm1
793; SKX-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
794; SKX-NEXT:    retq
795  %a0 = load <8 x i64>, ptr %p0
796  %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
797  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
798  %3 = icmp sgt <8 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
799  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
800  %5 = trunc <8 x i64> %4 to <8 x i32>
801  ret <8 x i32> %5
802}
803
804;
805; Signed saturation truncation to vXi16
806;
807
; Signed saturating truncation <2 x i64> -> <2 x i16>: each lane is clamped
; to [-32768, 32767] with an smin/smax idiom (icmp + select) and then
; truncated. The CHECK lines below are autogenerated by
; update_llc_test_checks.py (see file header) — regenerate them with that
; script rather than editing by hand.
808define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) {
809; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i16:
810; SSE2-SSSE3:       # %bb.0:
811; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
812; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
813; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
814; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
815; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm4
816; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
817; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147516415,2147516415]
818; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
819; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
820; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
821; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
822; SSE2-SSSE3-NEXT:    por %xmm2, %xmm3
823; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
824; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
825; SSE2-SSSE3-NEXT:    por %xmm0, %xmm3
826; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm1
827; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
828; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm2
829; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
830; SSE2-SSSE3-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
831; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
832; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
833; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
834; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
835; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm3
836; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
837; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
838; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
839; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
840; SSE2-SSSE3-NEXT:    retq
841;
842; SSE41-LABEL: trunc_ssat_v2i64_v2i16:
843; SSE41:       # %bb.0:
844; SSE41-NEXT:    movdqa %xmm0, %xmm1
845; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [32767,32767]
846; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
847; SSE41-NEXT:    pxor %xmm3, %xmm0
848; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415]
849; SSE41-NEXT:    movdqa %xmm0, %xmm5
850; SSE41-NEXT:    pcmpeqd %xmm4, %xmm5
851; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
852; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
853; SSE41-NEXT:    pand %xmm5, %xmm0
854; SSE41-NEXT:    por %xmm4, %xmm0
855; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
856; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
857; SSE41-NEXT:    pxor %xmm2, %xmm3
858; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
859; SSE41-NEXT:    movdqa %xmm3, %xmm4
860; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
861; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
862; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
863; SSE41-NEXT:    pand %xmm4, %xmm0
864; SSE41-NEXT:    por %xmm3, %xmm0
865; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
866; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
867; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
868; SSE41-NEXT:    retq
869;
870; AVX1-LABEL: trunc_ssat_v2i64_v2i16:
871; AVX1:       # %bb.0:
872; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
873; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
874; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
875; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
876; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
877; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
878; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
879; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
880; AVX1-NEXT:    retq
881;
882; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16:
883; AVX2-SLOW:       # %bb.0:
884; AVX2-SLOW-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
885; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
886; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
887; AVX2-SLOW-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
888; AVX2-SLOW-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
889; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
890; AVX2-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
891; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
892; AVX2-SLOW-NEXT:    retq
893;
894; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16:
895; AVX2-FAST:       # %bb.0:
896; AVX2-FAST-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
897; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
898; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
899; AVX2-FAST-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
900; AVX2-FAST-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
901; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
902; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
903; AVX2-FAST-NEXT:    retq
904;
905; AVX512F-LABEL: trunc_ssat_v2i64_v2i16:
906; AVX512F:       # %bb.0:
907; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
908; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
909; AVX512F-NEXT:    vzeroupper
910; AVX512F-NEXT:    retq
911;
912; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16:
913; AVX512VL:       # %bb.0:
914; AVX512VL-NEXT:    vpmovsqw %xmm0, %xmm0
915; AVX512VL-NEXT:    retq
916;
917; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16:
918; AVX512BW:       # %bb.0:
919; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
920; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
921; AVX512BW-NEXT:    vzeroupper
922; AVX512BW-NEXT:    retq
923;
924; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16:
925; AVX512BWVL:       # %bb.0:
926; AVX512BWVL-NEXT:    vpmovsqw %xmm0, %xmm0
927; AVX512BWVL-NEXT:    retq
928;
929; SKX-LABEL: trunc_ssat_v2i64_v2i16:
930; SKX:       # %bb.0:
931; SKX-NEXT:    vpmovsqw %xmm0, %xmm0
932; SKX-NEXT:    retq
933  %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767> ; lanes below the i16 max?
934  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767> ; %2 = smin(%a0, 32767)
935  %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768> ; lanes above the i16 min?
936  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768> ; %4 = smax(%2, -32768)
937  %5 = trunc <2 x i64> %4 to <2 x i16> ; lanes now fit in i16, so trunc is lossless
938  ret <2 x i16> %5
939}
940
; Store variant of trunc_ssat_v2i64_v2i16: same clamp-to-[-32768, 32767]
; plus truncate, but the <2 x i16> result is stored through %p1 instead of
; returned (lets AVX512VL targets fold the store into vpmovsqw). CHECK lines
; are autogenerated by update_llc_test_checks.py — regenerate, don't hand-edit.
941define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, ptr%p1) {
942; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i16_store:
943; SSE2-SSSE3:       # %bb.0:
944; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
945; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
946; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
947; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
948; SSE2-SSSE3-NEXT:    pxor %xmm4, %xmm4
949; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
950; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147516415,2147516415]
951; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
952; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
953; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
954; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
955; SSE2-SSSE3-NEXT:    por %xmm2, %xmm3
956; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
957; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
958; SSE2-SSSE3-NEXT:    por %xmm0, %xmm3
959; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm1
960; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
961; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm2
962; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
963; SSE2-SSSE3-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
964; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
965; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
966; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
967; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
968; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm3
969; SSE2-SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
970; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
971; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
972; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
973; SSE2-SSSE3-NEXT:    movd %xmm0, (%rdi)
974; SSE2-SSSE3-NEXT:    retq
975;
976; SSE41-LABEL: trunc_ssat_v2i64_v2i16_store:
977; SSE41:       # %bb.0:
978; SSE41-NEXT:    movdqa %xmm0, %xmm1
979; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [32767,32767]
980; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
981; SSE41-NEXT:    pxor %xmm3, %xmm0
982; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415]
983; SSE41-NEXT:    movdqa %xmm0, %xmm5
984; SSE41-NEXT:    pcmpeqd %xmm4, %xmm5
985; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
986; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
987; SSE41-NEXT:    pand %xmm5, %xmm0
988; SSE41-NEXT:    por %xmm4, %xmm0
989; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
990; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
991; SSE41-NEXT:    pxor %xmm2, %xmm3
992; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200]
993; SSE41-NEXT:    movdqa %xmm3, %xmm4
994; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
995; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
996; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
997; SSE41-NEXT:    pand %xmm4, %xmm0
998; SSE41-NEXT:    por %xmm3, %xmm0
999; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
1000; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1001; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1002; SSE41-NEXT:    movd %xmm0, (%rdi)
1003; SSE41-NEXT:    retq
1004;
1005; AVX1-LABEL: trunc_ssat_v2i64_v2i16_store:
1006; AVX1:       # %bb.0:
1007; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
1008; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1009; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1010; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1011; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1012; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1013; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1014; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1015; AVX1-NEXT:    vmovd %xmm0, (%rdi)
1016; AVX1-NEXT:    retq
1017;
1018; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store:
1019; AVX2-SLOW:       # %bb.0:
1020; AVX2-SLOW-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
1021; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1022; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1023; AVX2-SLOW-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1024; AVX2-SLOW-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1025; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1026; AVX2-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1027; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1028; AVX2-SLOW-NEXT:    vmovd %xmm0, (%rdi)
1029; AVX2-SLOW-NEXT:    retq
1030;
1031; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store:
1032; AVX2-FAST:       # %bb.0:
1033; AVX2-FAST-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
1034; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1035; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1036; AVX2-FAST-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1037; AVX2-FAST-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
1038; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1039; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
1040; AVX2-FAST-NEXT:    vmovd %xmm0, (%rdi)
1041; AVX2-FAST-NEXT:    retq
1042;
1043; AVX512F-LABEL: trunc_ssat_v2i64_v2i16_store:
1044; AVX512F:       # %bb.0:
1045; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1046; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1047; AVX512F-NEXT:    vmovd %xmm0, (%rdi)
1048; AVX512F-NEXT:    vzeroupper
1049; AVX512F-NEXT:    retq
1050;
1051; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16_store:
1052; AVX512VL:       # %bb.0:
1053; AVX512VL-NEXT:    vpmovsqw %xmm0, (%rdi)
1054; AVX512VL-NEXT:    retq
1055;
1056; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16_store:
1057; AVX512BW:       # %bb.0:
1058; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1059; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1060; AVX512BW-NEXT:    vmovd %xmm0, (%rdi)
1061; AVX512BW-NEXT:    vzeroupper
1062; AVX512BW-NEXT:    retq
1063;
1064; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16_store:
1065; AVX512BWVL:       # %bb.0:
1066; AVX512BWVL-NEXT:    vpmovsqw %xmm0, (%rdi)
1067; AVX512BWVL-NEXT:    retq
1068;
1069; SKX-LABEL: trunc_ssat_v2i64_v2i16_store:
1070; SKX:       # %bb.0:
1071; SKX-NEXT:    vpmovsqw %xmm0, (%rdi)
1072; SKX-NEXT:    retq
1073  %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767> ; lanes below the i16 max?
1074  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767> ; %2 = smin(%a0, 32767)
1075  %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768> ; lanes above the i16 min?
1076  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768> ; %4 = smax(%2, -32768)
1077  %5 = trunc <2 x i64> %4 to <2 x i16> ; lanes now fit in i16, so trunc is lossless
1078  store <2 x i16> %5, ptr%p1 ; store instead of return (tests store folding)
1079  ret void
1080}
1081
; Signed saturating truncation <4 x i64> -> <4 x i16>: clamp every lane to
; [-32768, 32767] with the icmp+select smin/smax idiom, then truncate.
; CHECK lines are autogenerated by update_llc_test_checks.py — regenerate
; them with that script rather than editing by hand.
1082define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) {
1083; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i16:
1084; SSE2-SSSE3:       # %bb.0:
1085; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [32767,32767]
1086; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1087; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm4
1088; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm4
1089; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
1090; SSE2-SSSE3-NEXT:    pxor %xmm6, %xmm6
1091; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
1092; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147516415,2147516415]
1093; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm8
1094; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm8
1095; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm8[0,0,2,2]
1096; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
1097; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
1098; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
1099; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
1100; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
1101; SSE2-SSSE3-NEXT:    por %xmm5, %xmm0
1102; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm4
1103; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm4
1104; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
1105; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
1106; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm7
1107; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1108; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
1109; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1110; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
1111; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
1112; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
1113; SSE2-SSSE3-NEXT:    por %xmm1, %xmm5
1114; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1115; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
1116; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm3
1117; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
1118; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
1119; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
1120; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744071562035200,18446744071562035200]
1121; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm3
1122; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2]
1123; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm8
1124; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1125; SSE2-SSSE3-NEXT:    por %xmm8, %xmm3
1126; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm5
1127; SSE2-SSSE3-NEXT:    pandn %xmm1, %xmm3
1128; SSE2-SSSE3-NEXT:    por %xmm5, %xmm3
1129; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm2
1130; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1131; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
1132; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm2
1133; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
1134; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm5
1135; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1136; SSE2-SSSE3-NEXT:    por %xmm5, %xmm2
1137; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
1138; SSE2-SSSE3-NEXT:    pandn %xmm1, %xmm2
1139; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
1140; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm0
1141; SSE2-SSSE3-NEXT:    packssdw %xmm0, %xmm0
1142; SSE2-SSSE3-NEXT:    retq
1143;
1144; SSE41-LABEL: trunc_ssat_v4i64_v4i16:
1145; SSE41:       # %bb.0:
1146; SSE41-NEXT:    movdqa %xmm0, %xmm2
1147; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [32767,32767]
1148; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
1149; SSE41-NEXT:    movdqa %xmm0, %xmm5
1150; SSE41-NEXT:    pxor %xmm3, %xmm5
1151; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415]
1152; SSE41-NEXT:    movdqa %xmm6, %xmm7
1153; SSE41-NEXT:    pcmpgtd %xmm5, %xmm7
1154; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
1155; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1156; SSE41-NEXT:    pand %xmm5, %xmm0
1157; SSE41-NEXT:    por %xmm7, %xmm0
1158; SSE41-NEXT:    movapd %xmm4, %xmm5
1159; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
1160; SSE41-NEXT:    movdqa %xmm1, %xmm0
1161; SSE41-NEXT:    pxor %xmm3, %xmm0
1162; SSE41-NEXT:    movdqa %xmm0, %xmm2
1163; SSE41-NEXT:    pcmpeqd %xmm6, %xmm2
1164; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1165; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1166; SSE41-NEXT:    pand %xmm2, %xmm0
1167; SSE41-NEXT:    por %xmm6, %xmm0
1168; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
1169; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1170; SSE41-NEXT:    movapd %xmm4, %xmm2
1171; SSE41-NEXT:    xorpd %xmm3, %xmm2
1172; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200]
1173; SSE41-NEXT:    movapd %xmm2, %xmm7
1174; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
1175; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
1176; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1177; SSE41-NEXT:    pand %xmm7, %xmm0
1178; SSE41-NEXT:    por %xmm2, %xmm0
1179; SSE41-NEXT:    movapd %xmm1, %xmm2
1180; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1181; SSE41-NEXT:    xorpd %xmm5, %xmm3
1182; SSE41-NEXT:    movapd %xmm3, %xmm4
1183; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
1184; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
1185; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1186; SSE41-NEXT:    pand %xmm4, %xmm0
1187; SSE41-NEXT:    por %xmm3, %xmm0
1188; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
1189; SSE41-NEXT:    packssdw %xmm2, %xmm1
1190; SSE41-NEXT:    packssdw %xmm1, %xmm1
1191; SSE41-NEXT:    movdqa %xmm1, %xmm0
1192; SSE41-NEXT:    retq
1193;
1194; AVX1-LABEL: trunc_ssat_v4i64_v4i16:
1195; AVX1:       # %bb.0:
1196; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
1197; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1198; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm2
1199; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1200; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
1201; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1202; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1203; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
1204; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1205; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
1206; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm1, %xmm1
1207; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
1208; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1209; AVX1-NEXT:    vzeroupper
1210; AVX1-NEXT:    retq
1211;
1212; AVX2-LABEL: trunc_ssat_v4i64_v4i16:
1213; AVX2:       # %bb.0:
1214; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767]
1215; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
1216; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1217; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1218; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
1219; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1220; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1221; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1222; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1223; AVX2-NEXT:    vzeroupper
1224; AVX2-NEXT:    retq
1225;
1226; AVX512F-LABEL: trunc_ssat_v4i64_v4i16:
1227; AVX512F:       # %bb.0:
1228; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1229; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1230; AVX512F-NEXT:    vzeroupper
1231; AVX512F-NEXT:    retq
1232;
1233; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16:
1234; AVX512VL:       # %bb.0:
1235; AVX512VL-NEXT:    vpmovsqw %ymm0, %xmm0
1236; AVX512VL-NEXT:    vzeroupper
1237; AVX512VL-NEXT:    retq
1238;
1239; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16:
1240; AVX512BW:       # %bb.0:
1241; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1242; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1243; AVX512BW-NEXT:    vzeroupper
1244; AVX512BW-NEXT:    retq
1245;
1246; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16:
1247; AVX512BWVL:       # %bb.0:
1248; AVX512BWVL-NEXT:    vpmovsqw %ymm0, %xmm0
1249; AVX512BWVL-NEXT:    vzeroupper
1250; AVX512BWVL-NEXT:    retq
1251;
1252; SKX-LABEL: trunc_ssat_v4i64_v4i16:
1253; SKX:       # %bb.0:
1254; SKX-NEXT:    vpmovsqw %ymm0, %xmm0
1255; SKX-NEXT:    vzeroupper
1256; SKX-NEXT:    retq
1257  %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767> ; lanes below the i16 max?
1258  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767> ; %2 = smin(%a0, 32767)
1259  %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768> ; lanes above the i16 min?
1260  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768> ; %4 = smax(%2, -32768)
1261  %5 = trunc <4 x i64> %4 to <4 x i16> ; lanes now fit in i16, so trunc is lossless
1262  ret <4 x i16> %5
1263}
1264
1265define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) {
1266; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i16_store:
1267; SSE2-SSSE3:       # %bb.0:
1268; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [32767,32767]
1269; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1270; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm3
1271; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm3
1272; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
1273; SSE2-SSSE3-NEXT:    pxor %xmm6, %xmm6
1274; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
1275; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147516415,2147516415]
1276; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm8
1277; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm8
1278; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2]
1279; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm9
1280; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3]
1281; SSE2-SSSE3-NEXT:    por %xmm9, %xmm3
1282; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
1283; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm3
1284; SSE2-SSSE3-NEXT:    por %xmm0, %xmm3
1285; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
1286; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
1287; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1288; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
1289; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm7
1290; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1291; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
1292; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1293; SSE2-SSSE3-NEXT:    por %xmm0, %xmm5
1294; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
1295; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
1296; SSE2-SSSE3-NEXT:    por %xmm1, %xmm5
1297; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848]
1298; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
1299; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
1300; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
1301; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
1302; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
1303; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744071562035200,18446744071562035200]
1304; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
1305; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm1[0,0,2,2]
1306; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm8
1307; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1308; SSE2-SSSE3-NEXT:    por %xmm8, %xmm1
1309; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
1310; SSE2-SSSE3-NEXT:    pandn %xmm0, %xmm1
1311; SSE2-SSSE3-NEXT:    por %xmm5, %xmm1
1312; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm2
1313; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1314; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
1315; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm2
1316; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
1317; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm5
1318; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1319; SSE2-SSSE3-NEXT:    por %xmm5, %xmm2
1320; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
1321; SSE2-SSSE3-NEXT:    pandn %xmm0, %xmm2
1322; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
1323; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm2
1324; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm2
1325; SSE2-SSSE3-NEXT:    movq %xmm2, (%rdi)
1326; SSE2-SSSE3-NEXT:    retq
1327;
1328; SSE41-LABEL: trunc_ssat_v4i64_v4i16_store:
1329; SSE41:       # %bb.0:
1330; SSE41-NEXT:    movdqa %xmm0, %xmm2
1331; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [32767,32767]
1332; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
1333; SSE41-NEXT:    movdqa %xmm0, %xmm5
1334; SSE41-NEXT:    pxor %xmm3, %xmm5
1335; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415]
1336; SSE41-NEXT:    movdqa %xmm6, %xmm7
1337; SSE41-NEXT:    pcmpgtd %xmm5, %xmm7
1338; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
1339; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1340; SSE41-NEXT:    pand %xmm5, %xmm0
1341; SSE41-NEXT:    por %xmm7, %xmm0
1342; SSE41-NEXT:    movapd %xmm4, %xmm5
1343; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
1344; SSE41-NEXT:    movdqa %xmm1, %xmm0
1345; SSE41-NEXT:    pxor %xmm3, %xmm0
1346; SSE41-NEXT:    movdqa %xmm0, %xmm2
1347; SSE41-NEXT:    pcmpeqd %xmm6, %xmm2
1348; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1349; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1350; SSE41-NEXT:    pand %xmm2, %xmm0
1351; SSE41-NEXT:    por %xmm6, %xmm0
1352; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
1353; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1354; SSE41-NEXT:    movapd %xmm4, %xmm2
1355; SSE41-NEXT:    xorpd %xmm3, %xmm2
1356; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200]
1357; SSE41-NEXT:    movapd %xmm2, %xmm7
1358; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
1359; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
1360; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1361; SSE41-NEXT:    pand %xmm7, %xmm0
1362; SSE41-NEXT:    por %xmm2, %xmm0
1363; SSE41-NEXT:    movapd %xmm1, %xmm2
1364; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1365; SSE41-NEXT:    xorpd %xmm5, %xmm3
1366; SSE41-NEXT:    movapd %xmm3, %xmm4
1367; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
1368; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
1369; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1370; SSE41-NEXT:    pand %xmm4, %xmm0
1371; SSE41-NEXT:    por %xmm3, %xmm0
1372; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
1373; SSE41-NEXT:    packssdw %xmm2, %xmm1
1374; SSE41-NEXT:    packssdw %xmm1, %xmm1
1375; SSE41-NEXT:    movq %xmm1, (%rdi)
1376; SSE41-NEXT:    retq
1377;
1378; AVX1-LABEL: trunc_ssat_v4i64_v4i16_store:
1379; AVX1:       # %bb.0:
1380; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [32767,32767]
1381; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1382; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm2
1383; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1384; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
1385; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1386; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
1387; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
1388; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1389; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
1390; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm1, %xmm1
1391; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
1392; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1393; AVX1-NEXT:    vmovq %xmm0, (%rdi)
1394; AVX1-NEXT:    vzeroupper
1395; AVX1-NEXT:    retq
1396;
1397; AVX2-LABEL: trunc_ssat_v4i64_v4i16_store:
1398; AVX2:       # %bb.0:
1399; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767]
1400; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
1401; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1402; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1403; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
1404; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1405; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1406; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1407; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1408; AVX2-NEXT:    vmovq %xmm0, (%rdi)
1409; AVX2-NEXT:    vzeroupper
1410; AVX2-NEXT:    retq
1411;
1412; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store:
1413; AVX512F:       # %bb.0:
1414; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1415; AVX512F-NEXT:    vpmovsqw %zmm0, %xmm0
1416; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
1417; AVX512F-NEXT:    vzeroupper
1418; AVX512F-NEXT:    retq
1419;
1420; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16_store:
1421; AVX512VL:       # %bb.0:
1422; AVX512VL-NEXT:    vpmovsqw %ymm0, (%rdi)
1423; AVX512VL-NEXT:    vzeroupper
1424; AVX512VL-NEXT:    retq
1425;
1426; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store:
1427; AVX512BW:       # %bb.0:
1428; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1429; AVX512BW-NEXT:    vpmovsqw %zmm0, %xmm0
1430; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
1431; AVX512BW-NEXT:    vzeroupper
1432; AVX512BW-NEXT:    retq
1433;
1434; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16_store:
1435; AVX512BWVL:       # %bb.0:
1436; AVX512BWVL-NEXT:    vpmovsqw %ymm0, (%rdi)
1437; AVX512BWVL-NEXT:    vzeroupper
1438; AVX512BWVL-NEXT:    retq
1439;
1440; SKX-LABEL: trunc_ssat_v4i64_v4i16_store:
1441; SKX:       # %bb.0:
1442; SKX-NEXT:    vpmovsqw %ymm0, (%rdi)
1443; SKX-NEXT:    vzeroupper
1444; SKX-NEXT:    retq
1445  %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767>
1446  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>
1447  %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1448  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1449  %5 = trunc <4 x i64> %4 to <4 x i16>
1450  store <4 x i16> %5, ptr%p1
1451  ret void
1452}
1453
; Signed-saturating truncation of eight i64 lanes (loaded from %p0) to
; <8 x i16>: the IR clamps to [-32768, 32767] via two icmp/select pairs and
; then truncates. Pre-AVX512 targets must synthesize the 64-bit signed
; min/max with compare/blend sequences (sign-bit-flipped unsigned compares
; on SSE2/SSSE3) before packing; AVX512 lowers the whole pattern to vpmovsqw.
1454define <8 x i16> @trunc_ssat_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="256" {
1455; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i16:
1456; SSE2-SSSE3:       # %bb.0:
1457; SSE2-SSSE3-NEXT:    movdqa (%rdi), %xmm6
1458; SSE2-SSSE3-NEXT:    movdqa 16(%rdi), %xmm0
1459; SSE2-SSSE3-NEXT:    movdqa 32(%rdi), %xmm3
1460; SSE2-SSSE3-NEXT:    movdqa 48(%rdi), %xmm5
1461; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [32767,32767]
1462; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1463; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
1464; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
1465; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3]
1466; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm8
1467; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
1468; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147516415,2147516415]
1469; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
1470; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm10
1471; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1472; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
1473; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3]
1474; SSE2-SSSE3-NEXT:    por %xmm11, %xmm2
1475; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
1476; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm2
1477; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
1478; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
1479; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm3
1480; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3]
1481; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
1482; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
1483; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm10
1484; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1485; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
1486; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm10[1,1,3,3]
1487; SSE2-SSSE3-NEXT:    por %xmm11, %xmm3
1488; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm5
1489; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm3
1490; SSE2-SSSE3-NEXT:    por %xmm5, %xmm3
1491; SSE2-SSSE3-NEXT:    movdqa %xmm6, %xmm5
1492; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm5
1493; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3]
1494; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
1495; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
1496; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm10
1497; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
1498; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
1499; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3]
1500; SSE2-SSSE3-NEXT:    por %xmm11, %xmm5
1501; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm6
1502; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
1503; SSE2-SSSE3-NEXT:    por %xmm6, %xmm5
1504; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm6
1505; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm6
1506; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[1,1,3,3]
1507; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
1508; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
1509; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
1510; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm6
1511; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
1512; SSE2-SSSE3-NEXT:    por %xmm6, %xmm8
1513; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm0
1514; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm8
1515; SSE2-SSSE3-NEXT:    por %xmm0, %xmm8
1516; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848]
1517; SSE2-SSSE3-NEXT:    movdqa %xmm8, %xmm0
1518; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
1519; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3]
1520; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
1521; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm9
1522; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744071562035200,18446744071562035200]
1523; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm0
1524; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2]
1525; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm10
1526; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3]
1527; SSE2-SSSE3-NEXT:    por %xmm10, %xmm9
1528; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm8
1529; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm9
1530; SSE2-SSSE3-NEXT:    por %xmm8, %xmm9
1531; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm0
1532; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
1533; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm0[1,1,3,3]
1534; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm8
1535; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm0
1536; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2]
1537; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm10
1538; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1539; SSE2-SSSE3-NEXT:    por %xmm10, %xmm0
1540; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm5
1541; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm0
1542; SSE2-SSSE3-NEXT:    por %xmm5, %xmm0
1543; SSE2-SSSE3-NEXT:    packssdw %xmm9, %xmm0
1544; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm5
1545; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm5
1546; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3]
1547; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm8
1548; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
1549; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
1550; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm9
1551; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1552; SSE2-SSSE3-NEXT:    por %xmm9, %xmm5
1553; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
1554; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
1555; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
1556; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
1557; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
1558; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm3
1559; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
1560; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1561; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm6
1562; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1563; SSE2-SSSE3-NEXT:    por %xmm6, %xmm1
1564; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
1565; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm1
1566; SSE2-SSSE3-NEXT:    por %xmm2, %xmm1
1567; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm1
1568; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
1569; SSE2-SSSE3-NEXT:    retq
1570;
1571; SSE41-LABEL: trunc_ssat_v8i64_v8i16:
1572; SSE41:       # %bb.0:
1573; SSE41-NEXT:    movdqa (%rdi), %xmm7
1574; SSE41-NEXT:    movdqa 16(%rdi), %xmm5
1575; SSE41-NEXT:    movdqa 32(%rdi), %xmm4
1576; SSE41-NEXT:    movdqa 48(%rdi), %xmm8
1577; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [32767,32767]
1578; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
1579; SSE41-NEXT:    movdqa %xmm4, %xmm3
1580; SSE41-NEXT:    pxor %xmm2, %xmm3
1581; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415]
1582; SSE41-NEXT:    movdqa %xmm6, %xmm9
1583; SSE41-NEXT:    pcmpgtd %xmm3, %xmm9
1584; SSE41-NEXT:    pcmpeqd %xmm6, %xmm3
1585; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
1586; SSE41-NEXT:    pand %xmm3, %xmm0
1587; SSE41-NEXT:    por %xmm9, %xmm0
1588; SSE41-NEXT:    movapd %xmm1, %xmm3
1589; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm3
1590; SSE41-NEXT:    movdqa %xmm8, %xmm4
1591; SSE41-NEXT:    pxor %xmm2, %xmm4
1592; SSE41-NEXT:    movdqa %xmm6, %xmm9
1593; SSE41-NEXT:    pcmpgtd %xmm4, %xmm9
1594; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
1595; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
1596; SSE41-NEXT:    pand %xmm4, %xmm0
1597; SSE41-NEXT:    por %xmm9, %xmm0
1598; SSE41-NEXT:    movapd %xmm1, %xmm4
1599; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm4
1600; SSE41-NEXT:    movdqa %xmm7, %xmm8
1601; SSE41-NEXT:    pxor %xmm2, %xmm8
1602; SSE41-NEXT:    movdqa %xmm6, %xmm9
1603; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
1604; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
1605; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
1606; SSE41-NEXT:    pand %xmm8, %xmm0
1607; SSE41-NEXT:    por %xmm9, %xmm0
1608; SSE41-NEXT:    movapd %xmm1, %xmm8
1609; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm8
1610; SSE41-NEXT:    movdqa %xmm5, %xmm0
1611; SSE41-NEXT:    pxor %xmm2, %xmm0
1612; SSE41-NEXT:    movdqa %xmm0, %xmm7
1613; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
1614; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
1615; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1616; SSE41-NEXT:    pand %xmm7, %xmm0
1617; SSE41-NEXT:    por %xmm6, %xmm0
1618; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
1619; SSE41-NEXT:    movapd {{.*#+}} xmm5 = [18446744073709518848,18446744073709518848]
1620; SSE41-NEXT:    movapd %xmm1, %xmm7
1621; SSE41-NEXT:    xorpd %xmm2, %xmm7
1622; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200]
1623; SSE41-NEXT:    movapd %xmm7, %xmm9
1624; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
1625; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
1626; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1627; SSE41-NEXT:    pand %xmm9, %xmm0
1628; SSE41-NEXT:    por %xmm7, %xmm0
1629; SSE41-NEXT:    movapd %xmm5, %xmm7
1630; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm7
1631; SSE41-NEXT:    movapd %xmm8, %xmm1
1632; SSE41-NEXT:    xorpd %xmm2, %xmm1
1633; SSE41-NEXT:    movapd %xmm1, %xmm9
1634; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
1635; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
1636; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
1637; SSE41-NEXT:    pand %xmm9, %xmm0
1638; SSE41-NEXT:    por %xmm1, %xmm0
1639; SSE41-NEXT:    movapd %xmm5, %xmm1
1640; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm1
1641; SSE41-NEXT:    packssdw %xmm7, %xmm1
1642; SSE41-NEXT:    movapd %xmm4, %xmm7
1643; SSE41-NEXT:    xorpd %xmm2, %xmm7
1644; SSE41-NEXT:    movapd %xmm7, %xmm8
1645; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
1646; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
1647; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
1648; SSE41-NEXT:    pand %xmm8, %xmm0
1649; SSE41-NEXT:    por %xmm7, %xmm0
1650; SSE41-NEXT:    movapd %xmm5, %xmm7
1651; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm7
1652; SSE41-NEXT:    xorpd %xmm3, %xmm2
1653; SSE41-NEXT:    movapd %xmm2, %xmm4
1654; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
1655; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
1656; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1657; SSE41-NEXT:    pand %xmm4, %xmm0
1658; SSE41-NEXT:    por %xmm2, %xmm0
1659; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm5
1660; SSE41-NEXT:    packssdw %xmm7, %xmm5
1661; SSE41-NEXT:    packssdw %xmm5, %xmm1
1662; SSE41-NEXT:    movdqa %xmm1, %xmm0
1663; SSE41-NEXT:    retq
1664;
1665; AVX1-LABEL: trunc_ssat_v8i64_v8i16:
1666; AVX1:       # %bb.0:
1667; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
1668; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
1669; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
1670; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
1671; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm4 = [32767,32767]
1672; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm5
1673; AVX1-NEXT:    vblendvpd %xmm5, %xmm2, %xmm4, %xmm2
1674; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
1675; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
1676; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
1677; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
1678; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
1679; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
1680; AVX1-NEXT:    vpmovsxwq {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848]
1681; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm5
1682; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
1683; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
1684; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
1685; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1686; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm1
1687; AVX1-NEXT:    vblendvpd %xmm1, %xmm3, %xmm4, %xmm1
1688; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm3
1689; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm4, %xmm2
1690; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
1691; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1692; AVX1-NEXT:    retq
1693;
1694; AVX2-LABEL: trunc_ssat_v8i64_v8i16:
1695; AVX2:       # %bb.0:
1696; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
1697; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
1698; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767]
1699; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
1700; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1701; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
1702; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1703; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848]
1704; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
1705; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1706; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
1707; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1708; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
1709; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1710; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1711; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
1712; AVX2-NEXT:    vzeroupper
1713; AVX2-NEXT:    retq
1714;
1715; AVX512-LABEL: trunc_ssat_v8i64_v8i16:
1716; AVX512:       # %bb.0:
1717; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
1718; AVX512-NEXT:    vpmovsqw %zmm0, %xmm0
1719; AVX512-NEXT:    vzeroupper
1720; AVX512-NEXT:    retq
1721;
1722; SKX-LABEL: trunc_ssat_v8i64_v8i16:
1723; SKX:       # %bb.0:
1724; SKX-NEXT:    vmovdqa (%rdi), %ymm0
1725; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
1726; SKX-NEXT:    vpmovsqw %ymm1, %xmm1
1727; SKX-NEXT:    vpmovsqw %ymm0, %xmm0
1728; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1729; SKX-NEXT:    vzeroupper
1730; SKX-NEXT:    retq
; Reference IR below clamps with smin(32767)/smax(-32768) expressed as
; icmp+select pairs before the trunc, matching the check sequences above.
1731  %a0 = load <8 x i64>, ptr %p0
1732  %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
1733  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
1734  %3 = icmp sgt <8 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1735  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>
1736  %5 = trunc <8 x i64> %4 to <8 x i16>
1737  ret <8 x i16> %5
1738}
1739
; Signed-saturating truncation of <4 x i32> to <4 x i16>: clamp to
; [-32768, 32767] then trunc. All targets recognize the pattern and emit a
; single saturating pack (packssdw), so no explicit compare/select code
; remains in the output.
1740define <4 x i16> @trunc_ssat_v4i32_v4i16(<4 x i32> %a0) {
1741; SSE-LABEL: trunc_ssat_v4i32_v4i16:
1742; SSE:       # %bb.0:
1743; SSE-NEXT:    packssdw %xmm0, %xmm0
1744; SSE-NEXT:    retq
1745;
1746; AVX-LABEL: trunc_ssat_v4i32_v4i16:
1747; AVX:       # %bb.0:
1748; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1749; AVX-NEXT:    retq
1750;
1751; AVX512-LABEL: trunc_ssat_v4i32_v4i16:
1752; AVX512:       # %bb.0:
1753; AVX512-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1754; AVX512-NEXT:    retq
1755;
1756; SKX-LABEL: trunc_ssat_v4i32_v4i16:
1757; SKX:       # %bb.0:
1758; SKX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1759; SKX-NEXT:    retq
1760  %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
1761  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
1762  %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1763  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1764  %5 = trunc <4 x i32> %4 to <4 x i16>
1765  ret <4 x i16> %5
1766}
1767
; Same clamp-and-truncate as trunc_ssat_v4i32_v4i16, but the <4 x i16>
; result is stored through %p1 instead of returned. Targets with the
; AVX512VL truncate-store forms fold the whole pattern into a single
; vpmovsdw to memory; the rest pack with packssdw then store 64 bits.
1768define void @trunc_ssat_v4i32_v4i16_store(<4 x i32> %a0, ptr%p1) {
1769; SSE-LABEL: trunc_ssat_v4i32_v4i16_store:
1770; SSE:       # %bb.0:
1771; SSE-NEXT:    packssdw %xmm0, %xmm0
1772; SSE-NEXT:    movq %xmm0, (%rdi)
1773; SSE-NEXT:    retq
1774;
1775; AVX-LABEL: trunc_ssat_v4i32_v4i16_store:
1776; AVX:       # %bb.0:
1777; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1778; AVX-NEXT:    vmovq %xmm0, (%rdi)
1779; AVX-NEXT:    retq
1780;
1781; AVX512F-LABEL: trunc_ssat_v4i32_v4i16_store:
1782; AVX512F:       # %bb.0:
1783; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1784; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
1785; AVX512F-NEXT:    retq
1786;
1787; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16_store:
1788; AVX512VL:       # %bb.0:
1789; AVX512VL-NEXT:    vpmovsdw %xmm0, (%rdi)
1790; AVX512VL-NEXT:    retq
1791;
1792; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16_store:
1793; AVX512BW:       # %bb.0:
1794; AVX512BW-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1795; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
1796; AVX512BW-NEXT:    retq
1797;
1798; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16_store:
1799; AVX512BWVL:       # %bb.0:
1800; AVX512BWVL-NEXT:    vpmovsdw %xmm0, (%rdi)
1801; AVX512BWVL-NEXT:    retq
1802;
1803; SKX-LABEL: trunc_ssat_v4i32_v4i16_store:
1804; SKX:       # %bb.0:
1805; SKX-NEXT:    vpmovsdw %xmm0, (%rdi)
1806; SKX-NEXT:    retq
1807  %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
1808  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
1809  %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1810  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1811  %5 = trunc <4 x i32> %4 to <4 x i16>
1812  store <4 x i16> %5, ptr%p1
1813  ret void
1814}
1815
; Signed-saturating truncation of <8 x i32> to <8 x i16>. SSE targets pack
; the two halves with one packssdw; 256-bit targets extract the upper lane
; and pack; targets with VL extensions use a single vpmovsdw ymm-to-xmm.
1816define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) {
1817; SSE-LABEL: trunc_ssat_v8i32_v8i16:
1818; SSE:       # %bb.0:
1819; SSE-NEXT:    packssdw %xmm1, %xmm0
1820; SSE-NEXT:    retq
1821;
1822; AVX1-LABEL: trunc_ssat_v8i32_v8i16:
1823; AVX1:       # %bb.0:
1824; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1825; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1826; AVX1-NEXT:    vzeroupper
1827; AVX1-NEXT:    retq
1828;
1829; AVX2-LABEL: trunc_ssat_v8i32_v8i16:
1830; AVX2:       # %bb.0:
1831; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1832; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1833; AVX2-NEXT:    vzeroupper
1834; AVX2-NEXT:    retq
1835;
1836; AVX512F-LABEL: trunc_ssat_v8i32_v8i16:
1837; AVX512F:       # %bb.0:
1838; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
1839; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1840; AVX512F-NEXT:    vzeroupper
1841; AVX512F-NEXT:    retq
1842;
1843; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16:
1844; AVX512VL:       # %bb.0:
1845; AVX512VL-NEXT:    vpmovsdw %ymm0, %xmm0
1846; AVX512VL-NEXT:    vzeroupper
1847; AVX512VL-NEXT:    retq
1848;
1849; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16:
1850; AVX512BW:       # %bb.0:
1851; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
1852; AVX512BW-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1853; AVX512BW-NEXT:    vzeroupper
1854; AVX512BW-NEXT:    retq
1855;
1856; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16:
1857; AVX512BWVL:       # %bb.0:
1858; AVX512BWVL-NEXT:    vpmovsdw %ymm0, %xmm0
1859; AVX512BWVL-NEXT:    vzeroupper
1860; AVX512BWVL-NEXT:    retq
1861;
1862; SKX-LABEL: trunc_ssat_v8i32_v8i16:
1863; SKX:       # %bb.0:
1864; SKX-NEXT:    vpmovsdw %ymm0, %xmm0
1865; SKX-NEXT:    vzeroupper
1866; SKX-NEXT:    retq
1867  %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
1868  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
1869  %3 = icmp sgt <8 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1870  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1871  %5 = trunc <8 x i32> %4 to <8 x i16>
1872  ret <8 x i16> %5
1873}
1874
; Signed-saturating truncation of sixteen i32 lanes (loaded from %p0) to
; <16 x i16> under "min-legal-vector-width"="256". Pack-based lowerings
; fold the second operand load into packssdw; 256-bit packs interleave
; lanes, hence the vpermq lane fixup; 512-bit path uses a single vpmovsdw.
1875define <16 x i16> @trunc_ssat_v16i32_v16i16(ptr %p0) "min-legal-vector-width"="256" {
1876; SSE-LABEL: trunc_ssat_v16i32_v16i16:
1877; SSE:       # %bb.0:
1878; SSE-NEXT:    movdqa (%rdi), %xmm0
1879; SSE-NEXT:    movdqa 32(%rdi), %xmm1
1880; SSE-NEXT:    packssdw 16(%rdi), %xmm0
1881; SSE-NEXT:    packssdw 48(%rdi), %xmm1
1882; SSE-NEXT:    retq
1883;
1884; AVX1-LABEL: trunc_ssat_v16i32_v16i16:
1885; AVX1:       # %bb.0:
1886; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
1887; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
1888; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
1889; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
1890; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1891; AVX1-NEXT:    retq
1892;
1893; AVX2-LABEL: trunc_ssat_v16i32_v16i16:
1894; AVX2:       # %bb.0:
1895; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
1896; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
1897; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1898; AVX2-NEXT:    retq
1899;
1900; AVX512-LABEL: trunc_ssat_v16i32_v16i16:
1901; AVX512:       # %bb.0:
1902; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
1903; AVX512-NEXT:    vpmovsdw %zmm0, %ymm0
1904; AVX512-NEXT:    retq
1905;
1906; SKX-LABEL: trunc_ssat_v16i32_v16i16:
1907; SKX:       # %bb.0:
1908; SKX-NEXT:    vmovdqa (%rdi), %ymm0
1909; SKX-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
1910; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1911; SKX-NEXT:    retq
1912  %a0 = load <16 x i32>, ptr %p0
1913  %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
1914  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
1915  %3 = icmp sgt <16 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1916  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
1917  %5 = trunc <16 x i32> %4 to <16 x i16>
1918  ret <16 x i16> %5
1919}
1920
1921;
1922; Signed saturation truncation to vXi8
1923;
1924
; Signed-saturating truncation of <2 x i64> to <2 x i8>: clamp to
; [-128, 127] via two icmp/select pairs, then trunc. Pre-AVX512 targets
; synthesize the 64-bit signed min/max with sign-bit-flipped 32-bit
; compares plus blends, then narrow (pack chain on SSE2, pshufb on
; SSSE3/SSE4.1/AVX); AVX512 targets emit a single vpmovsqb.
1925define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) {
1926; SSE2-LABEL: trunc_ssat_v2i64_v2i8:
1927; SSE2:       # %bb.0:
1928; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1929; SSE2-NEXT:    movdqa %xmm0, %xmm2
1930; SSE2-NEXT:    pxor %xmm1, %xmm2
1931; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1932; SSE2-NEXT:    pxor %xmm4, %xmm4
1933; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
1934; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
1935; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
1936; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1937; SSE2-NEXT:    pand %xmm4, %xmm2
1938; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1939; SSE2-NEXT:    por %xmm2, %xmm3
1940; SSE2-NEXT:    pand %xmm3, %xmm0
1941; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1942; SSE2-NEXT:    por %xmm3, %xmm0
1943; SSE2-NEXT:    pxor %xmm0, %xmm1
1944; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1945; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
1946; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
1947; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1948; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
1949; SSE2-NEXT:    pand %xmm3, %xmm2
1950; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1951; SSE2-NEXT:    por %xmm2, %xmm1
1952; SSE2-NEXT:    pand %xmm1, %xmm0
1953; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1954; SSE2-NEXT:    por %xmm1, %xmm0
1955; SSE2-NEXT:    packssdw %xmm0, %xmm0
1956; SSE2-NEXT:    packssdw %xmm0, %xmm0
1957; SSE2-NEXT:    packsswb %xmm0, %xmm0
1958; SSE2-NEXT:    retq
1959;
1960; SSSE3-LABEL: trunc_ssat_v2i64_v2i8:
1961; SSSE3:       # %bb.0:
1962; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
1963; SSSE3-NEXT:    movdqa %xmm0, %xmm2
1964; SSSE3-NEXT:    pxor %xmm1, %xmm2
1965; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1966; SSSE3-NEXT:    pxor %xmm4, %xmm4
1967; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
1968; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
1969; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
1970; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1971; SSSE3-NEXT:    pand %xmm4, %xmm2
1972; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1973; SSSE3-NEXT:    por %xmm2, %xmm3
1974; SSSE3-NEXT:    pand %xmm3, %xmm0
1975; SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1976; SSSE3-NEXT:    por %xmm3, %xmm0
1977; SSSE3-NEXT:    pxor %xmm0, %xmm1
1978; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1979; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm3
1980; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
1981; SSSE3-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1982; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
1983; SSSE3-NEXT:    pand %xmm3, %xmm2
1984; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1985; SSSE3-NEXT:    por %xmm2, %xmm1
1986; SSSE3-NEXT:    pand %xmm1, %xmm0
1987; SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1988; SSSE3-NEXT:    por %xmm1, %xmm0
1989; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
1990; SSSE3-NEXT:    retq
1991;
1992; SSE41-LABEL: trunc_ssat_v2i64_v2i8:
1993; SSE41:       # %bb.0:
1994; SSE41-NEXT:    movdqa %xmm0, %xmm1
1995; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [127,127]
1996; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
1997; SSE41-NEXT:    pxor %xmm3, %xmm0
1998; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775]
1999; SSE41-NEXT:    movdqa %xmm0, %xmm5
2000; SSE41-NEXT:    pcmpeqd %xmm4, %xmm5
2001; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
2002; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
2003; SSE41-NEXT:    pand %xmm5, %xmm0
2004; SSE41-NEXT:    por %xmm4, %xmm0
2005; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
2006; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2007; SSE41-NEXT:    pxor %xmm2, %xmm3
2008; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2009; SSE41-NEXT:    movdqa %xmm3, %xmm4
2010; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
2011; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
2012; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2013; SSE41-NEXT:    pand %xmm4, %xmm0
2014; SSE41-NEXT:    por %xmm3, %xmm0
2015; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
2016; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2017; SSE41-NEXT:    movdqa %xmm1, %xmm0
2018; SSE41-NEXT:    retq
2019;
2020; AVX-LABEL: trunc_ssat_v2i64_v2i8:
2021; AVX:       # %bb.0:
2022; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [127,127]
2023; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
2024; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2025; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2026; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
2027; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2028; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2029; AVX-NEXT:    retq
2030;
2031; AVX512F-LABEL: trunc_ssat_v2i64_v2i8:
2032; AVX512F:       # %bb.0:
2033; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2034; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2035; AVX512F-NEXT:    vzeroupper
2036; AVX512F-NEXT:    retq
2037;
2038; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8:
2039; AVX512VL:       # %bb.0:
2040; AVX512VL-NEXT:    vpmovsqb %xmm0, %xmm0
2041; AVX512VL-NEXT:    retq
2042;
2043; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8:
2044; AVX512BW:       # %bb.0:
2045; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2046; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2047; AVX512BW-NEXT:    vzeroupper
2048; AVX512BW-NEXT:    retq
2049;
2050; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8:
2051; AVX512BWVL:       # %bb.0:
2052; AVX512BWVL-NEXT:    vpmovsqb %xmm0, %xmm0
2053; AVX512BWVL-NEXT:    retq
2054;
2055; SKX-LABEL: trunc_ssat_v2i64_v2i8:
2056; SKX:       # %bb.0:
2057; SKX-NEXT:    vpmovsqb %xmm0, %xmm0
2058; SKX-NEXT:    retq
2059  %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127>
2060  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127>
2061  %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128>
2062  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128>
2063  %5 = trunc <2 x i64> %4 to <2 x i8>
2064  ret <2 x i8> %5
2065}
2066
2067define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, ptr%p1) {
2068; SSE2-LABEL: trunc_ssat_v2i64_v2i8_store:
2069; SSE2:       # %bb.0:
2070; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2071; SSE2-NEXT:    movdqa %xmm0, %xmm2
2072; SSE2-NEXT:    pxor %xmm1, %xmm2
2073; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
2074; SSE2-NEXT:    pxor %xmm4, %xmm4
2075; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
2076; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
2077; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
2078; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
2079; SSE2-NEXT:    pand %xmm4, %xmm2
2080; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2081; SSE2-NEXT:    por %xmm2, %xmm3
2082; SSE2-NEXT:    pand %xmm3, %xmm0
2083; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2084; SSE2-NEXT:    por %xmm0, %xmm3
2085; SSE2-NEXT:    pxor %xmm3, %xmm1
2086; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2087; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
2088; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
2089; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2090; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
2091; SSE2-NEXT:    pand %xmm2, %xmm0
2092; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2093; SSE2-NEXT:    por %xmm0, %xmm1
2094; SSE2-NEXT:    pand %xmm1, %xmm3
2095; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2096; SSE2-NEXT:    por %xmm3, %xmm1
2097; SSE2-NEXT:    packssdw %xmm1, %xmm1
2098; SSE2-NEXT:    packssdw %xmm1, %xmm1
2099; SSE2-NEXT:    packsswb %xmm1, %xmm1
2100; SSE2-NEXT:    movd %xmm1, %eax
2101; SSE2-NEXT:    movw %ax, (%rdi)
2102; SSE2-NEXT:    retq
2103;
2104; SSSE3-LABEL: trunc_ssat_v2i64_v2i8_store:
2105; SSSE3:       # %bb.0:
2106; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2107; SSSE3-NEXT:    movdqa %xmm0, %xmm2
2108; SSSE3-NEXT:    pxor %xmm1, %xmm2
2109; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
2110; SSSE3-NEXT:    pxor %xmm4, %xmm4
2111; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
2112; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483775,2147483775]
2113; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
2114; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
2115; SSSE3-NEXT:    pand %xmm4, %xmm2
2116; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2117; SSSE3-NEXT:    por %xmm2, %xmm3
2118; SSSE3-NEXT:    pand %xmm3, %xmm0
2119; SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
2120; SSSE3-NEXT:    por %xmm0, %xmm3
2121; SSSE3-NEXT:    pxor %xmm3, %xmm1
2122; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2123; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm2
2124; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
2125; SSSE3-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2126; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
2127; SSSE3-NEXT:    pand %xmm2, %xmm0
2128; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2129; SSSE3-NEXT:    por %xmm0, %xmm1
2130; SSSE3-NEXT:    pand %xmm1, %xmm3
2131; SSSE3-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2132; SSSE3-NEXT:    por %xmm3, %xmm1
2133; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2134; SSSE3-NEXT:    movd %xmm1, %eax
2135; SSSE3-NEXT:    movw %ax, (%rdi)
2136; SSSE3-NEXT:    retq
2137;
2138; SSE41-LABEL: trunc_ssat_v2i64_v2i8_store:
2139; SSE41:       # %bb.0:
2140; SSE41-NEXT:    movdqa %xmm0, %xmm1
2141; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [127,127]
2142; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
2143; SSE41-NEXT:    pxor %xmm3, %xmm0
2144; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775]
2145; SSE41-NEXT:    movdqa %xmm0, %xmm5
2146; SSE41-NEXT:    pcmpeqd %xmm4, %xmm5
2147; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
2148; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
2149; SSE41-NEXT:    pand %xmm5, %xmm0
2150; SSE41-NEXT:    por %xmm4, %xmm0
2151; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
2152; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2153; SSE41-NEXT:    pxor %xmm2, %xmm3
2154; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840]
2155; SSE41-NEXT:    movdqa %xmm3, %xmm4
2156; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
2157; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
2158; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2159; SSE41-NEXT:    pand %xmm4, %xmm0
2160; SSE41-NEXT:    por %xmm3, %xmm0
2161; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
2162; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2163; SSE41-NEXT:    pextrw $0, %xmm1, (%rdi)
2164; SSE41-NEXT:    retq
2165;
2166; AVX-LABEL: trunc_ssat_v2i64_v2i8_store:
2167; AVX:       # %bb.0:
2168; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [127,127]
2169; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
2170; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2171; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2172; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
2173; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
2174; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2175; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
2176; AVX-NEXT:    retq
2177;
2178; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store:
2179; AVX512F:       # %bb.0:
2180; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2181; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2182; AVX512F-NEXT:    vpextrw $0, %xmm0, (%rdi)
2183; AVX512F-NEXT:    vzeroupper
2184; AVX512F-NEXT:    retq
2185;
2186; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8_store:
2187; AVX512VL:       # %bb.0:
2188; AVX512VL-NEXT:    vpmovsqb %xmm0, (%rdi)
2189; AVX512VL-NEXT:    retq
2190;
2191; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8_store:
2192; AVX512BW:       # %bb.0:
2193; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2194; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2195; AVX512BW-NEXT:    vpextrw $0, %xmm0, (%rdi)
2196; AVX512BW-NEXT:    vzeroupper
2197; AVX512BW-NEXT:    retq
2198;
2199; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8_store:
2200; AVX512BWVL:       # %bb.0:
2201; AVX512BWVL-NEXT:    vpmovsqb %xmm0, (%rdi)
2202; AVX512BWVL-NEXT:    retq
2203;
2204; SKX-LABEL: trunc_ssat_v2i64_v2i8_store:
2205; SKX:       # %bb.0:
2206; SKX-NEXT:    vpmovsqb %xmm0, (%rdi)
2207; SKX-NEXT:    retq
2208  %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127>
2209  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127>
2210  %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128>
2211  %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128>
2212  %5 = trunc <2 x i64> %4 to <2 x i8>
2213  store <2 x i8> %5, ptr%p1
2214  ret void
2215}
2216
2217define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) {
2218; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i8:
2219; SSE2-SSSE3:       # %bb.0:
2220; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [127,127]
2221; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
2222; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm4
2223; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm4
2224; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
2225; SSE2-SSSE3-NEXT:    pxor %xmm6, %xmm6
2226; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
2227; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147483775,2147483775]
2228; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm8
2229; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm8
2230; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm8[0,0,2,2]
2231; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
2232; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
2233; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
2234; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
2235; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
2236; SSE2-SSSE3-NEXT:    por %xmm5, %xmm0
2237; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm4
2238; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm4
2239; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
2240; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
2241; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm7
2242; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
2243; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
2244; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
2245; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
2246; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
2247; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
2248; SSE2-SSSE3-NEXT:    por %xmm1, %xmm5
2249; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2250; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
2251; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm3
2252; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2253; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
2254; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
2255; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744071562067840,18446744071562067840]
2256; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm3
2257; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2]
2258; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm8
2259; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2260; SSE2-SSSE3-NEXT:    por %xmm8, %xmm3
2261; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm5
2262; SSE2-SSSE3-NEXT:    pandn %xmm1, %xmm3
2263; SSE2-SSSE3-NEXT:    por %xmm5, %xmm3
2264; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm2
2265; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2266; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
2267; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm2
2268; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
2269; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm5
2270; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2271; SSE2-SSSE3-NEXT:    por %xmm5, %xmm2
2272; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
2273; SSE2-SSSE3-NEXT:    pandn %xmm1, %xmm2
2274; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
2275; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm0
2276; SSE2-SSSE3-NEXT:    packssdw %xmm0, %xmm0
2277; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
2278; SSE2-SSSE3-NEXT:    retq
2279;
2280; SSE41-LABEL: trunc_ssat_v4i64_v4i8:
2281; SSE41:       # %bb.0:
2282; SSE41-NEXT:    movdqa %xmm0, %xmm2
2283; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [127,127]
2284; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
2285; SSE41-NEXT:    movdqa %xmm0, %xmm5
2286; SSE41-NEXT:    pxor %xmm3, %xmm5
2287; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
2288; SSE41-NEXT:    movdqa %xmm6, %xmm7
2289; SSE41-NEXT:    pcmpgtd %xmm5, %xmm7
2290; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
2291; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2292; SSE41-NEXT:    pand %xmm5, %xmm0
2293; SSE41-NEXT:    por %xmm7, %xmm0
2294; SSE41-NEXT:    movapd %xmm4, %xmm5
2295; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
2296; SSE41-NEXT:    movdqa %xmm1, %xmm0
2297; SSE41-NEXT:    pxor %xmm3, %xmm0
2298; SSE41-NEXT:    movdqa %xmm0, %xmm2
2299; SSE41-NEXT:    pcmpeqd %xmm6, %xmm2
2300; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2301; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
2302; SSE41-NEXT:    pand %xmm2, %xmm0
2303; SSE41-NEXT:    por %xmm6, %xmm0
2304; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
2305; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2306; SSE41-NEXT:    movapd %xmm4, %xmm2
2307; SSE41-NEXT:    xorpd %xmm3, %xmm2
2308; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
2309; SSE41-NEXT:    movapd %xmm2, %xmm7
2310; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
2311; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
2312; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
2313; SSE41-NEXT:    pand %xmm7, %xmm0
2314; SSE41-NEXT:    por %xmm2, %xmm0
2315; SSE41-NEXT:    movapd %xmm1, %xmm2
2316; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
2317; SSE41-NEXT:    xorpd %xmm5, %xmm3
2318; SSE41-NEXT:    movapd %xmm3, %xmm4
2319; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2320; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
2321; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2322; SSE41-NEXT:    pand %xmm4, %xmm0
2323; SSE41-NEXT:    por %xmm3, %xmm0
2324; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
2325; SSE41-NEXT:    packssdw %xmm2, %xmm1
2326; SSE41-NEXT:    packssdw %xmm1, %xmm1
2327; SSE41-NEXT:    packsswb %xmm1, %xmm1
2328; SSE41-NEXT:    movdqa %xmm1, %xmm0
2329; SSE41-NEXT:    retq
2330;
2331; AVX1-LABEL: trunc_ssat_v4i64_v4i8:
2332; AVX1:       # %bb.0:
2333; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [127,127]
2334; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
2335; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm2
2336; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2337; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
2338; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
2339; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2340; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
2341; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
2342; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
2343; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm1, %xmm1
2344; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
2345; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2346; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2347; AVX1-NEXT:    vzeroupper
2348; AVX1-NEXT:    retq
2349;
2350; AVX2-LABEL: trunc_ssat_v4i64_v4i8:
2351; AVX2:       # %bb.0:
2352; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127]
2353; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
2354; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2355; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2356; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
2357; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2358; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
2359; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2360; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2361; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2362; AVX2-NEXT:    vzeroupper
2363; AVX2-NEXT:    retq
2364;
2365; AVX512F-LABEL: trunc_ssat_v4i64_v4i8:
2366; AVX512F:       # %bb.0:
2367; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2368; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2369; AVX512F-NEXT:    vzeroupper
2370; AVX512F-NEXT:    retq
2371;
2372; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8:
2373; AVX512VL:       # %bb.0:
2374; AVX512VL-NEXT:    vpmovsqb %ymm0, %xmm0
2375; AVX512VL-NEXT:    vzeroupper
2376; AVX512VL-NEXT:    retq
2377;
2378; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8:
2379; AVX512BW:       # %bb.0:
2380; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2381; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2382; AVX512BW-NEXT:    vzeroupper
2383; AVX512BW-NEXT:    retq
2384;
2385; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8:
2386; AVX512BWVL:       # %bb.0:
2387; AVX512BWVL-NEXT:    vpmovsqb %ymm0, %xmm0
2388; AVX512BWVL-NEXT:    vzeroupper
2389; AVX512BWVL-NEXT:    retq
2390;
2391; SKX-LABEL: trunc_ssat_v4i64_v4i8:
2392; SKX:       # %bb.0:
2393; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
2394; SKX-NEXT:    vzeroupper
2395; SKX-NEXT:    retq
2396  %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127>
2397  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127>
2398  %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128>
2399  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>
2400  %5 = trunc <4 x i64> %4 to <4 x i8>
2401  ret <4 x i8> %5
2402}
2403
2404define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) {
2405; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i8_store:
2406; SSE2-SSSE3:       # %bb.0:
2407; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [127,127]
2408; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
2409; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm3
2410; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm3
2411; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
2412; SSE2-SSSE3-NEXT:    pxor %xmm6, %xmm6
2413; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
2414; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147483775,2147483775]
2415; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm8
2416; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm8
2417; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2]
2418; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm9
2419; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3]
2420; SSE2-SSSE3-NEXT:    por %xmm9, %xmm3
2421; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
2422; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm3
2423; SSE2-SSSE3-NEXT:    por %xmm0, %xmm3
2424; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
2425; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
2426; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2427; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm5
2428; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm7
2429; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2430; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
2431; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
2432; SSE2-SSSE3-NEXT:    por %xmm0, %xmm5
2433; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
2434; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
2435; SSE2-SSSE3-NEXT:    por %xmm1, %xmm5
2436; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [18446744073709551488,18446744073709551488]
2437; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
2438; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
2439; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
2440; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
2441; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
2442; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744071562067840,18446744071562067840]
2443; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
2444; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm1[0,0,2,2]
2445; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm8
2446; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2447; SSE2-SSSE3-NEXT:    por %xmm8, %xmm1
2448; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
2449; SSE2-SSSE3-NEXT:    pandn %xmm0, %xmm1
2450; SSE2-SSSE3-NEXT:    por %xmm5, %xmm1
2451; SSE2-SSSE3-NEXT:    pxor %xmm3, %xmm2
2452; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2453; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
2454; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm2
2455; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
2456; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm5
2457; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2458; SSE2-SSSE3-NEXT:    por %xmm5, %xmm2
2459; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
2460; SSE2-SSSE3-NEXT:    pandn %xmm0, %xmm2
2461; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
2462; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm2
2463; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm2
2464; SSE2-SSSE3-NEXT:    packsswb %xmm2, %xmm2
2465; SSE2-SSSE3-NEXT:    movd %xmm2, (%rdi)
2466; SSE2-SSSE3-NEXT:    retq
2467;
2468; SSE41-LABEL: trunc_ssat_v4i64_v4i8_store:
2469; SSE41:       # %bb.0:
2470; SSE41-NEXT:    movdqa %xmm0, %xmm2
2471; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [127,127]
2472; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648]
2473; SSE41-NEXT:    movdqa %xmm0, %xmm5
2474; SSE41-NEXT:    pxor %xmm3, %xmm5
2475; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
2476; SSE41-NEXT:    movdqa %xmm6, %xmm7
2477; SSE41-NEXT:    pcmpgtd %xmm5, %xmm7
2478; SSE41-NEXT:    pcmpeqd %xmm6, %xmm5
2479; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2480; SSE41-NEXT:    pand %xmm5, %xmm0
2481; SSE41-NEXT:    por %xmm7, %xmm0
2482; SSE41-NEXT:    movapd %xmm4, %xmm5
2483; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
2484; SSE41-NEXT:    movdqa %xmm1, %xmm0
2485; SSE41-NEXT:    pxor %xmm3, %xmm0
2486; SSE41-NEXT:    movdqa %xmm0, %xmm2
2487; SSE41-NEXT:    pcmpeqd %xmm6, %xmm2
2488; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2489; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
2490; SSE41-NEXT:    pand %xmm2, %xmm0
2491; SSE41-NEXT:    por %xmm6, %xmm0
2492; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm4
2493; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2494; SSE41-NEXT:    movapd %xmm4, %xmm2
2495; SSE41-NEXT:    xorpd %xmm3, %xmm2
2496; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
2497; SSE41-NEXT:    movapd %xmm2, %xmm7
2498; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
2499; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
2500; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
2501; SSE41-NEXT:    pand %xmm7, %xmm0
2502; SSE41-NEXT:    por %xmm2, %xmm0
2503; SSE41-NEXT:    movapd %xmm1, %xmm2
2504; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
2505; SSE41-NEXT:    xorpd %xmm5, %xmm3
2506; SSE41-NEXT:    movapd %xmm3, %xmm4
2507; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2508; SSE41-NEXT:    pcmpgtd %xmm6, %xmm3
2509; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
2510; SSE41-NEXT:    pand %xmm4, %xmm0
2511; SSE41-NEXT:    por %xmm3, %xmm0
2512; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
2513; SSE41-NEXT:    packssdw %xmm2, %xmm1
2514; SSE41-NEXT:    packssdw %xmm1, %xmm1
2515; SSE41-NEXT:    packsswb %xmm1, %xmm1
2516; SSE41-NEXT:    movd %xmm1, (%rdi)
2517; SSE41-NEXT:    retq
2518;
2519; AVX1-LABEL: trunc_ssat_v4i64_v4i8_store:
2520; AVX1:       # %bb.0:
2521; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [127,127]
2522; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
2523; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm2
2524; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2525; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
2526; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
2527; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
2528; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
2529; AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
2530; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
2531; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm1, %xmm1
2532; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
2533; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2534; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2535; AVX1-NEXT:    vmovd %xmm0, (%rdi)
2536; AVX1-NEXT:    vzeroupper
2537; AVX1-NEXT:    retq
2538;
2539; AVX2-LABEL: trunc_ssat_v4i64_v4i8_store:
2540; AVX2:       # %bb.0:
2541; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127]
2542; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
2543; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2544; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2545; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
2546; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
2547; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
2548; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2549; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
2550; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2551; AVX2-NEXT:    vmovd %xmm0, (%rdi)
2552; AVX2-NEXT:    vzeroupper
2553; AVX2-NEXT:    retq
2554;
2555; AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store:
2556; AVX512F:       # %bb.0:
2557; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2558; AVX512F-NEXT:    vpmovsqb %zmm0, %xmm0
2559; AVX512F-NEXT:    vmovd %xmm0, (%rdi)
2560; AVX512F-NEXT:    vzeroupper
2561; AVX512F-NEXT:    retq
2562;
2563; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8_store:
2564; AVX512VL:       # %bb.0:
2565; AVX512VL-NEXT:    vpmovsqb %ymm0, (%rdi)
2566; AVX512VL-NEXT:    vzeroupper
2567; AVX512VL-NEXT:    retq
2568;
2569; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store:
2570; AVX512BW:       # %bb.0:
2571; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
2572; AVX512BW-NEXT:    vpmovsqb %zmm0, %xmm0
2573; AVX512BW-NEXT:    vmovd %xmm0, (%rdi)
2574; AVX512BW-NEXT:    vzeroupper
2575; AVX512BW-NEXT:    retq
2576;
2577; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8_store:
2578; AVX512BWVL:       # %bb.0:
2579; AVX512BWVL-NEXT:    vpmovsqb %ymm0, (%rdi)
2580; AVX512BWVL-NEXT:    vzeroupper
2581; AVX512BWVL-NEXT:    retq
2582;
2583; SKX-LABEL: trunc_ssat_v4i64_v4i8_store:
2584; SKX:       # %bb.0:
2585; SKX-NEXT:    vpmovsqb %ymm0, (%rdi)
2586; SKX-NEXT:    vzeroupper
2587; SKX-NEXT:    retq
2588  %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127>
2589  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127>
2590  %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128>
2591  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>
2592  %5 = trunc <4 x i64> %4 to <4 x i8>
2593  store <4 x i8> %5, ptr%p1
2594  ret void
2595}
2596
2597define <8 x i8> @trunc_ssat_v8i64_v8i8(ptr %p0) "min-legal-vector-width"="256" {
2598; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i8:
2599; SSE2-SSSE3:       # %bb.0:
2600; SSE2-SSSE3-NEXT:    movdqa (%rdi), %xmm6
2601; SSE2-SSSE3-NEXT:    movdqa 16(%rdi), %xmm0
2602; SSE2-SSSE3-NEXT:    movdqa 32(%rdi), %xmm3
2603; SSE2-SSSE3-NEXT:    movdqa 48(%rdi), %xmm5
2604; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [127,127]
2605; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
2606; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
2607; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
2608; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3]
2609; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm8
2610; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2611; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147483775,2147483775]
2612; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
2613; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm10
2614; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
2615; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
2616; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3]
2617; SSE2-SSSE3-NEXT:    por %xmm11, %xmm2
2618; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
2619; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm2
2620; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
2621; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
2622; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm3
2623; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3]
2624; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2625; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
2626; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm10
2627; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
2628; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
2629; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm10[1,1,3,3]
2630; SSE2-SSSE3-NEXT:    por %xmm11, %xmm3
2631; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm5
2632; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm3
2633; SSE2-SSSE3-NEXT:    por %xmm5, %xmm3
2634; SSE2-SSSE3-NEXT:    movdqa %xmm6, %xmm5
2635; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm5
2636; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3]
2637; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2638; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
2639; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm10
2640; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
2641; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
2642; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3]
2643; SSE2-SSSE3-NEXT:    por %xmm11, %xmm5
2644; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm6
2645; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
2646; SSE2-SSSE3-NEXT:    por %xmm6, %xmm5
2647; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm6
2648; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm6
2649; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[1,1,3,3]
2650; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2651; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
2652; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
2653; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm6
2654; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
2655; SSE2-SSSE3-NEXT:    por %xmm6, %xmm8
2656; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm0
2657; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm8
2658; SSE2-SSSE3-NEXT:    por %xmm0, %xmm8
2659; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
2660; SSE2-SSSE3-NEXT:    movdqa %xmm8, %xmm0
2661; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
2662; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3]
2663; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm6
2664; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm9
2665; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [18446744071562067840,18446744071562067840]
2666; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm0
2667; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2]
2668; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm10
2669; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3]
2670; SSE2-SSSE3-NEXT:    por %xmm10, %xmm9
2671; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm8
2672; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm9
2673; SSE2-SSSE3-NEXT:    por %xmm8, %xmm9
2674; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm0
2675; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
2676; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm0[1,1,3,3]
2677; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm8
2678; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm0
2679; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2]
2680; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm10
2681; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2682; SSE2-SSSE3-NEXT:    por %xmm10, %xmm0
2683; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm5
2684; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm0
2685; SSE2-SSSE3-NEXT:    por %xmm5, %xmm0
2686; SSE2-SSSE3-NEXT:    packssdw %xmm9, %xmm0
2687; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm5
2688; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm5
2689; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3]
2690; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm8
2691; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
2692; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
2693; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm9
2694; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2695; SSE2-SSSE3-NEXT:    por %xmm9, %xmm5
2696; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
2697; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
2698; SSE2-SSSE3-NEXT:    por %xmm3, %xmm5
2699; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
2700; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
2701; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm3
2702; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
2703; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
2704; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm6
2705; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2706; SSE2-SSSE3-NEXT:    por %xmm6, %xmm1
2707; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
2708; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm1
2709; SSE2-SSSE3-NEXT:    por %xmm2, %xmm1
2710; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm1
2711; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
2712; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
2713; SSE2-SSSE3-NEXT:    retq
2714;
2715; SSE41-LABEL: trunc_ssat_v8i64_v8i8:
2716; SSE41:       # %bb.0:
2717; SSE41-NEXT:    movdqa (%rdi), %xmm7
2718; SSE41-NEXT:    movdqa 16(%rdi), %xmm5
2719; SSE41-NEXT:    movdqa 32(%rdi), %xmm4
2720; SSE41-NEXT:    movdqa 48(%rdi), %xmm8
2721; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [127,127]
2722; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
2723; SSE41-NEXT:    movdqa %xmm4, %xmm3
2724; SSE41-NEXT:    pxor %xmm2, %xmm3
2725; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
2726; SSE41-NEXT:    movdqa %xmm6, %xmm9
2727; SSE41-NEXT:    pcmpgtd %xmm3, %xmm9
2728; SSE41-NEXT:    pcmpeqd %xmm6, %xmm3
2729; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
2730; SSE41-NEXT:    pand %xmm3, %xmm0
2731; SSE41-NEXT:    por %xmm9, %xmm0
2732; SSE41-NEXT:    movapd %xmm1, %xmm3
2733; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm3
2734; SSE41-NEXT:    movdqa %xmm8, %xmm4
2735; SSE41-NEXT:    pxor %xmm2, %xmm4
2736; SSE41-NEXT:    movdqa %xmm6, %xmm9
2737; SSE41-NEXT:    pcmpgtd %xmm4, %xmm9
2738; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2739; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
2740; SSE41-NEXT:    pand %xmm4, %xmm0
2741; SSE41-NEXT:    por %xmm9, %xmm0
2742; SSE41-NEXT:    movapd %xmm1, %xmm4
2743; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm4
2744; SSE41-NEXT:    movdqa %xmm7, %xmm8
2745; SSE41-NEXT:    pxor %xmm2, %xmm8
2746; SSE41-NEXT:    movdqa %xmm6, %xmm9
2747; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
2748; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
2749; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
2750; SSE41-NEXT:    pand %xmm8, %xmm0
2751; SSE41-NEXT:    por %xmm9, %xmm0
2752; SSE41-NEXT:    movapd %xmm1, %xmm8
2753; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm8
2754; SSE41-NEXT:    movdqa %xmm5, %xmm0
2755; SSE41-NEXT:    pxor %xmm2, %xmm0
2756; SSE41-NEXT:    movdqa %xmm0, %xmm7
2757; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
2758; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
2759; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
2760; SSE41-NEXT:    pand %xmm7, %xmm0
2761; SSE41-NEXT:    por %xmm6, %xmm0
2762; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm1
2763; SSE41-NEXT:    movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488]
2764; SSE41-NEXT:    movapd %xmm1, %xmm7
2765; SSE41-NEXT:    xorpd %xmm2, %xmm7
2766; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
2767; SSE41-NEXT:    movapd %xmm7, %xmm9
2768; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
2769; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
2770; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2771; SSE41-NEXT:    pand %xmm9, %xmm0
2772; SSE41-NEXT:    por %xmm7, %xmm0
2773; SSE41-NEXT:    movapd %xmm5, %xmm7
2774; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm7
2775; SSE41-NEXT:    movapd %xmm8, %xmm1
2776; SSE41-NEXT:    xorpd %xmm2, %xmm1
2777; SSE41-NEXT:    movapd %xmm1, %xmm9
2778; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
2779; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
2780; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
2781; SSE41-NEXT:    pand %xmm9, %xmm0
2782; SSE41-NEXT:    por %xmm1, %xmm0
2783; SSE41-NEXT:    movapd %xmm5, %xmm1
2784; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm1
2785; SSE41-NEXT:    packssdw %xmm7, %xmm1
2786; SSE41-NEXT:    movapd %xmm4, %xmm7
2787; SSE41-NEXT:    xorpd %xmm2, %xmm7
2788; SSE41-NEXT:    movapd %xmm7, %xmm8
2789; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
2790; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
2791; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2792; SSE41-NEXT:    pand %xmm8, %xmm0
2793; SSE41-NEXT:    por %xmm7, %xmm0
2794; SSE41-NEXT:    movapd %xmm5, %xmm7
2795; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm7
2796; SSE41-NEXT:    xorpd %xmm3, %xmm2
2797; SSE41-NEXT:    movapd %xmm2, %xmm4
2798; SSE41-NEXT:    pcmpeqd %xmm6, %xmm4
2799; SSE41-NEXT:    pcmpgtd %xmm6, %xmm2
2800; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
2801; SSE41-NEXT:    pand %xmm4, %xmm0
2802; SSE41-NEXT:    por %xmm2, %xmm0
2803; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm5
2804; SSE41-NEXT:    packssdw %xmm7, %xmm5
2805; SSE41-NEXT:    packssdw %xmm5, %xmm1
2806; SSE41-NEXT:    packsswb %xmm1, %xmm1
2807; SSE41-NEXT:    movdqa %xmm1, %xmm0
2808; SSE41-NEXT:    retq
2809;
2810; AVX1-LABEL: trunc_ssat_v8i64_v8i8:
2811; AVX1:       # %bb.0:
2812; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
2813; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
2814; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
2815; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
2816; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [127,127]
2817; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm5
2818; AVX1-NEXT:    vblendvpd %xmm5, %xmm2, %xmm4, %xmm2
2819; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
2820; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
2821; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
2822; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
2823; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
2824; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
2825; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
2826; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm5
2827; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
2828; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
2829; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
2830; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2831; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm1
2832; AVX1-NEXT:    vblendvpd %xmm1, %xmm3, %xmm4, %xmm1
2833; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm3
2834; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm4, %xmm2
2835; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
2836; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2837; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2838; AVX1-NEXT:    retq
2839;
2840; AVX2-LABEL: trunc_ssat_v8i64_v8i8:
2841; AVX2:       # %bb.0:
2842; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
2843; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
2844; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127]
2845; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
2846; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2847; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
2848; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2849; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
2850; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
2851; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2852; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
2853; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2854; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2855; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
2856; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2857; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
2858; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
2859; AVX2-NEXT:    vzeroupper
2860; AVX2-NEXT:    retq
2861;
2862; AVX512-LABEL: trunc_ssat_v8i64_v8i8:
2863; AVX512:       # %bb.0:
2864; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
2865; AVX512-NEXT:    vpmovsqb %zmm0, %xmm0
2866; AVX512-NEXT:    vzeroupper
2867; AVX512-NEXT:    retq
2868;
2869; SKX-LABEL: trunc_ssat_v8i64_v8i8:
2870; SKX:       # %bb.0:
2871; SKX-NEXT:    vmovdqa (%rdi), %ymm0
2872; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
2873; SKX-NEXT:    vpmovsqb %ymm1, %xmm1
2874; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
2875; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2876; SKX-NEXT:    vzeroupper
2877; SKX-NEXT:    retq
2878  %a0 = load <8 x i64>, ptr %p0
2879  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
2880  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
2881  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
2882  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
2883  %5 = trunc <8 x i64> %4 to <8 x i8>
2884  ret <8 x i8> %5
2885}
2886
2887; NOTE: AVX1 provides no 256-bit integer compare/blend, so the AVX1 codegen above already splits the v8i64 clamp into per-128-bit vpcmpgtq+vblendvpd sequences.
2888
; Signed-saturating truncation, store variant: load <8 x i64> from %p0,
; clamp each lane to the signed-i8 range [-128, 127] via icmp+select,
; truncate to <8 x i8>, and store the 8-byte result through %p1.
; "min-legal-vector-width"="256" caps legal vector width at 256 bits
; (exercised by the SKX prefix, which uses ymm vpmovsqb instead of zmm).
; All CHECK lines below are autogenerated by update_llc_test_checks.py
; (see file header) — regenerate them rather than editing by hand.
2889define void @trunc_ssat_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-width"="256" {
2890; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i8_store:
2891; SSE2-SSSE3:       # %bb.0:
2892; SSE2-SSSE3-NEXT:    movdqa (%rdi), %xmm6
2893; SSE2-SSSE3-NEXT:    movdqa 16(%rdi), %xmm3
2894; SSE2-SSSE3-NEXT:    movdqa 32(%rdi), %xmm2
2895; SSE2-SSSE3-NEXT:    movdqa 48(%rdi), %xmm5
2896; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [127,127]
2897; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
2898; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
2899; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm1
2900; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm1[1,1,3,3]
2901; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm8
2902; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2903; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147483775,2147483775]
2904; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
2905; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm10
2906; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
2907; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
2908; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm10[1,1,3,3]
2909; SSE2-SSSE3-NEXT:    por %xmm11, %xmm1
2910; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
2911; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm1
2912; SSE2-SSSE3-NEXT:    por %xmm2, %xmm1
2913; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm2
2914; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm2
2915; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3]
2916; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2917; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
2918; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm10
2919; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
2920; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
2921; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3]
2922; SSE2-SSSE3-NEXT:    por %xmm11, %xmm2
2923; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm5
2924; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm2
2925; SSE2-SSSE3-NEXT:    por %xmm5, %xmm2
2926; SSE2-SSSE3-NEXT:    movdqa %xmm6, %xmm5
2927; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm5
2928; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3]
2929; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2930; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm10
2931; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm10
2932; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
2933; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm11
2934; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3]
2935; SSE2-SSSE3-NEXT:    por %xmm11, %xmm5
2936; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm6
2937; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm5
2938; SSE2-SSSE3-NEXT:    por %xmm6, %xmm5
2939; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm6
2940; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm6
2941; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[1,1,3,3]
2942; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm9
2943; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
2944; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
2945; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm6
2946; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
2947; SSE2-SSSE3-NEXT:    por %xmm6, %xmm7
2948; SSE2-SSSE3-NEXT:    pand %xmm7, %xmm3
2949; SSE2-SSSE3-NEXT:    pandn %xmm4, %xmm7
2950; SSE2-SSSE3-NEXT:    por %xmm3, %xmm7
2951; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488]
2952; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm8
2953; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm8
2954; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm8[1,1,3,3]
2955; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm4
2956; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm9
2957; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
2958; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm8
2959; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm8[0,0,2,2]
2960; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm10
2961; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
2962; SSE2-SSSE3-NEXT:    por %xmm10, %xmm8
2963; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm7
2964; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm8
2965; SSE2-SSSE3-NEXT:    por %xmm7, %xmm8
2966; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm7
2967; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm7
2968; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3]
2969; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm9
2970; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
2971; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
2972; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm10
2973; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
2974; SSE2-SSSE3-NEXT:    por %xmm10, %xmm7
2975; SSE2-SSSE3-NEXT:    pand %xmm7, %xmm5
2976; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm7
2977; SSE2-SSSE3-NEXT:    por %xmm5, %xmm7
2978; SSE2-SSSE3-NEXT:    packssdw %xmm8, %xmm7
2979; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm5
2980; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm5
2981; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3]
2982; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm8
2983; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm5
2984; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
2985; SSE2-SSSE3-NEXT:    pand %xmm8, %xmm9
2986; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2987; SSE2-SSSE3-NEXT:    por %xmm9, %xmm5
2988; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
2989; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm5
2990; SSE2-SSSE3-NEXT:    por %xmm2, %xmm5
2991; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
2992; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2993; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
2994; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm0
2995; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
2996; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm4
2997; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2998; SSE2-SSSE3-NEXT:    por %xmm4, %xmm0
2999; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
3000; SSE2-SSSE3-NEXT:    pandn %xmm3, %xmm0
3001; SSE2-SSSE3-NEXT:    por %xmm1, %xmm0
3002; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm0
3003; SSE2-SSSE3-NEXT:    packssdw %xmm0, %xmm7
3004; SSE2-SSSE3-NEXT:    packsswb %xmm7, %xmm7
3005; SSE2-SSSE3-NEXT:    movq %xmm7, (%rsi)
3006; SSE2-SSSE3-NEXT:    retq
3007;
3008; SSE41-LABEL: trunc_ssat_v8i64_v8i8_store:
3009; SSE41:       # %bb.0:
3010; SSE41-NEXT:    movdqa (%rdi), %xmm7
3011; SSE41-NEXT:    movdqa 16(%rdi), %xmm5
3012; SSE41-NEXT:    movdqa 32(%rdi), %xmm3
3013; SSE41-NEXT:    movdqa 48(%rdi), %xmm8
3014; SSE41-NEXT:    movapd {{.*#+}} xmm4 = [127,127]
3015; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = [2147483648,2147483648]
3016; SSE41-NEXT:    movdqa %xmm3, %xmm2
3017; SSE41-NEXT:    pxor %xmm1, %xmm2
3018; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775]
3019; SSE41-NEXT:    movdqa %xmm6, %xmm9
3020; SSE41-NEXT:    pcmpgtd %xmm2, %xmm9
3021; SSE41-NEXT:    pcmpeqd %xmm6, %xmm2
3022; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3023; SSE41-NEXT:    pand %xmm2, %xmm0
3024; SSE41-NEXT:    por %xmm9, %xmm0
3025; SSE41-NEXT:    movapd %xmm4, %xmm2
3026; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm2
3027; SSE41-NEXT:    movdqa %xmm8, %xmm3
3028; SSE41-NEXT:    pxor %xmm1, %xmm3
3029; SSE41-NEXT:    movdqa %xmm6, %xmm9
3030; SSE41-NEXT:    pcmpgtd %xmm3, %xmm9
3031; SSE41-NEXT:    pcmpeqd %xmm6, %xmm3
3032; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3033; SSE41-NEXT:    pand %xmm3, %xmm0
3034; SSE41-NEXT:    por %xmm9, %xmm0
3035; SSE41-NEXT:    movapd %xmm4, %xmm3
3036; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm3
3037; SSE41-NEXT:    movdqa %xmm7, %xmm8
3038; SSE41-NEXT:    pxor %xmm1, %xmm8
3039; SSE41-NEXT:    movdqa %xmm6, %xmm9
3040; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
3041; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
3042; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3043; SSE41-NEXT:    pand %xmm8, %xmm0
3044; SSE41-NEXT:    por %xmm9, %xmm0
3045; SSE41-NEXT:    movapd %xmm4, %xmm8
3046; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm8
3047; SSE41-NEXT:    movdqa %xmm5, %xmm0
3048; SSE41-NEXT:    pxor %xmm1, %xmm0
3049; SSE41-NEXT:    movdqa %xmm0, %xmm7
3050; SSE41-NEXT:    pcmpeqd %xmm6, %xmm7
3051; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
3052; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3053; SSE41-NEXT:    pand %xmm7, %xmm0
3054; SSE41-NEXT:    por %xmm6, %xmm0
3055; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm4
3056; SSE41-NEXT:    movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488]
3057; SSE41-NEXT:    movapd %xmm4, %xmm7
3058; SSE41-NEXT:    xorpd %xmm1, %xmm7
3059; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840]
3060; SSE41-NEXT:    movapd %xmm7, %xmm9
3061; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
3062; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
3063; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
3064; SSE41-NEXT:    pand %xmm9, %xmm0
3065; SSE41-NEXT:    por %xmm7, %xmm0
3066; SSE41-NEXT:    movapd %xmm5, %xmm7
3067; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm7
3068; SSE41-NEXT:    movapd %xmm8, %xmm4
3069; SSE41-NEXT:    xorpd %xmm1, %xmm4
3070; SSE41-NEXT:    movapd %xmm4, %xmm9
3071; SSE41-NEXT:    pcmpeqd %xmm6, %xmm9
3072; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
3073; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
3074; SSE41-NEXT:    pand %xmm9, %xmm0
3075; SSE41-NEXT:    por %xmm4, %xmm0
3076; SSE41-NEXT:    movapd %xmm5, %xmm4
3077; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm4
3078; SSE41-NEXT:    packssdw %xmm7, %xmm4
3079; SSE41-NEXT:    movapd %xmm3, %xmm7
3080; SSE41-NEXT:    xorpd %xmm1, %xmm7
3081; SSE41-NEXT:    movapd %xmm7, %xmm8
3082; SSE41-NEXT:    pcmpeqd %xmm6, %xmm8
3083; SSE41-NEXT:    pcmpgtd %xmm6, %xmm7
3084; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
3085; SSE41-NEXT:    pand %xmm8, %xmm0
3086; SSE41-NEXT:    por %xmm7, %xmm0
3087; SSE41-NEXT:    movapd %xmm5, %xmm7
3088; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm7
3089; SSE41-NEXT:    xorpd %xmm2, %xmm1
3090; SSE41-NEXT:    movapd %xmm1, %xmm3
3091; SSE41-NEXT:    pcmpeqd %xmm6, %xmm3
3092; SSE41-NEXT:    pcmpgtd %xmm6, %xmm1
3093; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
3094; SSE41-NEXT:    pand %xmm3, %xmm0
3095; SSE41-NEXT:    por %xmm1, %xmm0
3096; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm5
3097; SSE41-NEXT:    packssdw %xmm7, %xmm5
3098; SSE41-NEXT:    packssdw %xmm5, %xmm4
3099; SSE41-NEXT:    packsswb %xmm4, %xmm4
3100; SSE41-NEXT:    movq %xmm4, (%rsi)
3101; SSE41-NEXT:    retq
3102;
3103; AVX1-LABEL: trunc_ssat_v8i64_v8i8_store:
3104; AVX1:       # %bb.0:
3105; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
3106; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
3107; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
3108; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
3109; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [127,127]
3110; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm5
3111; AVX1-NEXT:    vblendvpd %xmm5, %xmm2, %xmm4, %xmm2
3112; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
3113; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
3114; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm4, %xmm5
3115; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
3116; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm5
3117; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
3118; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488]
3119; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm1, %xmm5
3120; AVX1-NEXT:    vblendvpd %xmm5, %xmm1, %xmm4, %xmm1
3121; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm5
3122; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm4, %xmm0
3123; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
3124; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm1
3125; AVX1-NEXT:    vblendvpd %xmm1, %xmm3, %xmm4, %xmm1
3126; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm3
3127; AVX1-NEXT:    vblendvpd %xmm3, %xmm2, %xmm4, %xmm2
3128; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
3129; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
3130; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3131; AVX1-NEXT:    vmovq %xmm0, (%rsi)
3132; AVX1-NEXT:    retq
3133;
3134; AVX2-LABEL: trunc_ssat_v8i64_v8i8_store:
3135; AVX2:       # %bb.0:
3136; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
3137; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
3138; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127]
3139; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm3
3140; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
3141; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm3
3142; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
3143; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
3144; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm3
3145; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
3146; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
3147; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
3148; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
3149; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
3150; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
3151; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
3152; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
3153; AVX2-NEXT:    vmovq %xmm0, (%rsi)
3154; AVX2-NEXT:    vzeroupper
3155; AVX2-NEXT:    retq
3156;
3157; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store:
3158; AVX512:       # %bb.0:
3159; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
3160; AVX512-NEXT:    vpmovsqb %zmm0, (%rsi)
3161; AVX512-NEXT:    vzeroupper
3162; AVX512-NEXT:    retq
3163;
3164; SKX-LABEL: trunc_ssat_v8i64_v8i8_store:
3165; SKX:       # %bb.0:
3166; SKX-NEXT:    vmovdqa (%rdi), %ymm0
3167; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
3168; SKX-NEXT:    vpmovsqb %ymm1, %xmm1
3169; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
3170; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3171; SKX-NEXT:    vmovq %xmm0, (%rsi)
3172; SKX-NEXT:    vzeroupper
3173; SKX-NEXT:    retq
  ; Load, clamp to the i8 range (smin with 127, then smax with -128),
  ; truncate each lane to i8, and store the packed result.
3174  %a0 = load <8 x i64>, ptr %p0
3175  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
3176  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
3177  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
3178  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
3179  %5 = trunc <8 x i64> %4 to <8 x i8>
3180  store <8 x i8> %5, ptr%p1
3181  ret void
3182}
3183
3184define <16 x i8> @trunc_ssat_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="256" {
3185; SSE2-SSSE3-LABEL: trunc_ssat_v16i64_v16i8:
3186; SSE2-SSSE3:       # %bb.0:
3187; SSE2-SSSE3-NEXT:    movdqa (%rdi), %xmm8
3188; SSE2-SSSE3-NEXT:    movdqa 16(%rdi), %xmm0
3189; SSE2-SSSE3-NEXT:    movdqa 32(%rdi), %xmm12
3190; SSE2-SSSE3-NEXT:    movdqa 48(%rdi), %xmm11
3191; SSE2-SSSE3-NEXT:    movdqa 80(%rdi), %xmm7
3192; SSE2-SSSE3-NEXT:    movdqa 64(%rdi), %xmm5
3193; SSE2-SSSE3-NEXT:    movdqa 112(%rdi), %xmm4
3194; SSE2-SSSE3-NEXT:    movdqa 96(%rdi), %xmm3
3195; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm6 = [127,127]
3196; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
3197; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
3198; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm2
3199; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm2[1,1,3,3]
3200; SSE2-SSSE3-NEXT:    pxor %xmm10, %xmm10
3201; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3202; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483775,2147483775]
3203; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3204; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm14
3205; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3206; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3207; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm14[1,1,3,3]
3208; SSE2-SSSE3-NEXT:    por %xmm15, %xmm2
3209; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
3210; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm2
3211; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
3212; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
3213; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm3
3214; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
3215; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3216; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3217; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm14
3218; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3219; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3220; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm14[1,1,3,3]
3221; SSE2-SSSE3-NEXT:    por %xmm15, %xmm3
3222; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
3223; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm3
3224; SSE2-SSSE3-NEXT:    por %xmm4, %xmm3
3225; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm4
3226; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm4
3227; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm4[1,1,3,3]
3228; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3229; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3230; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm14
3231; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3232; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3233; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm14[1,1,3,3]
3234; SSE2-SSSE3-NEXT:    por %xmm15, %xmm4
3235; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm5
3236; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm4
3237; SSE2-SSSE3-NEXT:    por %xmm5, %xmm4
3238; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm5
3239; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm5
3240; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm5[1,1,3,3]
3241; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3242; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3243; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm14
3244; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3245; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3246; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm14[1,1,3,3]
3247; SSE2-SSSE3-NEXT:    por %xmm15, %xmm5
3248; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm7
3249; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm5
3250; SSE2-SSSE3-NEXT:    por %xmm7, %xmm5
3251; SSE2-SSSE3-NEXT:    movdqa %xmm12, %xmm7
3252; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm7
3253; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm7[1,1,3,3]
3254; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3255; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3256; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm14
3257; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3258; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3259; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm14[1,1,3,3]
3260; SSE2-SSSE3-NEXT:    por %xmm15, %xmm7
3261; SSE2-SSSE3-NEXT:    pand %xmm7, %xmm12
3262; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm7
3263; SSE2-SSSE3-NEXT:    por %xmm12, %xmm7
3264; SSE2-SSSE3-NEXT:    movdqa %xmm11, %xmm12
3265; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm12
3266; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[1,1,3,3]
3267; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3268; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3269; SSE2-SSSE3-NEXT:    pcmpgtd %xmm12, %xmm14
3270; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3271; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3272; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm12 = xmm14[1,1,3,3]
3273; SSE2-SSSE3-NEXT:    por %xmm15, %xmm12
3274; SSE2-SSSE3-NEXT:    pand %xmm12, %xmm11
3275; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm12
3276; SSE2-SSSE3-NEXT:    por %xmm11, %xmm12
3277; SSE2-SSSE3-NEXT:    movdqa %xmm8, %xmm11
3278; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm11
3279; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm11[1,1,3,3]
3280; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3281; SSE2-SSSE3-NEXT:    movdqa %xmm9, %xmm14
3282; SSE2-SSSE3-NEXT:    pcmpgtd %xmm11, %xmm14
3283; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
3284; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm15
3285; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm14[1,1,3,3]
3286; SSE2-SSSE3-NEXT:    por %xmm15, %xmm11
3287; SSE2-SSSE3-NEXT:    pand %xmm11, %xmm8
3288; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm11
3289; SSE2-SSSE3-NEXT:    por %xmm8, %xmm11
3290; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm8
3291; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm8
3292; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm8[1,1,3,3]
3293; SSE2-SSSE3-NEXT:    pcmpeqd %xmm10, %xmm13
3294; SSE2-SSSE3-NEXT:    pcmpgtd %xmm8, %xmm9
3295; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm8 = xmm9[0,0,2,2]
3296; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm8
3297; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[1,1,3,3]
3298; SSE2-SSSE3-NEXT:    por %xmm8, %xmm10
3299; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm0
3300; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm10
3301; SSE2-SSSE3-NEXT:    por %xmm0, %xmm10
3302; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm6 = [18446744073709551488,18446744073709551488]
3303; SSE2-SSSE3-NEXT:    movdqa %xmm10, %xmm0
3304; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
3305; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm0[1,1,3,3]
3306; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm8
3307; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm13
3308; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840]
3309; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm0
3310; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm14 = xmm0[0,0,2,2]
3311; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm14
3312; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm0[1,1,3,3]
3313; SSE2-SSSE3-NEXT:    por %xmm14, %xmm13
3314; SSE2-SSSE3-NEXT:    pand %xmm13, %xmm10
3315; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm13
3316; SSE2-SSSE3-NEXT:    por %xmm10, %xmm13
3317; SSE2-SSSE3-NEXT:    movdqa %xmm11, %xmm0
3318; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
3319; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm0[1,1,3,3]
3320; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm10
3321; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm0
3322; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm14 = xmm0[0,0,2,2]
3323; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm14
3324; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
3325; SSE2-SSSE3-NEXT:    por %xmm14, %xmm0
3326; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm11
3327; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm0
3328; SSE2-SSSE3-NEXT:    por %xmm11, %xmm0
3329; SSE2-SSSE3-NEXT:    packssdw %xmm13, %xmm0
3330; SSE2-SSSE3-NEXT:    movdqa %xmm12, %xmm10
3331; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm10
3332; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[1,1,3,3]
3333; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm11
3334; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm10
3335; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm10[0,0,2,2]
3336; SSE2-SSSE3-NEXT:    pand %xmm11, %xmm13
3337; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
3338; SSE2-SSSE3-NEXT:    por %xmm13, %xmm10
3339; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm12
3340; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm10
3341; SSE2-SSSE3-NEXT:    por %xmm12, %xmm10
3342; SSE2-SSSE3-NEXT:    movdqa %xmm7, %xmm11
3343; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm11
3344; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
3345; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm12
3346; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm11
3347; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
3348; SSE2-SSSE3-NEXT:    pand %xmm12, %xmm13
3349; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3]
3350; SSE2-SSSE3-NEXT:    por %xmm13, %xmm11
3351; SSE2-SSSE3-NEXT:    pand %xmm11, %xmm7
3352; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm11
3353; SSE2-SSSE3-NEXT:    por %xmm7, %xmm11
3354; SSE2-SSSE3-NEXT:    packssdw %xmm10, %xmm11
3355; SSE2-SSSE3-NEXT:    packssdw %xmm11, %xmm0
3356; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm7
3357; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm7
3358; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
3359; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm10
3360; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm7
3361; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm7[0,0,2,2]
3362; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm11
3363; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
3364; SSE2-SSSE3-NEXT:    por %xmm11, %xmm7
3365; SSE2-SSSE3-NEXT:    pand %xmm7, %xmm5
3366; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm7
3367; SSE2-SSSE3-NEXT:    por %xmm5, %xmm7
3368; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm5
3369; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm5
3370; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm5[1,1,3,3]
3371; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm10
3372; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm5
3373; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm11 = xmm5[0,0,2,2]
3374; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm11
3375; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
3376; SSE2-SSSE3-NEXT:    por %xmm11, %xmm5
3377; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm4
3378; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm5
3379; SSE2-SSSE3-NEXT:    por %xmm4, %xmm5
3380; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm5
3381; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm4
3382; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm4
3383; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
3384; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm7
3385; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm4
3386; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm4[0,0,2,2]
3387; SSE2-SSSE3-NEXT:    pand %xmm7, %xmm10
3388; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3389; SSE2-SSSE3-NEXT:    por %xmm10, %xmm4
3390; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
3391; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm4
3392; SSE2-SSSE3-NEXT:    por %xmm3, %xmm4
3393; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
3394; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
3395; SSE2-SSSE3-NEXT:    pcmpeqd %xmm8, %xmm3
3396; SSE2-SSSE3-NEXT:    pcmpgtd %xmm9, %xmm1
3397; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm1[0,0,2,2]
3398; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm7
3399; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3400; SSE2-SSSE3-NEXT:    por %xmm7, %xmm1
3401; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
3402; SSE2-SSSE3-NEXT:    pandn %xmm6, %xmm1
3403; SSE2-SSSE3-NEXT:    por %xmm2, %xmm1
3404; SSE2-SSSE3-NEXT:    packssdw %xmm4, %xmm1
3405; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm5
3406; SSE2-SSSE3-NEXT:    packsswb %xmm5, %xmm0
3407; SSE2-SSSE3-NEXT:    retq
3408;
3409; SSE41-LABEL: trunc_ssat_v16i64_v16i8:
3410; SSE41:       # %bb.0:
3411; SSE41-NEXT:    movdqa (%rdi), %xmm8
3412; SSE41-NEXT:    movdqa 16(%rdi), %xmm7
3413; SSE41-NEXT:    movdqa 32(%rdi), %xmm12
3414; SSE41-NEXT:    movdqa 48(%rdi), %xmm11
3415; SSE41-NEXT:    movdqa 80(%rdi), %xmm10
3416; SSE41-NEXT:    movdqa 64(%rdi), %xmm6
3417; SSE41-NEXT:    movdqa 112(%rdi), %xmm5
3418; SSE41-NEXT:    movdqa 96(%rdi), %xmm4
3419; SSE41-NEXT:    movapd {{.*#+}} xmm1 = [127,127]
3420; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
3421; SSE41-NEXT:    movdqa %xmm4, %xmm3
3422; SSE41-NEXT:    pxor %xmm2, %xmm3
3423; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm9 = [2147483775,2147483775]
3424; SSE41-NEXT:    movdqa %xmm9, %xmm13
3425; SSE41-NEXT:    pcmpgtd %xmm3, %xmm13
3426; SSE41-NEXT:    pcmpeqd %xmm9, %xmm3
3427; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3428; SSE41-NEXT:    pand %xmm3, %xmm0
3429; SSE41-NEXT:    por %xmm13, %xmm0
3430; SSE41-NEXT:    movapd %xmm1, %xmm3
3431; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm3
3432; SSE41-NEXT:    movdqa %xmm5, %xmm4
3433; SSE41-NEXT:    pxor %xmm2, %xmm4
3434; SSE41-NEXT:    movdqa %xmm9, %xmm13
3435; SSE41-NEXT:    pcmpgtd %xmm4, %xmm13
3436; SSE41-NEXT:    pcmpeqd %xmm9, %xmm4
3437; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3438; SSE41-NEXT:    pand %xmm4, %xmm0
3439; SSE41-NEXT:    por %xmm13, %xmm0
3440; SSE41-NEXT:    movapd %xmm1, %xmm4
3441; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm4
3442; SSE41-NEXT:    movdqa %xmm6, %xmm5
3443; SSE41-NEXT:    pxor %xmm2, %xmm5
3444; SSE41-NEXT:    movdqa %xmm9, %xmm13
3445; SSE41-NEXT:    pcmpgtd %xmm5, %xmm13
3446; SSE41-NEXT:    pcmpeqd %xmm9, %xmm5
3447; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3448; SSE41-NEXT:    pand %xmm5, %xmm0
3449; SSE41-NEXT:    por %xmm13, %xmm0
3450; SSE41-NEXT:    movapd %xmm1, %xmm5
3451; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm5
3452; SSE41-NEXT:    movdqa %xmm10, %xmm6
3453; SSE41-NEXT:    pxor %xmm2, %xmm6
3454; SSE41-NEXT:    movdqa %xmm9, %xmm13
3455; SSE41-NEXT:    pcmpgtd %xmm6, %xmm13
3456; SSE41-NEXT:    pcmpeqd %xmm9, %xmm6
3457; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3458; SSE41-NEXT:    pand %xmm6, %xmm0
3459; SSE41-NEXT:    por %xmm13, %xmm0
3460; SSE41-NEXT:    movapd %xmm1, %xmm6
3461; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm6
3462; SSE41-NEXT:    movdqa %xmm12, %xmm10
3463; SSE41-NEXT:    pxor %xmm2, %xmm10
3464; SSE41-NEXT:    movdqa %xmm9, %xmm13
3465; SSE41-NEXT:    pcmpgtd %xmm10, %xmm13
3466; SSE41-NEXT:    pcmpeqd %xmm9, %xmm10
3467; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3468; SSE41-NEXT:    pand %xmm10, %xmm0
3469; SSE41-NEXT:    por %xmm13, %xmm0
3470; SSE41-NEXT:    movapd %xmm1, %xmm10
3471; SSE41-NEXT:    blendvpd %xmm0, %xmm12, %xmm10
3472; SSE41-NEXT:    movdqa %xmm11, %xmm12
3473; SSE41-NEXT:    pxor %xmm2, %xmm12
3474; SSE41-NEXT:    movdqa %xmm9, %xmm13
3475; SSE41-NEXT:    pcmpgtd %xmm12, %xmm13
3476; SSE41-NEXT:    pcmpeqd %xmm9, %xmm12
3477; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3478; SSE41-NEXT:    pand %xmm12, %xmm0
3479; SSE41-NEXT:    por %xmm13, %xmm0
3480; SSE41-NEXT:    movapd %xmm1, %xmm12
3481; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm12
3482; SSE41-NEXT:    movdqa %xmm8, %xmm11
3483; SSE41-NEXT:    pxor %xmm2, %xmm11
3484; SSE41-NEXT:    movdqa %xmm9, %xmm13
3485; SSE41-NEXT:    pcmpgtd %xmm11, %xmm13
3486; SSE41-NEXT:    pcmpeqd %xmm9, %xmm11
3487; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2]
3488; SSE41-NEXT:    pand %xmm11, %xmm0
3489; SSE41-NEXT:    por %xmm13, %xmm0
3490; SSE41-NEXT:    movapd %xmm1, %xmm11
3491; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm11
3492; SSE41-NEXT:    movdqa %xmm7, %xmm0
3493; SSE41-NEXT:    pxor %xmm2, %xmm0
3494; SSE41-NEXT:    movdqa %xmm0, %xmm8
3495; SSE41-NEXT:    pcmpeqd %xmm9, %xmm8
3496; SSE41-NEXT:    pcmpgtd %xmm0, %xmm9
3497; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3498; SSE41-NEXT:    pand %xmm8, %xmm0
3499; SSE41-NEXT:    por %xmm9, %xmm0
3500; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm1
3501; SSE41-NEXT:    movapd {{.*#+}} xmm7 = [18446744073709551488,18446744073709551488]
3502; SSE41-NEXT:    movapd %xmm1, %xmm9
3503; SSE41-NEXT:    xorpd %xmm2, %xmm9
3504; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [18446744071562067840,18446744071562067840]
3505; SSE41-NEXT:    movapd %xmm9, %xmm13
3506; SSE41-NEXT:    pcmpeqd %xmm8, %xmm13
3507; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
3508; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3509; SSE41-NEXT:    pand %xmm13, %xmm0
3510; SSE41-NEXT:    por %xmm9, %xmm0
3511; SSE41-NEXT:    movapd %xmm7, %xmm9
3512; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm9
3513; SSE41-NEXT:    movapd %xmm11, %xmm1
3514; SSE41-NEXT:    xorpd %xmm2, %xmm1
3515; SSE41-NEXT:    movapd %xmm1, %xmm13
3516; SSE41-NEXT:    pcmpeqd %xmm8, %xmm13
3517; SSE41-NEXT:    pcmpgtd %xmm8, %xmm1
3518; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2]
3519; SSE41-NEXT:    pand %xmm13, %xmm0
3520; SSE41-NEXT:    por %xmm1, %xmm0
3521; SSE41-NEXT:    movapd %xmm7, %xmm1
3522; SSE41-NEXT:    blendvpd %xmm0, %xmm11, %xmm1
3523; SSE41-NEXT:    packssdw %xmm9, %xmm1
3524; SSE41-NEXT:    movapd %xmm12, %xmm9
3525; SSE41-NEXT:    xorpd %xmm2, %xmm9
3526; SSE41-NEXT:    movapd %xmm9, %xmm11
3527; SSE41-NEXT:    pcmpeqd %xmm8, %xmm11
3528; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
3529; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3530; SSE41-NEXT:    pand %xmm11, %xmm0
3531; SSE41-NEXT:    por %xmm9, %xmm0
3532; SSE41-NEXT:    movapd %xmm7, %xmm9
3533; SSE41-NEXT:    blendvpd %xmm0, %xmm12, %xmm9
3534; SSE41-NEXT:    movapd %xmm10, %xmm11
3535; SSE41-NEXT:    xorpd %xmm2, %xmm11
3536; SSE41-NEXT:    movapd %xmm11, %xmm12
3537; SSE41-NEXT:    pcmpeqd %xmm8, %xmm12
3538; SSE41-NEXT:    pcmpgtd %xmm8, %xmm11
3539; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm11[0,0,2,2]
3540; SSE41-NEXT:    pand %xmm12, %xmm0
3541; SSE41-NEXT:    por %xmm11, %xmm0
3542; SSE41-NEXT:    movapd %xmm7, %xmm11
3543; SSE41-NEXT:    blendvpd %xmm0, %xmm10, %xmm11
3544; SSE41-NEXT:    packssdw %xmm9, %xmm11
3545; SSE41-NEXT:    packssdw %xmm11, %xmm1
3546; SSE41-NEXT:    movapd %xmm6, %xmm9
3547; SSE41-NEXT:    xorpd %xmm2, %xmm9
3548; SSE41-NEXT:    movapd %xmm9, %xmm10
3549; SSE41-NEXT:    pcmpeqd %xmm8, %xmm10
3550; SSE41-NEXT:    pcmpgtd %xmm8, %xmm9
3551; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2]
3552; SSE41-NEXT:    pand %xmm10, %xmm0
3553; SSE41-NEXT:    por %xmm9, %xmm0
3554; SSE41-NEXT:    movapd %xmm7, %xmm9
3555; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm9
3556; SSE41-NEXT:    movapd %xmm5, %xmm6
3557; SSE41-NEXT:    xorpd %xmm2, %xmm6
3558; SSE41-NEXT:    movapd %xmm6, %xmm10
3559; SSE41-NEXT:    pcmpeqd %xmm8, %xmm10
3560; SSE41-NEXT:    pcmpgtd %xmm8, %xmm6
3561; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
3562; SSE41-NEXT:    pand %xmm10, %xmm0
3563; SSE41-NEXT:    por %xmm6, %xmm0
3564; SSE41-NEXT:    movapd %xmm7, %xmm6
3565; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm6
3566; SSE41-NEXT:    packssdw %xmm9, %xmm6
3567; SSE41-NEXT:    movapd %xmm4, %xmm5
3568; SSE41-NEXT:    xorpd %xmm2, %xmm5
3569; SSE41-NEXT:    movapd %xmm5, %xmm9
3570; SSE41-NEXT:    pcmpeqd %xmm8, %xmm9
3571; SSE41-NEXT:    pcmpgtd %xmm8, %xmm5
3572; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
3573; SSE41-NEXT:    pand %xmm9, %xmm0
3574; SSE41-NEXT:    por %xmm5, %xmm0
3575; SSE41-NEXT:    movapd %xmm7, %xmm5
3576; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm5
3577; SSE41-NEXT:    xorpd %xmm3, %xmm2
3578; SSE41-NEXT:    movapd %xmm2, %xmm4
3579; SSE41-NEXT:    pcmpeqd %xmm8, %xmm4
3580; SSE41-NEXT:    pcmpgtd %xmm8, %xmm2
3581; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
3582; SSE41-NEXT:    pand %xmm4, %xmm0
3583; SSE41-NEXT:    por %xmm2, %xmm0
3584; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm7
3585; SSE41-NEXT:    packssdw %xmm5, %xmm7
3586; SSE41-NEXT:    packssdw %xmm7, %xmm6
3587; SSE41-NEXT:    packsswb %xmm6, %xmm1
3588; SSE41-NEXT:    movdqa %xmm1, %xmm0
3589; SSE41-NEXT:    retq
3590;
3591; AVX1-LABEL: trunc_ssat_v16i64_v16i8:
3592; AVX1:       # %bb.0:
3593; AVX1-NEXT:    vmovdqa 96(%rdi), %xmm0
3594; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [127,127]
3595; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm1
3596; AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
3597; AVX1-NEXT:    vmovdqa 112(%rdi), %xmm1
3598; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm3
3599; AVX1-NEXT:    vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
3600; AVX1-NEXT:    vmovdqa 64(%rdi), %xmm3
3601; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm4
3602; AVX1-NEXT:    vblendvpd %xmm4, %xmm3, %xmm2, %xmm3
3603; AVX1-NEXT:    vmovdqa 80(%rdi), %xmm4
3604; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm2, %xmm5
3605; AVX1-NEXT:    vblendvpd %xmm5, %xmm4, %xmm2, %xmm4
3606; AVX1-NEXT:    vmovdqa (%rdi), %xmm5
3607; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm6
3608; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm7
3609; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm8
3610; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm2, %xmm9
3611; AVX1-NEXT:    vblendvpd %xmm9, %xmm7, %xmm2, %xmm7
3612; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm2, %xmm9
3613; AVX1-NEXT:    vblendvpd %xmm9, %xmm8, %xmm2, %xmm8
3614; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm2, %xmm9
3615; AVX1-NEXT:    vblendvpd %xmm9, %xmm5, %xmm2, %xmm5
3616; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm2, %xmm9
3617; AVX1-NEXT:    vblendvpd %xmm9, %xmm6, %xmm2, %xmm2
3618; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm6 = [18446744073709551488,18446744073709551488]
3619; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm2, %xmm9
3620; AVX1-NEXT:    vblendvpd %xmm9, %xmm2, %xmm6, %xmm2
3621; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm5, %xmm9
3622; AVX1-NEXT:    vblendvpd %xmm9, %xmm5, %xmm6, %xmm5
3623; AVX1-NEXT:    vpackssdw %xmm2, %xmm5, %xmm2
3624; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm8, %xmm5
3625; AVX1-NEXT:    vblendvpd %xmm5, %xmm8, %xmm6, %xmm5
3626; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm8
3627; AVX1-NEXT:    vblendvpd %xmm8, %xmm7, %xmm6, %xmm7
3628; AVX1-NEXT:    vpackssdw %xmm5, %xmm7, %xmm5
3629; AVX1-NEXT:    vpackssdw %xmm5, %xmm2, %xmm2
3630; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm5
3631; AVX1-NEXT:    vblendvpd %xmm5, %xmm4, %xmm6, %xmm4
3632; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm3, %xmm5
3633; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm6, %xmm3
3634; AVX1-NEXT:    vpackssdw %xmm4, %xmm3, %xmm3
3635; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm1, %xmm4
3636; AVX1-NEXT:    vblendvpd %xmm4, %xmm1, %xmm6, %xmm1
3637; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm0, %xmm4
3638; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm6, %xmm0
3639; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
3640; AVX1-NEXT:    vpackssdw %xmm0, %xmm3, %xmm0
3641; AVX1-NEXT:    vpacksswb %xmm0, %xmm2, %xmm0
3642; AVX1-NEXT:    retq
3643;
3644; AVX2-LABEL: trunc_ssat_v16i64_v16i8:
3645; AVX2:       # %bb.0:
3646; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
3647; AVX2-NEXT:    vmovdqa 32(%rdi), %ymm1
3648; AVX2-NEXT:    vmovdqa 64(%rdi), %ymm2
3649; AVX2-NEXT:    vmovdqa 96(%rdi), %ymm3
3650; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [127,127,127,127]
3651; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm4, %ymm5
3652; AVX2-NEXT:    vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
3653; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm5
3654; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
3655; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm4, %ymm5
3656; AVX2-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
3657; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm4, %ymm5
3658; AVX2-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
3659; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488]
3660; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm1, %ymm5
3661; AVX2-NEXT:    vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
3662; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm0, %ymm5
3663; AVX2-NEXT:    vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
3664; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
3665; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm3, %ymm1
3666; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm4, %ymm1
3667; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm2, %ymm3
3668; AVX2-NEXT:    vblendvpd %ymm3, %ymm2, %ymm4, %ymm2
3669; AVX2-NEXT:    vpackssdw %ymm1, %ymm2, %ymm1
3670; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
3671; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3672; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
3673; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
3674; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
3675; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
3676; AVX2-NEXT:    vzeroupper
3677; AVX2-NEXT:    retq
3678;
3679; AVX512-LABEL: trunc_ssat_v16i64_v16i8:
3680; AVX512:       # %bb.0:
3681; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
3682; AVX512-NEXT:    vmovdqa64 64(%rdi), %zmm1
3683; AVX512-NEXT:    vpmovsqb %zmm1, %xmm1
3684; AVX512-NEXT:    vpmovsqb %zmm0, %xmm0
3685; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3686; AVX512-NEXT:    vzeroupper
3687; AVX512-NEXT:    retq
3688;
3689; SKX-LABEL: trunc_ssat_v16i64_v16i8:
3690; SKX:       # %bb.0:
3691; SKX-NEXT:    vmovdqa (%rdi), %ymm0
3692; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
3693; SKX-NEXT:    vmovdqa 64(%rdi), %ymm2
3694; SKX-NEXT:    vmovdqa 96(%rdi), %ymm3
3695; SKX-NEXT:    vpmovsqb %ymm3, %xmm3
3696; SKX-NEXT:    vpmovsqb %ymm2, %xmm2
3697; SKX-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3698; SKX-NEXT:    vpmovsqb %ymm1, %xmm1
3699; SKX-NEXT:    vpmovsqb %ymm0, %xmm0
3700; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3701; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3702; SKX-NEXT:    vzeroupper
3703; SKX-NEXT:    retq
3704  %a0 = load <16 x i64>, ptr %p0
3705  %1 = icmp slt <16 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
3706  %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
3707  %3 = icmp sgt <16 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
3708  %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
3709  %5 = trunc <16 x i64> %4 to <16 x i8>
3710  ret <16 x i8> %5
3711}
3712
; Truncate <4 x i32> to <4 x i8> with signed saturation: each lane is
; clamped to [-128, 127] via icmp/select pairs before the trunc, which
; lowers to saturating pack instructions (packssdw + packsswb) on every
; target.
define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v4i32_v4i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm0, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_ssat_v4i32_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v4i32_v4i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; SKX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; SKX-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <4 x i32> %4 to <4 x i8>
  ret <4 x i8> %5
}
3744
; Same signed-saturating <4 x i32> -> <4 x i8> truncation as above, but the
; result is stored through %p1. Targets with AVX512VL (AVX512VL, AVX512BWVL,
; SKX prefixes) fold clamp + truncate + store into a single truncating
; store, vpmovsdb; the rest pack then store the low 32 bits with movd.
define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) {
; SSE-LABEL: trunc_ssat_v4i32_v4i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm0, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movd %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX:       # %bb.0:
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovd %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdb %xmm0, (%rdi)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovd %xmm0, (%rdi)
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdb %xmm0, (%rdi)
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsdb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <4 x i32> %4 to <4 x i8>
  store <4 x i8> %5, ptr%p1
  ret void
}
3796
; Signed-saturating truncation <8 x i32> -> <8 x i8>. The 256-bit source is
; split (extract of the upper 128 bits) and packed on AVX1/AVX2/AVX512F/
; AVX512BW; targets with AVX512VL use a single vpmovsdb on the ymm source.
define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v8i32_v8i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v8i32_v8i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v8i32_v8i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdb %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdb %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i32_v8i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsdb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <8 x i32> %4 to <8 x i8>
  ret <8 x i8> %5
}
3860
; Store form of the signed-saturating <8 x i32> -> <8 x i8> truncation:
; the clamped, truncated vector is written through %p1. Targets with
; AVX512VL fold everything into a truncating store (vpmovsdb to memory);
; the rest pack into a register and store the low 64 bits with movq.
define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) {
; SSE-LABEL: trunc_ssat_v8i32_v8i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsdb %ymm0, (%rdi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovsdb %ymm0, (%rdi)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i32_v8i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsdb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <8 x i32> %4 to <8 x i8>
  store <8 x i8> %5, ptr%p1
  ret void
}
3930
; Signed-saturating truncation of a loaded <16 x i32> to <16 x i8>. The
; "min-legal-vector-width"="256" attribute is honored by the SKX output,
; which uses two 256-bit vpmovsdb halves joined with an unpack instead of
; a single 512-bit operation; pre-AVX512 targets fold the loads into
; saturating packs.
define <16 x i8> @trunc_ssat_v16i32_v16i8(ptr %p0) "min-legal-vector-width"="256" {
; SSE-LABEL: trunc_ssat_v16i32_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm1
; SSE-NEXT:    packssdw 48(%rdi), %xmm1
; SSE-NEXT:    packssdw 16(%rdi), %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v16i32_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v16i32_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v16i32_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v16i32_v16i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa (%rdi), %ymm0
; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
; SKX-NEXT:    vpmovsdb %ymm1, %xmm1
; SKX-NEXT:    vpmovsdb %ymm0, %xmm0
; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %a0 = load <16 x i32>, ptr %p0
  %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <16 x i32> %4 to <16 x i8>
  ret <16 x i8> %5
}
3984
; Store form of the <16 x i32> -> <16 x i8> signed-saturating truncation:
; the source is loaded from %p0 and the saturated result is written to
; %p1. Same lowering pattern as the value-returning variant above, with
; AVX512 using a 512-bit truncating store straight to memory; the
; "min-legal-vector-width"="256" attribute keeps the SKX output at 256 bits.
define void @trunc_ssat_v16i32_v16i8_store(ptr %p0, ptr %p1) "min-legal-vector-width"="256" {
; SSE-LABEL: trunc_ssat_v16i32_v16i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    movdqa 32(%rdi), %xmm1
; SSE-NEXT:    packssdw 48(%rdi), %xmm1
; SSE-NEXT:    packssdw 16(%rdi), %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_ssat_v16i32_v16i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa %xmm0, (%rsi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_ssat_v16i32_v16i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vmovdqa %xmm0, (%rsi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v16i32_v16i8_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    vpmovsdb %zmm0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v16i32_v16i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa (%rdi), %ymm0
; SKX-NEXT:    vmovdqa 32(%rdi), %ymm1
; SKX-NEXT:    vpmovsdb %ymm1, %xmm1
; SKX-NEXT:    vpmovsdb %ymm0, %xmm0
; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SKX-NEXT:    vmovdqa %xmm0, (%rsi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %a0 = load <16 x i32>, ptr %p0
  %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <16 x i32> %4 to <16 x i8>
  store <16 x i8> %5, ptr %p1
  ret void
}
4043
; Signed-saturating truncation <8 x i16> -> <8 x i8>: the clamp to
; [-128, 127] plus trunc maps to a single saturating pack (packsswb) on
; every target.
define <8 x i8> @trunc_ssat_v8i16_v8i8(<8 x i16> %a0) {
; SSE-LABEL: trunc_ssat_v8i16_v8i8:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_ssat_v8i16_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_ssat_v8i16_v8i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i16_v8i8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  ret <8 x i8> %5
}
4071
; Store form of the <8 x i16> -> <8 x i8> signed-saturating truncation.
; Targets with AVX512BW+AVX512VL (AVX512BWVL, SKX prefixes) use the
; word-to-byte truncating store vpmovswb; others pack and store 64 bits
; with movq.
define void @trunc_ssat_v8i16_v8i8_store(<8 x i16> %a0, ptr%p1) {
; SSE-LABEL: trunc_ssat_v8i16_v8i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX:       # %bb.0:
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovq %xmm0, (%rdi)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rdi)
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8_store:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovswb %xmm0, (%rdi)
; AVX512BWVL-NEXT:    retq
;
; SKX-LABEL: trunc_ssat_v8i16_v8i8_store:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovswb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  store <8 x i8> %5, ptr%p1
  ret void
}
4120
; Full-width 256-bit case: the clamp-to-i8-range + trunc pattern should become
; a single packsswb of the two 128-bit halves (with a lane extract on AVX
; targets, since packs operate per 128-bit lane); AVX512BW+VL targets lower it
; to one truncating vpmovswb from ymm to xmm.
4121define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) {
4122; SSE-LABEL: trunc_ssat_v16i16_v16i8:
4123; SSE:       # %bb.0:
4124; SSE-NEXT:    packsswb %xmm1, %xmm0
4125; SSE-NEXT:    retq
4126;
4127; AVX1-LABEL: trunc_ssat_v16i16_v16i8:
4128; AVX1:       # %bb.0:
4129; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4130; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4131; AVX1-NEXT:    vzeroupper
4132; AVX1-NEXT:    retq
4133;
4134; AVX2-LABEL: trunc_ssat_v16i16_v16i8:
4135; AVX2:       # %bb.0:
4136; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
4137; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4138; AVX2-NEXT:    vzeroupper
4139; AVX2-NEXT:    retq
4140;
4141; AVX512F-LABEL: trunc_ssat_v16i16_v16i8:
4142; AVX512F:       # %bb.0:
4143; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
4144; AVX512F-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4145; AVX512F-NEXT:    vzeroupper
4146; AVX512F-NEXT:    retq
4147;
4148; AVX512VL-LABEL: trunc_ssat_v16i16_v16i8:
4149; AVX512VL:       # %bb.0:
4150; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
4151; AVX512VL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4152; AVX512VL-NEXT:    vzeroupper
4153; AVX512VL-NEXT:    retq
4154;
4155; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8:
4156; AVX512BW:       # %bb.0:
4157; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
4158; AVX512BW-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4159; AVX512BW-NEXT:    vzeroupper
4160; AVX512BW-NEXT:    retq
4161;
4162; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8:
4163; AVX512BWVL:       # %bb.0:
4164; AVX512BWVL-NEXT:    vpmovswb %ymm0, %xmm0
4165; AVX512BWVL-NEXT:    vzeroupper
4166; AVX512BWVL-NEXT:    retq
4167;
4168; SKX-LABEL: trunc_ssat_v16i16_v16i8:
4169; SKX:       # %bb.0:
4170; SKX-NEXT:    vpmovswb %ymm0, %xmm0
4171; SKX-NEXT:    vzeroupper
4172; SKX-NEXT:    retq
  ; smin(127) / smax(-128) clamp spelled as icmp+select, then trunc to i8.
4173  %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
4174  %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
4175  %3 = icmp sgt <16 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
4176  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
4177  %5 = trunc <16 x i16> %4 to <16 x i8>
4178  ret <16 x i8> %5
4179}
4180
; 512-bit source loaded from memory, with "min-legal-vector-width"="256" so
; types wider than 256 bits are split unless the target natively supports
; 512-bit ops. Expected lowering: pack instructions with memory operands (SSE/
; AVX1) or a ymm vpacksswb plus the lane-fixing vpermq (AVX2/AVX512 without
; zmm usage); AVX512BW-capable configs use a single zmm->ymm vpmovswb.
4181define <32 x i8> @trunc_ssat_v32i16_v32i8(ptr %p0) "min-legal-vector-width"="256" {
4182; SSE-LABEL: trunc_ssat_v32i16_v32i8:
4183; SSE:       # %bb.0:
4184; SSE-NEXT:    movdqa (%rdi), %xmm0
4185; SSE-NEXT:    movdqa 32(%rdi), %xmm1
4186; SSE-NEXT:    packsswb 16(%rdi), %xmm0
4187; SSE-NEXT:    packsswb 48(%rdi), %xmm1
4188; SSE-NEXT:    retq
4189;
4190; AVX1-LABEL: trunc_ssat_v32i16_v32i8:
4191; AVX1:       # %bb.0:
4192; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
4193; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
4194; AVX1-NEXT:    vpacksswb 48(%rdi), %xmm1, %xmm1
4195; AVX1-NEXT:    vpacksswb 16(%rdi), %xmm0, %xmm0
4196; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
4197; AVX1-NEXT:    retq
4198;
4199; AVX2-LABEL: trunc_ssat_v32i16_v32i8:
4200; AVX2:       # %bb.0:
4201; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
4202; AVX2-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
4203; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4204; AVX2-NEXT:    retq
4205;
4206; AVX512F-LABEL: trunc_ssat_v32i16_v32i8:
4207; AVX512F:       # %bb.0:
4208; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
4209; AVX512F-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
4210; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4211; AVX512F-NEXT:    retq
4212;
4213; AVX512VL-LABEL: trunc_ssat_v32i16_v32i8:
4214; AVX512VL:       # %bb.0:
4215; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
4216; AVX512VL-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
4217; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4218; AVX512VL-NEXT:    retq
4219;
4220; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
4221; AVX512BW:       # %bb.0:
4222; AVX512BW-NEXT:    vmovdqa64 (%rdi), %zmm0
4223; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
4224; AVX512BW-NEXT:    retq
4225;
4226; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8:
4227; AVX512BWVL:       # %bb.0:
4228; AVX512BWVL-NEXT:    vmovdqa64 (%rdi), %zmm0
4229; AVX512BWVL-NEXT:    vpmovswb %zmm0, %ymm0
4230; AVX512BWVL-NEXT:    retq
4231;
4232; SKX-LABEL: trunc_ssat_v32i16_v32i8:
4233; SKX:       # %bb.0:
4234; SKX-NEXT:    vmovdqa (%rdi), %ymm0
4235; SKX-NEXT:    vpacksswb 32(%rdi), %ymm0, %ymm0
4236; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4237; SKX-NEXT:    retq
  ; Load, clamp every lane to [-128, 127] via icmp+select, then trunc to i8.
4238  %a0 = load <32 x i16>, ptr %p0
4239  %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
4240  %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
4241  %3 = icmp sgt <32 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
4242  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
4243  %5 = trunc <32 x i16> %4 to <32 x i8>
4244  ret <32 x i8> %5
4245}
4246
; i32 -> i8 signed-saturating truncation needs two pack stages: packssdw
; (i32 -> i16 with signed saturation) followed by packsswb (i16 -> i8), again
; with memory operands folded where possible. The AVX512 configuration uses
; two zmm->xmm vpmovsdb truncations combined with a vinserti128; note the
; "min-legal-vector-width"="256" attribute, which the SKX prefix exercises by
; keeping to ymm-wide packs instead of zmm ops.
4247define <32 x i8> @trunc_ssat_v32i32_v32i8(ptr %p0) "min-legal-vector-width"="256" {
4248; SSE-LABEL: trunc_ssat_v32i32_v32i8:
4249; SSE:       # %bb.0:
4250; SSE-NEXT:    movdqa (%rdi), %xmm0
4251; SSE-NEXT:    movdqa 32(%rdi), %xmm2
4252; SSE-NEXT:    movdqa 64(%rdi), %xmm1
4253; SSE-NEXT:    movdqa 96(%rdi), %xmm3
4254; SSE-NEXT:    packssdw 48(%rdi), %xmm2
4255; SSE-NEXT:    packssdw 16(%rdi), %xmm0
4256; SSE-NEXT:    packsswb %xmm2, %xmm0
4257; SSE-NEXT:    packssdw 112(%rdi), %xmm3
4258; SSE-NEXT:    packssdw 80(%rdi), %xmm1
4259; SSE-NEXT:    packsswb %xmm3, %xmm1
4260; SSE-NEXT:    retq
4261;
4262; AVX1-LABEL: trunc_ssat_v32i32_v32i8:
4263; AVX1:       # %bb.0:
4264; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
4265; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
4266; AVX1-NEXT:    vmovdqa 64(%rdi), %xmm2
4267; AVX1-NEXT:    vmovdqa 96(%rdi), %xmm3
4268; AVX1-NEXT:    vpackssdw 112(%rdi), %xmm3, %xmm3
4269; AVX1-NEXT:    vpackssdw 80(%rdi), %xmm2, %xmm2
4270; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
4271; AVX1-NEXT:    vpackssdw 48(%rdi), %xmm1, %xmm1
4272; AVX1-NEXT:    vpackssdw 16(%rdi), %xmm0, %xmm0
4273; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
4274; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4275; AVX1-NEXT:    retq
4276;
4277; AVX2-LABEL: trunc_ssat_v32i32_v32i8:
4278; AVX2:       # %bb.0:
4279; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
4280; AVX2-NEXT:    vmovdqa 64(%rdi), %ymm1
4281; AVX2-NEXT:    vpackssdw 96(%rdi), %ymm1, %ymm1
4282; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
4283; AVX2-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
4284; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4285; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
4286; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4287; AVX2-NEXT:    retq
4288;
4289; AVX512-LABEL: trunc_ssat_v32i32_v32i8:
4290; AVX512:       # %bb.0:
4291; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
4292; AVX512-NEXT:    vmovdqa64 64(%rdi), %zmm1
4293; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
4294; AVX512-NEXT:    vpmovsdb %zmm1, %xmm1
4295; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
4296; AVX512-NEXT:    retq
4297;
4298; SKX-LABEL: trunc_ssat_v32i32_v32i8:
4299; SKX:       # %bb.0:
4300; SKX-NEXT:    vmovdqa (%rdi), %ymm0
4301; SKX-NEXT:    vmovdqa 64(%rdi), %ymm1
4302; SKX-NEXT:    vpackssdw 96(%rdi), %ymm1, %ymm1
4303; SKX-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
4304; SKX-NEXT:    vpackssdw 32(%rdi), %ymm0, %ymm0
4305; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4306; SKX-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
4307; SKX-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4308; SKX-NEXT:    retq
  ; Load 32 x i32, clamp to the i8 range [-128, 127] with icmp+select, then
  ; truncate to i8.
4309  %a0 = load <32 x i32>, ptr %p0
4310  %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
4311  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
4312  %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
4313  %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
4314  %5 = trunc <32 x i32> %4 to <32 x i8>
4315  ret <32 x i8> %5
4316}
4317
4318; This used to crash with avx512 because we were missing a check for
4319; unsupported element types like i24.
4320define void @trunc_ssat_v16i32_v16i24(<16 x i32> %x, ptr %y) nounwind {
4321; SSE2-SSSE3-LABEL: trunc_ssat_v16i32_v16i24:
4322; SSE2-SSSE3:       # %bb.0:
4323; SSE2-SSSE3-NEXT:    pushq %rbp
4324; SSE2-SSSE3-NEXT:    pushq %r15
4325; SSE2-SSSE3-NEXT:    pushq %r14
4326; SSE2-SSSE3-NEXT:    pushq %r12
4327; SSE2-SSSE3-NEXT:    pushq %rbx
4328; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [8388607,8388607,8388607,8388607]
4329; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm4
4330; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm4
4331; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
4332; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm4
4333; SSE2-SSSE3-NEXT:    por %xmm3, %xmm4
4334; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm3
4335; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
4336; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
4337; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm3
4338; SSE2-SSSE3-NEXT:    por %xmm2, %xmm3
4339; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm2
4340; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm2
4341; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
4342; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm2
4343; SSE2-SSSE3-NEXT:    por %xmm1, %xmm2
4344; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
4345; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm1
4346; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
4347; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm1
4348; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
4349; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [4286578688,4286578688,4286578688,4286578688]
4350; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
4351; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
4352; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
4353; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm0
4354; SSE2-SSSE3-NEXT:    por %xmm1, %xmm0
4355; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
4356; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm1
4357; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
4358; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm1
4359; SSE2-SSSE3-NEXT:    por %xmm2, %xmm1
4360; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
4361; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm2
4362; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
4363; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm2
4364; SSE2-SSSE3-NEXT:    por %xmm3, %xmm2
4365; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
4366; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm3
4367; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
4368; SSE2-SSSE3-NEXT:    pandn %xmm5, %xmm3
4369; SSE2-SSSE3-NEXT:    por %xmm4, %xmm3
4370; SSE2-SSSE3-NEXT:    movd %xmm3, %r8d
4371; SSE2-SSSE3-NEXT:    movw %r8w, 36(%rdi)
4372; SSE2-SSSE3-NEXT:    movd %xmm2, %r11d
4373; SSE2-SSSE3-NEXT:    movw %r11w, 24(%rdi)
4374; SSE2-SSSE3-NEXT:    movd %xmm1, %r14d
4375; SSE2-SSSE3-NEXT:    movw %r14w, 12(%rdi)
4376; SSE2-SSSE3-NEXT:    movd %xmm0, %eax
4377; SSE2-SSSE3-NEXT:    movw %ax, (%rdi)
4378; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[3,3,3,3]
4379; SSE2-SSSE3-NEXT:    movd %xmm4, %ecx
4380; SSE2-SSSE3-NEXT:    movw %cx, 45(%rdi)
4381; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
4382; SSE2-SSSE3-NEXT:    movd %xmm4, %edx
4383; SSE2-SSSE3-NEXT:    movw %dx, 42(%rdi)
4384; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1]
4385; SSE2-SSSE3-NEXT:    movd %xmm3, %esi
4386; SSE2-SSSE3-NEXT:    movw %si, 39(%rdi)
4387; SSE2-SSSE3-NEXT:    shrl $16, %r8d
4388; SSE2-SSSE3-NEXT:    movb %r8b, 38(%rdi)
4389; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3]
4390; SSE2-SSSE3-NEXT:    movd %xmm3, %r8d
4391; SSE2-SSSE3-NEXT:    movw %r8w, 33(%rdi)
4392; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
4393; SSE2-SSSE3-NEXT:    movd %xmm3, %r9d
4394; SSE2-SSSE3-NEXT:    movw %r9w, 30(%rdi)
4395; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
4396; SSE2-SSSE3-NEXT:    movd %xmm2, %r10d
4397; SSE2-SSSE3-NEXT:    movw %r10w, 27(%rdi)
4398; SSE2-SSSE3-NEXT:    shrl $16, %r11d
4399; SSE2-SSSE3-NEXT:    movb %r11b, 26(%rdi)
4400; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
4401; SSE2-SSSE3-NEXT:    movd %xmm2, %r11d
4402; SSE2-SSSE3-NEXT:    movw %r11w, 21(%rdi)
4403; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
4404; SSE2-SSSE3-NEXT:    movd %xmm2, %ebx
4405; SSE2-SSSE3-NEXT:    movw %bx, 18(%rdi)
4406; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
4407; SSE2-SSSE3-NEXT:    movd %xmm1, %ebp
4408; SSE2-SSSE3-NEXT:    movw %bp, 15(%rdi)
4409; SSE2-SSSE3-NEXT:    shrl $16, %r14d
4410; SSE2-SSSE3-NEXT:    movb %r14b, 14(%rdi)
4411; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
4412; SSE2-SSSE3-NEXT:    movd %xmm1, %r14d
4413; SSE2-SSSE3-NEXT:    movw %r14w, 9(%rdi)
4414; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
4415; SSE2-SSSE3-NEXT:    movd %xmm1, %r15d
4416; SSE2-SSSE3-NEXT:    movw %r15w, 6(%rdi)
4417; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
4418; SSE2-SSSE3-NEXT:    movd %xmm0, %r12d
4419; SSE2-SSSE3-NEXT:    movw %r12w, 3(%rdi)
4420; SSE2-SSSE3-NEXT:    shrl $16, %eax
4421; SSE2-SSSE3-NEXT:    movb %al, 2(%rdi)
4422; SSE2-SSSE3-NEXT:    shrl $16, %ecx
4423; SSE2-SSSE3-NEXT:    movb %cl, 47(%rdi)
4424; SSE2-SSSE3-NEXT:    shrl $16, %edx
4425; SSE2-SSSE3-NEXT:    movb %dl, 44(%rdi)
4426; SSE2-SSSE3-NEXT:    shrl $16, %esi
4427; SSE2-SSSE3-NEXT:    movb %sil, 41(%rdi)
4428; SSE2-SSSE3-NEXT:    shrl $16, %r8d
4429; SSE2-SSSE3-NEXT:    movb %r8b, 35(%rdi)
4430; SSE2-SSSE3-NEXT:    shrl $16, %r9d
4431; SSE2-SSSE3-NEXT:    movb %r9b, 32(%rdi)
4432; SSE2-SSSE3-NEXT:    shrl $16, %r10d
4433; SSE2-SSSE3-NEXT:    movb %r10b, 29(%rdi)
4434; SSE2-SSSE3-NEXT:    shrl $16, %r11d
4435; SSE2-SSSE3-NEXT:    movb %r11b, 23(%rdi)
4436; SSE2-SSSE3-NEXT:    shrl $16, %ebx
4437; SSE2-SSSE3-NEXT:    movb %bl, 20(%rdi)
4438; SSE2-SSSE3-NEXT:    shrl $16, %ebp
4439; SSE2-SSSE3-NEXT:    movb %bpl, 17(%rdi)
4440; SSE2-SSSE3-NEXT:    shrl $16, %r14d
4441; SSE2-SSSE3-NEXT:    movb %r14b, 11(%rdi)
4442; SSE2-SSSE3-NEXT:    shrl $16, %r15d
4443; SSE2-SSSE3-NEXT:    movb %r15b, 8(%rdi)
4444; SSE2-SSSE3-NEXT:    shrl $16, %r12d
4445; SSE2-SSSE3-NEXT:    movb %r12b, 5(%rdi)
4446; SSE2-SSSE3-NEXT:    popq %rbx
4447; SSE2-SSSE3-NEXT:    popq %r12
4448; SSE2-SSSE3-NEXT:    popq %r14
4449; SSE2-SSSE3-NEXT:    popq %r15
4450; SSE2-SSSE3-NEXT:    popq %rbp
4451; SSE2-SSSE3-NEXT:    retq
4452;
4453; SSE41-LABEL: trunc_ssat_v16i32_v16i24:
4454; SSE41:       # %bb.0:
4455; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm4 = [65535,127,65535,127,65535,127,65535,127]
4456; SSE41-NEXT:    pminsd %xmm4, %xmm3
4457; SSE41-NEXT:    pminsd %xmm4, %xmm2
4458; SSE41-NEXT:    pminsd %xmm4, %xmm1
4459; SSE41-NEXT:    pminsd %xmm4, %xmm0
4460; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm4 = [0,65408,0,65408,0,65408,0,65408]
4461; SSE41-NEXT:    pmaxsd %xmm4, %xmm0
4462; SSE41-NEXT:    pmaxsd %xmm4, %xmm1
4463; SSE41-NEXT:    pmaxsd %xmm4, %xmm2
4464; SSE41-NEXT:    pmaxsd %xmm4, %xmm3
4465; SSE41-NEXT:    pextrd $3, %xmm3, %eax
4466; SSE41-NEXT:    movw %ax, 45(%rdi)
4467; SSE41-NEXT:    shrl $16, %eax
4468; SSE41-NEXT:    movb %al, 47(%rdi)
4469; SSE41-NEXT:    pextrd $2, %xmm3, %eax
4470; SSE41-NEXT:    movw %ax, 42(%rdi)
4471; SSE41-NEXT:    shrl $16, %eax
4472; SSE41-NEXT:    movb %al, 44(%rdi)
4473; SSE41-NEXT:    pextrd $1, %xmm3, %eax
4474; SSE41-NEXT:    movw %ax, 39(%rdi)
4475; SSE41-NEXT:    shrl $16, %eax
4476; SSE41-NEXT:    movb %al, 41(%rdi)
4477; SSE41-NEXT:    movd %xmm3, %eax
4478; SSE41-NEXT:    movw %ax, 36(%rdi)
4479; SSE41-NEXT:    shrl $16, %eax
4480; SSE41-NEXT:    movb %al, 38(%rdi)
4481; SSE41-NEXT:    pextrd $3, %xmm2, %eax
4482; SSE41-NEXT:    movw %ax, 33(%rdi)
4483; SSE41-NEXT:    shrl $16, %eax
4484; SSE41-NEXT:    movb %al, 35(%rdi)
4485; SSE41-NEXT:    pextrd $2, %xmm2, %eax
4486; SSE41-NEXT:    movw %ax, 30(%rdi)
4487; SSE41-NEXT:    shrl $16, %eax
4488; SSE41-NEXT:    movb %al, 32(%rdi)
4489; SSE41-NEXT:    pextrd $1, %xmm2, %eax
4490; SSE41-NEXT:    movw %ax, 27(%rdi)
4491; SSE41-NEXT:    shrl $16, %eax
4492; SSE41-NEXT:    movb %al, 29(%rdi)
4493; SSE41-NEXT:    movd %xmm2, %eax
4494; SSE41-NEXT:    movw %ax, 24(%rdi)
4495; SSE41-NEXT:    shrl $16, %eax
4496; SSE41-NEXT:    movb %al, 26(%rdi)
4497; SSE41-NEXT:    pextrd $3, %xmm1, %eax
4498; SSE41-NEXT:    movw %ax, 21(%rdi)
4499; SSE41-NEXT:    shrl $16, %eax
4500; SSE41-NEXT:    movb %al, 23(%rdi)
4501; SSE41-NEXT:    pextrd $2, %xmm1, %eax
4502; SSE41-NEXT:    movw %ax, 18(%rdi)
4503; SSE41-NEXT:    shrl $16, %eax
4504; SSE41-NEXT:    movb %al, 20(%rdi)
4505; SSE41-NEXT:    pextrd $1, %xmm1, %eax
4506; SSE41-NEXT:    movw %ax, 15(%rdi)
4507; SSE41-NEXT:    shrl $16, %eax
4508; SSE41-NEXT:    movb %al, 17(%rdi)
4509; SSE41-NEXT:    movd %xmm1, %eax
4510; SSE41-NEXT:    movw %ax, 12(%rdi)
4511; SSE41-NEXT:    shrl $16, %eax
4512; SSE41-NEXT:    movb %al, 14(%rdi)
4513; SSE41-NEXT:    pextrd $3, %xmm0, %eax
4514; SSE41-NEXT:    movw %ax, 9(%rdi)
4515; SSE41-NEXT:    shrl $16, %eax
4516; SSE41-NEXT:    movb %al, 11(%rdi)
4517; SSE41-NEXT:    pextrd $2, %xmm0, %eax
4518; SSE41-NEXT:    movw %ax, 6(%rdi)
4519; SSE41-NEXT:    shrl $16, %eax
4520; SSE41-NEXT:    movb %al, 8(%rdi)
4521; SSE41-NEXT:    pextrd $1, %xmm0, %eax
4522; SSE41-NEXT:    movw %ax, 3(%rdi)
4523; SSE41-NEXT:    shrl $16, %eax
4524; SSE41-NEXT:    movb %al, 5(%rdi)
4525; SSE41-NEXT:    movd %xmm0, %eax
4526; SSE41-NEXT:    movw %ax, (%rdi)
4527; SSE41-NEXT:    shrl $16, %eax
4528; SSE41-NEXT:    movb %al, 2(%rdi)
4529; SSE41-NEXT:    retq
4530;
4531; AVX1-LABEL: trunc_ssat_v16i32_v16i24:
4532; AVX1:       # %bb.0:
4533; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
4534; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [8388607,8388607,8388607,8388607]
4535; AVX1-NEXT:    vpminsd %xmm3, %xmm2, %xmm4
4536; AVX1-NEXT:    vpminsd %xmm3, %xmm1, %xmm2
4537; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
4538; AVX1-NEXT:    vpminsd %xmm3, %xmm1, %xmm1
4539; AVX1-NEXT:    vpminsd %xmm3, %xmm0, %xmm0
4540; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [4286578688,4286578688,4286578688,4286578688]
4541; AVX1-NEXT:    vpmaxsd %xmm3, %xmm0, %xmm0
4542; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm1
4543; AVX1-NEXT:    vpmaxsd %xmm3, %xmm2, %xmm2
4544; AVX1-NEXT:    vpmaxsd %xmm3, %xmm4, %xmm3
4545; AVX1-NEXT:    vpextrd $3, %xmm3, %eax
4546; AVX1-NEXT:    movw %ax, 45(%rdi)
4547; AVX1-NEXT:    shrl $16, %eax
4548; AVX1-NEXT:    movb %al, 47(%rdi)
4549; AVX1-NEXT:    vpextrd $2, %xmm3, %eax
4550; AVX1-NEXT:    movw %ax, 42(%rdi)
4551; AVX1-NEXT:    shrl $16, %eax
4552; AVX1-NEXT:    movb %al, 44(%rdi)
4553; AVX1-NEXT:    vpextrd $1, %xmm3, %eax
4554; AVX1-NEXT:    movw %ax, 39(%rdi)
4555; AVX1-NEXT:    shrl $16, %eax
4556; AVX1-NEXT:    movb %al, 41(%rdi)
4557; AVX1-NEXT:    vmovd %xmm3, %eax
4558; AVX1-NEXT:    movw %ax, 36(%rdi)
4559; AVX1-NEXT:    shrl $16, %eax
4560; AVX1-NEXT:    movb %al, 38(%rdi)
4561; AVX1-NEXT:    vpextrd $3, %xmm2, %eax
4562; AVX1-NEXT:    movw %ax, 33(%rdi)
4563; AVX1-NEXT:    shrl $16, %eax
4564; AVX1-NEXT:    movb %al, 35(%rdi)
4565; AVX1-NEXT:    vpextrd $2, %xmm2, %eax
4566; AVX1-NEXT:    movw %ax, 30(%rdi)
4567; AVX1-NEXT:    shrl $16, %eax
4568; AVX1-NEXT:    movb %al, 32(%rdi)
4569; AVX1-NEXT:    vpextrd $1, %xmm2, %eax
4570; AVX1-NEXT:    movw %ax, 27(%rdi)
4571; AVX1-NEXT:    shrl $16, %eax
4572; AVX1-NEXT:    movb %al, 29(%rdi)
4573; AVX1-NEXT:    vmovd %xmm2, %eax
4574; AVX1-NEXT:    movw %ax, 24(%rdi)
4575; AVX1-NEXT:    shrl $16, %eax
4576; AVX1-NEXT:    movb %al, 26(%rdi)
4577; AVX1-NEXT:    vpextrd $3, %xmm1, %eax
4578; AVX1-NEXT:    movw %ax, 21(%rdi)
4579; AVX1-NEXT:    shrl $16, %eax
4580; AVX1-NEXT:    movb %al, 23(%rdi)
4581; AVX1-NEXT:    vpextrd $2, %xmm1, %eax
4582; AVX1-NEXT:    movw %ax, 18(%rdi)
4583; AVX1-NEXT:    shrl $16, %eax
4584; AVX1-NEXT:    movb %al, 20(%rdi)
4585; AVX1-NEXT:    vpextrd $1, %xmm1, %eax
4586; AVX1-NEXT:    movw %ax, 15(%rdi)
4587; AVX1-NEXT:    shrl $16, %eax
4588; AVX1-NEXT:    movb %al, 17(%rdi)
4589; AVX1-NEXT:    vmovd %xmm1, %eax
4590; AVX1-NEXT:    movw %ax, 12(%rdi)
4591; AVX1-NEXT:    shrl $16, %eax
4592; AVX1-NEXT:    movb %al, 14(%rdi)
4593; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
4594; AVX1-NEXT:    movw %ax, 9(%rdi)
4595; AVX1-NEXT:    shrl $16, %eax
4596; AVX1-NEXT:    movb %al, 11(%rdi)
4597; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
4598; AVX1-NEXT:    movw %ax, 6(%rdi)
4599; AVX1-NEXT:    shrl $16, %eax
4600; AVX1-NEXT:    movb %al, 8(%rdi)
4601; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
4602; AVX1-NEXT:    movw %ax, 3(%rdi)
4603; AVX1-NEXT:    shrl $16, %eax
4604; AVX1-NEXT:    movb %al, 5(%rdi)
4605; AVX1-NEXT:    vmovd %xmm0, %eax
4606; AVX1-NEXT:    movw %ax, (%rdi)
4607; AVX1-NEXT:    shrl $16, %eax
4608; AVX1-NEXT:    movb %al, 2(%rdi)
4609; AVX1-NEXT:    vzeroupper
4610; AVX1-NEXT:    retq
4611;
4612; AVX2-LABEL: trunc_ssat_v16i32_v16i24:
4613; AVX2:       # %bb.0:
4614; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607]
4615; AVX2-NEXT:    vpminsd %ymm2, %ymm1, %ymm1
4616; AVX2-NEXT:    vpminsd %ymm2, %ymm0, %ymm0
4617; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [4286578688,4286578688,4286578688,4286578688,4286578688,4286578688,4286578688,4286578688]
4618; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
4619; AVX2-NEXT:    vpmaxsd %ymm2, %ymm1, %ymm1
4620; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
4621; AVX2-NEXT:    vpextrd $3, %xmm2, %eax
4622; AVX2-NEXT:    movw %ax, 45(%rdi)
4623; AVX2-NEXT:    shrl $16, %eax
4624; AVX2-NEXT:    movb %al, 47(%rdi)
4625; AVX2-NEXT:    vpextrd $2, %xmm2, %eax
4626; AVX2-NEXT:    movw %ax, 42(%rdi)
4627; AVX2-NEXT:    shrl $16, %eax
4628; AVX2-NEXT:    movb %al, 44(%rdi)
4629; AVX2-NEXT:    vpextrd $1, %xmm2, %eax
4630; AVX2-NEXT:    movw %ax, 39(%rdi)
4631; AVX2-NEXT:    shrl $16, %eax
4632; AVX2-NEXT:    movb %al, 41(%rdi)
4633; AVX2-NEXT:    vmovd %xmm2, %eax
4634; AVX2-NEXT:    movw %ax, 36(%rdi)
4635; AVX2-NEXT:    shrl $16, %eax
4636; AVX2-NEXT:    movb %al, 38(%rdi)
4637; AVX2-NEXT:    vpextrd $3, %xmm1, %eax
4638; AVX2-NEXT:    movw %ax, 33(%rdi)
4639; AVX2-NEXT:    shrl $16, %eax
4640; AVX2-NEXT:    movb %al, 35(%rdi)
4641; AVX2-NEXT:    vpextrd $2, %xmm1, %eax
4642; AVX2-NEXT:    movw %ax, 30(%rdi)
4643; AVX2-NEXT:    shrl $16, %eax
4644; AVX2-NEXT:    movb %al, 32(%rdi)
4645; AVX2-NEXT:    vpextrd $1, %xmm1, %eax
4646; AVX2-NEXT:    movw %ax, 27(%rdi)
4647; AVX2-NEXT:    shrl $16, %eax
4648; AVX2-NEXT:    movb %al, 29(%rdi)
4649; AVX2-NEXT:    vmovd %xmm1, %eax
4650; AVX2-NEXT:    movw %ax, 24(%rdi)
4651; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
4652; AVX2-NEXT:    shrl $16, %eax
4653; AVX2-NEXT:    movb %al, 26(%rdi)
4654; AVX2-NEXT:    vpextrd $3, %xmm1, %eax
4655; AVX2-NEXT:    movw %ax, 21(%rdi)
4656; AVX2-NEXT:    shrl $16, %eax
4657; AVX2-NEXT:    movb %al, 23(%rdi)
4658; AVX2-NEXT:    vpextrd $2, %xmm1, %eax
4659; AVX2-NEXT:    movw %ax, 18(%rdi)
4660; AVX2-NEXT:    shrl $16, %eax
4661; AVX2-NEXT:    movb %al, 20(%rdi)
4662; AVX2-NEXT:    vpextrd $1, %xmm1, %eax
4663; AVX2-NEXT:    movw %ax, 15(%rdi)
4664; AVX2-NEXT:    shrl $16, %eax
4665; AVX2-NEXT:    movb %al, 17(%rdi)
4666; AVX2-NEXT:    vmovd %xmm1, %eax
4667; AVX2-NEXT:    movw %ax, 12(%rdi)
4668; AVX2-NEXT:    shrl $16, %eax
4669; AVX2-NEXT:    movb %al, 14(%rdi)
4670; AVX2-NEXT:    vpextrd $3, %xmm0, %eax
4671; AVX2-NEXT:    movw %ax, 9(%rdi)
4672; AVX2-NEXT:    shrl $16, %eax
4673; AVX2-NEXT:    movb %al, 11(%rdi)
4674; AVX2-NEXT:    vpextrd $2, %xmm0, %eax
4675; AVX2-NEXT:    movw %ax, 6(%rdi)
4676; AVX2-NEXT:    shrl $16, %eax
4677; AVX2-NEXT:    movb %al, 8(%rdi)
4678; AVX2-NEXT:    vpextrd $1, %xmm0, %eax
4679; AVX2-NEXT:    movw %ax, 3(%rdi)
4680; AVX2-NEXT:    shrl $16, %eax
4681; AVX2-NEXT:    movb %al, 5(%rdi)
4682; AVX2-NEXT:    vmovd %xmm0, %eax
4683; AVX2-NEXT:    movw %ax, (%rdi)
4684; AVX2-NEXT:    shrl $16, %eax
4685; AVX2-NEXT:    movb %al, 2(%rdi)
4686; AVX2-NEXT:    vzeroupper
4687; AVX2-NEXT:    retq
4688;
4689; AVX512-LABEL: trunc_ssat_v16i32_v16i24:
4690; AVX512:       # %bb.0:
4691; AVX512-NEXT:    pushq %rbp
4692; AVX512-NEXT:    pushq %r15
4693; AVX512-NEXT:    pushq %r14
4694; AVX512-NEXT:    pushq %rbx
4695; AVX512-NEXT:    vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4696; AVX512-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4697; AVX512-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
4698; AVX512-NEXT:    vpextrd $3, %xmm1, %r15d
4699; AVX512-NEXT:    movw %r15w, 45(%rdi)
4700; AVX512-NEXT:    vpextrd $2, %xmm1, %r14d
4701; AVX512-NEXT:    movw %r14w, 42(%rdi)
4702; AVX512-NEXT:    vpextrd $1, %xmm1, %ebp
4703; AVX512-NEXT:    movw %bp, 39(%rdi)
4704; AVX512-NEXT:    vmovd %xmm1, %r11d
4705; AVX512-NEXT:    movw %r11w, 36(%rdi)
4706; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
4707; AVX512-NEXT:    vpextrd $3, %xmm1, %ebx
4708; AVX512-NEXT:    movw %bx, 33(%rdi)
4709; AVX512-NEXT:    vpextrd $2, %xmm1, %r10d
4710; AVX512-NEXT:    movw %r10w, 30(%rdi)
4711; AVX512-NEXT:    vpextrd $1, %xmm1, %r9d
4712; AVX512-NEXT:    movw %r9w, 27(%rdi)
4713; AVX512-NEXT:    vmovd %xmm1, %r8d
4714; AVX512-NEXT:    movw %r8w, 24(%rdi)
4715; AVX512-NEXT:    vpextrd $3, %xmm0, %esi
4716; AVX512-NEXT:    movw %si, 9(%rdi)
4717; AVX512-NEXT:    vpextrd $2, %xmm0, %edx
4718; AVX512-NEXT:    movw %dx, 6(%rdi)
4719; AVX512-NEXT:    vpextrd $1, %xmm0, %ecx
4720; AVX512-NEXT:    movw %cx, 3(%rdi)
4721; AVX512-NEXT:    vmovd %xmm0, %eax
4722; AVX512-NEXT:    movw %ax, (%rdi)
4723; AVX512-NEXT:    shrl $16, %r15d
4724; AVX512-NEXT:    movb %r15b, 47(%rdi)
4725; AVX512-NEXT:    shrl $16, %r14d
4726; AVX512-NEXT:    movb %r14b, 44(%rdi)
4727; AVX512-NEXT:    shrl $16, %ebp
4728; AVX512-NEXT:    movb %bpl, 41(%rdi)
4729; AVX512-NEXT:    shrl $16, %r11d
4730; AVX512-NEXT:    movb %r11b, 38(%rdi)
4731; AVX512-NEXT:    shrl $16, %ebx
4732; AVX512-NEXT:    movb %bl, 35(%rdi)
4733; AVX512-NEXT:    shrl $16, %r10d
4734; AVX512-NEXT:    movb %r10b, 32(%rdi)
4735; AVX512-NEXT:    shrl $16, %r9d
4736; AVX512-NEXT:    movb %r9b, 29(%rdi)
4737; AVX512-NEXT:    shrl $16, %r8d
4738; AVX512-NEXT:    movb %r8b, 26(%rdi)
4739; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
4740; AVX512-NEXT:    vpextrd $3, %xmm0, %r11d
4741; AVX512-NEXT:    movw %r11w, 21(%rdi)
4742; AVX512-NEXT:    vpextrd $2, %xmm0, %r10d
4743; AVX512-NEXT:    movw %r10w, 18(%rdi)
4744; AVX512-NEXT:    vpextrd $1, %xmm0, %r9d
4745; AVX512-NEXT:    movw %r9w, 15(%rdi)
4746; AVX512-NEXT:    vmovd %xmm0, %r8d
4747; AVX512-NEXT:    movw %r8w, 12(%rdi)
4748; AVX512-NEXT:    shrl $16, %esi
4749; AVX512-NEXT:    movb %sil, 11(%rdi)
4750; AVX512-NEXT:    shrl $16, %edx
4751; AVX512-NEXT:    movb %dl, 8(%rdi)
4752; AVX512-NEXT:    shrl $16, %ecx
4753; AVX512-NEXT:    movb %cl, 5(%rdi)
4754; AVX512-NEXT:    shrl $16, %eax
4755; AVX512-NEXT:    movb %al, 2(%rdi)
4756; AVX512-NEXT:    shrl $16, %r11d
4757; AVX512-NEXT:    movb %r11b, 23(%rdi)
4758; AVX512-NEXT:    shrl $16, %r10d
4759; AVX512-NEXT:    movb %r10b, 20(%rdi)
4760; AVX512-NEXT:    shrl $16, %r9d
4761; AVX512-NEXT:    movb %r9b, 17(%rdi)
4762; AVX512-NEXT:    shrl $16, %r8d
4763; AVX512-NEXT:    movb %r8b, 14(%rdi)
4764; AVX512-NEXT:    popq %rbx
4765; AVX512-NEXT:    popq %r14
4766; AVX512-NEXT:    popq %r15
4767; AVX512-NEXT:    popq %rbp
4768; AVX512-NEXT:    vzeroupper
4769; AVX512-NEXT:    retq
4770;
4771; SKX-LABEL: trunc_ssat_v16i32_v16i24:
4772; SKX:       # %bb.0:
4773; SKX-NEXT:    pushq %rbp
4774; SKX-NEXT:    pushq %r15
4775; SKX-NEXT:    pushq %r14
4776; SKX-NEXT:    pushq %rbx
4777; SKX-NEXT:    vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4778; SKX-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4779; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
4780; SKX-NEXT:    vpextrd $3, %xmm1, %r15d
4781; SKX-NEXT:    movw %r15w, 45(%rdi)
4782; SKX-NEXT:    vpextrd $2, %xmm1, %r14d
4783; SKX-NEXT:    movw %r14w, 42(%rdi)
4784; SKX-NEXT:    vpextrd $1, %xmm1, %ebp
4785; SKX-NEXT:    movw %bp, 39(%rdi)
4786; SKX-NEXT:    vmovd %xmm1, %r11d
4787; SKX-NEXT:    movw %r11w, 36(%rdi)
4788; SKX-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
4789; SKX-NEXT:    vpextrd $3, %xmm1, %ebx
4790; SKX-NEXT:    movw %bx, 33(%rdi)
4791; SKX-NEXT:    vpextrd $2, %xmm1, %r10d
4792; SKX-NEXT:    movw %r10w, 30(%rdi)
4793; SKX-NEXT:    vpextrd $1, %xmm1, %r9d
4794; SKX-NEXT:    movw %r9w, 27(%rdi)
4795; SKX-NEXT:    vmovd %xmm1, %r8d
4796; SKX-NEXT:    vpextrd $3, %xmm0, %edx
4797; SKX-NEXT:    movw %r8w, 24(%rdi)
4798; SKX-NEXT:    movw %dx, 9(%rdi)
4799; SKX-NEXT:    vpextrd $2, %xmm0, %esi
4800; SKX-NEXT:    vpextrd $1, %xmm0, %eax
4801; SKX-NEXT:    movw %si, 6(%rdi)
4802; SKX-NEXT:    movw %ax, 3(%rdi)
4803; SKX-NEXT:    vmovd %xmm0, %ecx
4804; SKX-NEXT:    movw %cx, (%rdi)
4805; SKX-NEXT:    shrl $16, %r15d
4806; SKX-NEXT:    movb %r15b, 47(%rdi)
4807; SKX-NEXT:    shrl $16, %r14d
4808; SKX-NEXT:    movb %r14b, 44(%rdi)
4809; SKX-NEXT:    shrl $16, %ebp
4810; SKX-NEXT:    movb %bpl, 41(%rdi)
4811; SKX-NEXT:    shrl $16, %r11d
4812; SKX-NEXT:    movb %r11b, 38(%rdi)
4813; SKX-NEXT:    shrl $16, %ebx
4814; SKX-NEXT:    movb %bl, 35(%rdi)
4815; SKX-NEXT:    shrl $16, %r10d
4816; SKX-NEXT:    movb %r10b, 32(%rdi)
4817; SKX-NEXT:    shrl $16, %r9d
4818; SKX-NEXT:    movb %r9b, 29(%rdi)
4819; SKX-NEXT:    shrl $16, %r8d
4820; SKX-NEXT:    movb %r8b, 26(%rdi)
4821; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
4822; SKX-NEXT:    vpextrd $3, %xmm0, %r11d
4823; SKX-NEXT:    movw %r11w, 21(%rdi)
4824; SKX-NEXT:    vpextrd $2, %xmm0, %r10d
4825; SKX-NEXT:    movw %r10w, 18(%rdi)
4826; SKX-NEXT:    vpextrd $1, %xmm0, %r9d
4827; SKX-NEXT:    movw %r9w, 15(%rdi)
4828; SKX-NEXT:    vmovd %xmm0, %r8d
4829; SKX-NEXT:    movw %r8w, 12(%rdi)
4830; SKX-NEXT:    shrl $16, %edx
4831; SKX-NEXT:    movb %dl, 11(%rdi)
4832; SKX-NEXT:    shrl $16, %esi
4833; SKX-NEXT:    movb %sil, 8(%rdi)
4834; SKX-NEXT:    shrl $16, %eax
4835; SKX-NEXT:    movb %al, 5(%rdi)
4836; SKX-NEXT:    shrl $16, %ecx
4837; SKX-NEXT:    movb %cl, 2(%rdi)
4838; SKX-NEXT:    shrl $16, %r11d
4839; SKX-NEXT:    movb %r11b, 23(%rdi)
4840; SKX-NEXT:    shrl $16, %r10d
4841; SKX-NEXT:    movb %r10b, 20(%rdi)
4842; SKX-NEXT:    shrl $16, %r9d
4843; SKX-NEXT:    movb %r9b, 17(%rdi)
4844; SKX-NEXT:    shrl $16, %r8d
4845; SKX-NEXT:    movb %r8b, 14(%rdi)
4846; SKX-NEXT:    popq %rbx
4847; SKX-NEXT:    popq %r14
4848; SKX-NEXT:    popq %r15
4849; SKX-NEXT:    popq %rbp
4850; SKX-NEXT:    vzeroupper
4851; SKX-NEXT:    retq
4852  %a = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %x, <16 x i32> <i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607>)
4853  %b = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %a, <16 x i32> <i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608>)
4854  %c = trunc <16 x i32> %b to <16 x i24>
4855  store <16 x i24> %c, ptr %y
4856  ret void
4857}
; Declarations of the target-independent signed min/max intrinsics used by the
; trunc_ssat_v16i32_v16i24 test above to express saturation clamping
; (clamp to [-8388608, 8388607], i.e. the signed i24 range) before truncation.
4858declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
4859declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
4860