; xref: /llvm-project/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll (revision ddd2f57b29661f21308eec0400fa92a6d075b0c6)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL

; nsw v8i64 -> v8i32: wrap flags are irrelevant for a pure lane-narrowing shuffle;
; pre-AVX512 targets keep the even 32-bit lanes via shufps/vpermps, AVX512 uses vpmovqd.
define <8 x i32> @trunc8i64_8i32_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc8i64_8i32_nsw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nsw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nsw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i32_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

; nuw v8i64 -> v8i32: same lowering as the nsw variant — lane narrowing needs no
; saturation, so the wrap flag does not change codegen here.
define <8 x i32> @trunc8i64_8i32_nuw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc8i64_8i32_nuw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nuw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nuw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i32_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

; nsw v8i64 -> v8i16: the no-signed-wrap flag lets the backend use signed
; saturating packs (packssdw chains); AVX512 has a native vpmovqw.
define <8 x i16> @trunc8i64_8i16_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i16_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i16_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

; nuw v8i64 -> v8i16: no-unsigned-wrap permits unsigned packs (packusdw, SSE4.1+);
; plain SSE2/SSSE3 lack packusdw and fall back to shift+sign-extend before packssdw.
define <8 x i16> @trunc8i64_8i16_nuw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i16_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm2
; SSE2-SSSE3-NEXT:    psrad $16, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm0
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i16_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

; nsw v8i64 -> v8i8 (stored): signed pack chain packssdw+packsswb; AVX512 uses a
; truncating store (vpmovqb to memory).
define void @trunc8i64_8i8_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i8_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

; nuw v8i64 -> v8i8 (stored): unsigned pack chain; SSE2/SSSE3 can use packuswb
; directly since nuw guarantees the high bits are zero.
define void @trunc8i64_8i8_nuw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    packuswb %xmm0, %xmm0
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i8_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

; nsw v8i32 -> v8i16: single packssdw; AVX512VL variants get vpmovdw directly,
; AVX512F/BW widen to zmm first.
define <8 x i16> @trunc8i32_8i16_nsw(<8 x i32> %a) {
; SSE-LABEL: trunc8i32_8i16_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i16_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i16_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

; nuw v8i32 -> v8i16: packusdw on SSE4.1+; SSE2 masks via shift pair, SSSE3 uses
; pshufb byte shuffles instead.
define <8 x i16> @trunc8i32_8i16_nuw(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_nuw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16_nuw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i16_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i16_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

; nsw v8i32 -> v8i8 (stored): packssdw+packsswb; VL targets use a truncating
; vpmovdb store.
define void @trunc8i32_8i8_nsw(<8 x i32> %a) {
; SSE-LABEL: trunc8i32_8i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

; nuw v8i32 -> v8i8 (stored): unsigned pack chain; SSE2/SSSE3 can go straight
; through packuswb because nuw zeroes the discarded bits.
define void @trunc8i32_8i8_nuw(<8 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc8i32_8i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm0, %xmm0
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

; nsw v16i32 -> v16i16 (stored): packssdw per 256-bit half; AVX512 truncating
; store vpmovdw.
define void @trunc16i32_16i16_nsw(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i16_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    movdqu %xmm2, (%rax)
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i16_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdw %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, ptr undef, align 4
  ret void
}

; nuw v16i32 -> v16i16 (stored): packusdw on SSE4.1+; SSE2 shift-masks, SSSE3
; uses pshufb lane compaction.
define void @trunc16i32_16i16_nuw(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_nuw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    pslld $16, %xmm3
; SSE2-NEXT:    psrad $16, %xmm3
; SSE2-NEXT:    pslld $16, %xmm2
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    movdqu %xmm2, (%rax)
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc16i32_16i16_nuw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufb %xmm4, %xmm3
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i16_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdw %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, ptr undef, align 4
  ret void
}

; nsw v16i32 -> v16i8 (stored). FIX: the body previously used `trunc nuw` (a
; copy-paste of the _nuw test below, with identical unsigned-pack CHECK lines),
; leaving the nsw case untested. Restored to `trunc nsw`, whose codegen uses the
; signed saturating packs packssdw+packsswb (AVX512: vpmovdb truncating store).
define void @trunc16i32_16i8_nsw(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i8_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

; nuw v16i32 -> v16i8 (stored): unsigned pack chain packusdw+packuswb; plain
; SSE2/SSSE3 can use packuswb throughout since nuw zeroes the discarded bits.
define void @trunc16i32_16i8_nuw(<16 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm2, %xmm0
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i8_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

; nsw v16i16 -> v16i8 (stored): single packsswb; AVX512F/VL lack byte truncates
; from words and detour via zero-extend + vpmovdb, BW targets use vpmovwb.
define void @trunc16i16_16i8_nsw(<16 x i16> %a) {
; SSE-LABEL: trunc16i16_16i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i16_16i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i16_16i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc16i16_16i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc16i16_16i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i16_16i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

; nuw v16i16 -> v16i8 (stored): single packuswb; same AVX512 structure as the
; nsw variant (zero-extend + vpmovdb without BW, vpmovwb with BW).
define void @trunc16i16_16i8_nuw(<16 x i16> %a) {
; SSE-LABEL: trunc16i16_16i8_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i16_16i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i16_16i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc16i16_16i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc16i16_16i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i16_16i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

; Store-truncate <32 x i16> -> <32 x i8> with the nsw flag: lanes fit signed
; i8, so lowerings may narrow via packsswb (SSE/AVX) or vpmovwb (AVX512BW).
; The store target is 'ptr undef', so the two 128-bit halves may legally
; store to the same (%rax) address in the expected output.
define void @trunc32i16_32i8_nsw(<32 x i16> %a) {
; SSE-LABEL: trunc32i16_32i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    movdqu %xmm2, (%rax)
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc32i16_32i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc32i16_32i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc32i16_32i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc32i16_32i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc32i16_32i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, ptr undef, align 4
  ret void
}
918
; Store-truncate <32 x i16> -> <32 x i8> with the nuw flag: lanes fit
; unsigned i8, so lowerings may narrow via packuswb (SSE/AVX) or vpmovwb
; (AVX512BW).
; NOTE(review): this function previously contained 'trunc nsw', contradicting
; the _nuw name and duplicating trunc32i16_32i8_nsw, so the nuw path was never
; exercised. The IR is fixed to 'trunc nuw' and the SSE/AVX1/AVX2 CHECK lines
; updated to the unsigned packs, mirroring trunc16i16_16i8_nuw and
; trunc2x16i16_32i8_nuw; regenerate with utils/update_llc_test_checks.py to
; confirm the expected output.
define void @trunc32i16_32i8_nuw(<32 x i16> %a) {
; SSE-LABEL: trunc32i16_32i8_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    movdqu %xmm2, (%rax)
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc32i16_32i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc32i16_32i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc32i16_32i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc32i16_32i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc32i16_32i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, ptr undef, align 4
  ret void
}
983
; Truncate two <4 x i64> to <4 x i32> (nsw) and concatenate the results into
; <8 x i32> via shufflevector; even-lane shuffles / vpmovqd are expected.
define <8 x i32> @trunc2x4i64_8i32_nsw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc2x4i64_8i32_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <4 x i64> %a to <4 x i32>
  %1 = trunc nsw <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}
1033
; Truncate two <4 x i64> to <4 x i32> (nuw) and concatenate into <8 x i32>.
; i64->i32 narrowing only keeps the even dwords, so nsw/nuw lowerings match.
define <8 x i32> @trunc2x4i64_8i32_nuw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc2x4i64_8i32_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <4 x i64> %a to <4 x i32>
  %1 = trunc nuw <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}
1083
; Truncate two <4 x i64> to <4 x i16> (nsw) and concatenate into <8 x i16>;
; nsw allows cascaded signed packs (packssdw) or vpmovqw on AVX512.
define <8 x i16> @trunc2x4i64_8i16_nsw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i16_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc2x4i64_8i16_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovqw %ymm0, %xmm0
; AVX512VL-NEXT:    vpmovqw %ymm1, %xmm1
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovqw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vpmovqw %ymm1, %xmm1
; AVX512BWVL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <4 x i64> %a to <4 x i16>
  %1 = trunc nsw <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
1153
; Truncate two <4 x i64> to <4 x i16> (nuw) and concatenate into <8 x i16>;
; nuw allows unsigned packs (packusdw) where available; SSE2/SSSE3 lack
; packusdw and instead mask with pslld/psrad before a signed pack.
define <8 x i16> @trunc2x4i64_8i16_nuw(<4 x i64> %a, <4 x i64> %b) {
; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm0
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm2
; SSE2-SSSE3-NEXT:    psrad $16, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc2x4i64_8i16_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovqw %ymm0, %xmm0
; AVX512VL-NEXT:    vpmovqw %ymm1, %xmm1
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovqw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vpmovqw %ymm1, %xmm1
; AVX512BWVL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <4 x i64> %a to <4 x i16>
  %1 = trunc nuw <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
1234
; Truncate two <2 x i64> to <2 x i32> (nsw) and concatenate into <4 x i32>;
; a single even-dword shufps, or vpmovqd on the VL targets.
define <4 x i32> @trunc2x2i64_4i32_nsw(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2x2i64_4i32_nsw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc2x2i64_4i32_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x2i64_4i32_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x2i64_4i32_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <2 x i64> %a to <2 x i32>
  %1 = trunc nsw <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}
1277
; Truncate two <2 x i64> to <2 x i32> (nuw) and concatenate into <4 x i32>;
; identical lowering to the nsw variant, since i64->i32 just keeps even dwords.
define <4 x i32> @trunc2x2i64_4i32_nuw(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2x2i64_4i32_nuw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc2x2i64_4i32_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x2i64_4i32_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x2i64_4i32_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <2 x i64> %a to <2 x i32>
  %1 = trunc nuw <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}
1320
; Truncate two <4 x i32> to <4 x i16> (nsw) and concatenate into <8 x i16>;
; nsw permits a single signed pack (packssdw) on all targets.
define <8 x i16> @trunc2x4i32_8i16_nsw(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: trunc2x4i32_8i16_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16_nsw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc2x4i32_8i16_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <4 x i32> %a to <4 x i16>
  %1 = trunc nsw <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
1342
; Truncate two <4 x i32> to <4 x i16> (nuw) and concatenate into <8 x i16>;
; nuw permits packusdw (SSE4.1+); SSE2 masks with pslld/psrad, SSSE3 uses
; pshufb, since neither has packusdw.
define <8 x i16> @trunc2x4i32_8i16_nuw(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16_nuw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32_8i16_nuw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32_8i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16_nuw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc2x4i32_8i16_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <4 x i32> %a to <4 x i16>
  %1 = trunc nuw <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
1381
; Truncate two <16 x i16> to <16 x i8> (nsw) and concatenate into <32 x i8>;
; nsw permits packsswb; AVX512BW variants concatenate first, then vpmovwb.
define <32 x i8> @trunc2x16i16_32i8_nsw(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: trunc2x16i16_32i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x16i16_32i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x16i16_32i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc2x16i16_32i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x16i16_32i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x16i16_32i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BWVL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i16> %a to <16 x i8>
  %1 = trunc nsw <16 x i16> %b to <16 x i8>
  %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %2
}
1442
; Truncate two <16 x i16> to <16 x i8> (nuw) and concatenate into <32 x i8>;
; nuw permits packuswb; AVX512BW variants concatenate first, then vpmovwb.
define <32 x i8> @trunc2x16i16_32i8_nuw(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: trunc2x16i16_32i8_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x16i16_32i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x16i16_32i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc2x16i16_32i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x16i16_32i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x16i16_32i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BWVL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i16> %a to <16 x i8>
  %1 = trunc nuw <16 x i16> %b to <16 x i8>
  %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %2
}
1503
; Truncate two <8 x i16> to <8 x i8> (nsw) and concatenate into <16 x i8>;
; a single packsswb suffices on all targets.
define <16 x i8> @trunc2x8i16_16i8_nsw(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: trunc2x8i16_16i8_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8_nsw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc2x8i16_16i8_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i16> %a to <8 x i8>
  %1 = trunc nsw <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
1525
; Truncate two <8 x i16> to <8 x i8> (nuw) and concatenate into <16 x i8>;
; a single packuswb suffices on all targets.
define <16 x i8> @trunc2x8i16_16i8_nuw(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: trunc2x8i16_16i8_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8_nuw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc2x8i16_16i8_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i16> %a to <8 x i8>
  %1 = trunc nuw <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
1547
; Truncate <8 x i16> -> <8 x i8> (nsw) and bitcast to i64: a self-pack
; (packsswb with the same source twice) followed by a movq extract.
define i64 @trunc8i16_i64_nsw(<8 x i16> %inval) {
; SSE-LABEL: trunc8i16_i64_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64_nsw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc8i16_i64_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}
1571
; Truncate <8 x i16> -> <8 x i8> (nuw) and bitcast to i64: a self-pack
; (packuswb with the same source twice) followed by a movq extract.
define i64 @trunc8i16_i64_nuw(<8 x i16> %inval) {
; SSE-LABEL: trunc8i16_i64_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    packuswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64_nuw:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc8i16_i64_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}
1595