; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,SKX

 attributes #0 = { nounwind }

define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
; ALL-LABEL: trunc_16x32_to_16x8:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x = trunc <16 x i32> %i to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
; ALL-LABEL: trunc_8x64_to_8x16:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqw %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x = trunc <8 x i64> %i to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
; ALL-LABEL: trunc_v16i32_to_v16i16:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %1 = trunc <16 x i32> %x to <16 x i16>
  ret <16 x i16> %1
}

define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
; ALL-LABEL: trunc_qb_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x = trunc <8 x i64> %i to <8 x i8>
  ret <8 x i8> %x
}

define void @trunc_qb_512_mem(<8 x i64> %i, ptr %res) #0 {
; ALL-LABEL: trunc_qb_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
    %x = trunc <8 x i64> %i to <8 x i8>
    store <8 x i8> %x, ptr %res
    ret void
}

define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qb_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovqb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qb_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x = trunc <4 x i64> %i to <4 x i8>
  ret <4 x i8> %x
}

define void @trunc_qb_256_mem(<4 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qb_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovqb %zmm0, %xmm0
; KNL-NEXT:    vmovd %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qb_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <4 x i64> %i to <4 x i8>
    store <4 x i8> %x, ptr %res
    ret void
}

define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
; KNL-LABEL: trunc_qb_128:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qb_128:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqb %xmm0, %xmm0
; SKX-NEXT:    retq
  %x = trunc <2 x i64> %i to <2 x i8>
  ret <2 x i8> %x
}

define void @trunc_qb_128_mem(<2 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qb_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; KNL-NEXT:    vpextrw $0, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qb_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqb %xmm0, (%rdi)
; SKX-NEXT:    retq
    %x = trunc <2 x i64> %i to <2 x i8>
    store <2 x i8> %x, ptr %res
    ret void
}

define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
; ALL-LABEL: trunc_qw_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqw %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x = trunc <8 x i64> %i to <8 x i16>
  ret <8 x i16> %x
}

define void @trunc_qw_512_mem(<8 x i64> %i, ptr %res) #0 {
; ALL-LABEL: trunc_qw_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqw %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
    %x = trunc <8 x i64> %i to <8 x i16>
    store <8 x i16> %x, ptr %res
    ret void
}

define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qw_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovqw %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qw_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqw %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x = trunc <4 x i64> %i to <4 x i16>
  ret <4 x i16> %x
}

define void @trunc_qw_256_mem(<4 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qw_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovqw %zmm0, %xmm0
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qw_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqw %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <4 x i64> %i to <4 x i16>
    store <4 x i16> %x, ptr %res
    ret void
}

define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
; KNL-LABEL: trunc_qw_128:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qw_128:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqw %xmm0, %xmm0
; SKX-NEXT:    retq
  %x = trunc <2 x i64> %i to <2 x i16>
  ret <2 x i16> %x
}

define void @trunc_qw_128_mem(<2 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qw_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; KNL-NEXT:    vmovd %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qw_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqw %xmm0, (%rdi)
; SKX-NEXT:    retq
    %x = trunc <2 x i64> %i to <2 x i16>
    store <2 x i16> %x, ptr %res
    ret void
}

define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
; ALL-LABEL: trunc_qd_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqd %zmm0, %ymm0
; ALL-NEXT:    retq
  %x = trunc <8 x i64> %i to <8 x i32>
  ret <8 x i32> %x
}

define void @trunc_qd_512_mem(<8 x i64> %i, ptr %res) #0 {
; ALL-LABEL: trunc_qd_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovqd %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
    %x = trunc <8 x i64> %i to <8 x i32>
    store <8 x i32> %x, ptr %res
    ret void
}

define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qd_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qd_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqd %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x = trunc <4 x i64> %i to <4 x i32>
  ret <4 x i32> %x
}

define void @trunc_qd_256_mem(<4 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qd_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    vmovdqa %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qd_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqd %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <4 x i64> %i to <4 x i32>
    store <4 x i32> %x, ptr %res
    ret void
}

define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
; ALL-LABEL: trunc_qd_128:
; ALL:       ## %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT:    retq
  %x = trunc <2 x i64> %i to <2 x i32>
  ret <2 x i32> %x
}

define void @trunc_qd_128_mem(<2 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qd_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT:    vmovlps %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_qd_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovqd %xmm0, (%rdi)
; SKX-NEXT:    retq
    %x = trunc <2 x i64> %i to <2 x i32>
    store <2 x i32> %x, ptr %res
    ret void
}

define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
; ALL-LABEL: trunc_db_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x = trunc <16 x i32> %i to <16 x i8>
  ret <16 x i8> %x
}

define void @trunc_db_512_mem(<16 x i32> %i, ptr %res) #0 {
; ALL-LABEL: trunc_db_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovdb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
    %x = trunc <16 x i32> %i to <16 x i8>
    store <16 x i8> %x, ptr %res
    ret void
}

define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
; KNL-LABEL: trunc_db_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_db_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x = trunc <8 x i32> %i to <8 x i8>
  ret <8 x i8> %x
}

define void @trunc_db_256_mem(<8 x i32> %i, ptr %res) #0 {
; KNL-LABEL: trunc_db_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_db_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <8 x i32> %i to <8 x i8>
    store <8 x i8> %x, ptr %res
    ret void
}

define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
; KNL-LABEL: trunc_db_128:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_db_128:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdb %xmm0, %xmm0
; SKX-NEXT:    retq
  %x = trunc <4 x i32> %i to <4 x i8>
  ret <4 x i8> %x
}

define void @trunc_db_128_mem(<4 x i32> %i, ptr %res) #0 {
; KNL-LABEL: trunc_db_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; KNL-NEXT:    vmovd %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_db_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdb %xmm0, (%rdi)
; SKX-NEXT:    retq
    %x = trunc <4 x i32> %i to <4 x i8>
    store <4 x i8> %x, ptr %res
    ret void
}

define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
; ALL-LABEL: trunc_dw_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %x = trunc <16 x i32> %i to <16 x i16>
  ret <16 x i16> %x
}

define void @trunc_dw_512_mem(<16 x i32> %i, ptr %res) #0 {
; ALL-LABEL: trunc_dw_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovdw %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
    %x = trunc <16 x i32> %i to <16 x i16>
    store <16 x i16> %x, ptr %res
    ret void
}

define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
; KNL-LABEL: trunc_dw_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_dw_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdw %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x = trunc <8 x i32> %i to <8 x i16>
  ret <8 x i16> %x
}

define void @trunc_dw_256_mem(<8 x i32> %i, ptr %res) #0 {
; KNL-LABEL: trunc_dw_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    vmovdqa %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_dw_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdw %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <8 x i32> %i to <8 x i16>
    store <8 x i16> %x, ptr %res
    ret void
}

define void @trunc_dw_128_mem(<4 x i32> %i, ptr %res) #0 {
; KNL-LABEL: trunc_dw_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u]
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_dw_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovdw %xmm0, (%rdi)
; SKX-NEXT:    retq
    %x = trunc <4 x i32> %i to <4 x i16>
    store <4 x i16> %x, ptr %res
    ret void
}

define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
; KNL-LABEL: trunc_wb_512:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_512:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %zmm0, %ymm0
; SKX-NEXT:    retq
  %x = trunc <32 x i16> %i to <32 x i8>
  ret <32 x i8> %x
}

define void @trunc_wb_512_mem(<32 x i16> %i, ptr %res) #0 {
; KNL-LABEL: trunc_wb_512_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_512_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %zmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <32 x i16> %i to <32 x i8>
    store <32 x i8> %x, ptr %res
    ret void
}

define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
; KNL-LABEL: trunc_wb_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x = trunc <16 x i16> %i to <16 x i8>
  ret <16 x i8> %x
}

define void @trunc_wb_256_mem(<16 x i16> %i, ptr %res) #0 {
; KNL-LABEL: trunc_wb_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <16 x i16> %i to <16 x i8>
    store <16 x i8> %x, ptr %res
    ret void
}

define <16 x i8> @trunc_wb_256_mem_and_ret(<16 x i16> %i, ptr %res) #0 {
; KNL-LABEL: trunc_wb_256_mem_and_ret:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vmovdqa %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_256_mem_and_ret:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %ymm0, %xmm0
; SKX-NEXT:    vmovdqa %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
    %x = trunc <16 x i16> %i to <16 x i8>
    store <16 x i8> %x, ptr %res
    ret <16 x i8> %x
}

define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
; KNL-LABEL: trunc_wb_128:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_128:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %xmm0, %xmm0
; SKX-NEXT:    retq
  %x = trunc <8 x i16> %i to <8 x i8>
  ret <8 x i8> %x
}

define void @trunc_wb_128_mem(<8 x i16> %i, ptr %res) #0 {
; KNL-LABEL: trunc_wb_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: trunc_wb_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovwb %xmm0, (%rdi)
; SKX-NEXT:    retq
    %x = trunc <8 x i16> %i to <8 x i8>
    store <8 x i8> %x, ptr %res
    ret void
}


define void @usat_trunc_wb_256_mem(<16 x i16> %i, ptr %res) {
; KNL-LABEL: usat_trunc_wb_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; KNL-LABEL: usat_trunc_wb_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovuswb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  ret <16 x i8> %x6
}

define void @usat_trunc_wb_128_mem(<8 x i16> %i, ptr %res) {
; KNL-LABEL: usat_trunc_wb_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovuswb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <8 x i16> %x5 to <8 x i8>
  store <8 x i8> %x6, ptr %res, align 1
  ret void
}

define void @usat_trunc_db_512_mem(<16 x i32> %i, ptr %res) {
; ALL-LABEL: usat_trunc_db_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <16 x i32> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

define void @usat_trunc_qb_512_mem(<8 x i64> %i, ptr %res) {
; ALL-LABEL: usat_trunc_qb_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusqb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %x6 = trunc <8 x i64> %x5 to <8 x i8>
  store <8 x i8> %x6, ptr %res, align 1
  ret void
}

define void @usat_trunc_qd_512_mem(<8 x i64> %i, ptr %res) {
; ALL-LABEL: usat_trunc_qd_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusqd %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %x6 = trunc <8 x i64> %x5 to <8 x i32>
  store <8 x i32> %x6, ptr %res, align 1
  ret void
}

define void @usat_trunc_qw_512_mem(<8 x i64> %i, ptr %res) {
; ALL-LABEL: usat_trunc_qw_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusqw %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x6 = trunc <8 x i64> %x5 to <8 x i16>
  store <8 x i16> %x6, ptr %res, align 1
  ret void
}

define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
; ALL-LABEL: usat_trunc_db_1024:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <32 x i32> %x5 to <32 x i8>
  ret <32 x i8> %x6
}

define void @usat_trunc_db_1024_mem(<32 x i32> %i, ptr %p) {
; ALL-LABEL: usat_trunc_db_1024_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusdb %zmm1, 16(%rdi)
; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <32 x i32> %x5 to <32 x i8>
  store <32 x i8>%x6, ptr %p, align 1
  ret void
}

define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
; ALL-LABEL: usat_trunc_dw_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %x6 = trunc <16 x i32> %x5 to <16 x i16>
  ret <16 x i16> %x6
}

define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
; KNL-LABEL: usat_trunc_wb_128:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_128:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovuswb %xmm0, %xmm0
; SKX-NEXT:    retq
  %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <8 x i16> %x5 to <8 x i8>
  ret <8 x i8>%x6
}

define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
; ALL-LABEL: usat_trunc_qw_1024:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovusqw %zmm0, %xmm0
; ALL-NEXT:    vpmovusqw %zmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x6 = trunc <16 x i64> %x5 to <16 x i16>
  ret <16 x i16> %x6
}

define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
; KNL-LABEL: usat_trunc_db_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_db_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmovusdb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}



; Tests for the following unsigned saturation pattern:

; %a = icmp sgt %x, C1
; %b = select %a, %x, C1
; %c = icmp slt %b, C2
; %d = select %c, %b, C2
; %res = trunc %d
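;
; A minimal illustrative sketch of one instance of this pattern (hypothetical
; values, not one of the tests below), assuming C1 = 0 and C2 = 255 for an
; <8 x i16> to <8 x i8> truncate:
;
;   %a   = icmp sgt <8 x i16> %x, zeroinitializer
;   %b   = select <8 x i1> %a, <8 x i16> %x, <8 x i16> zeroinitializer
;   %c   = icmp slt <8 x i16> %b, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
;   %d   = select <8 x i1> %c, <8 x i16> %b, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
;   %res = trunc <8 x i16> %d to <8 x i8>
;
; With AVX512BW/AVX512VL this is expected to lower to a signed max against zero
; (vpmaxsw) followed by an unsigned-saturating truncate (vpmovuswb), as the SKX
; check lines in the tests below show.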


define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, ptr %res) {
; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vmovdqu %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

; Test for smax(smin(x, C2), C1).
define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, ptr %res) {
; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vmovdqu %xmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
; KNL-LABEL: smax_usat_trunc_wb_256:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: smax_usat_trunc_wb_256:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; SKX-NEXT:    vpmovuswb %ymm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  ret <16 x i8> %x6
}

define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, ptr %res) {
; KNL-LABEL: smax_usat_trunc_wb_128_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: smax_usat_trunc_wb_128_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; SKX-NEXT:    vpmovuswb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <8 x i16> %x5 to <8 x i8>
  store <8 x i8> %x6, ptr %res, align 1
  ret void
}

define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, ptr %res) {
; ALL-LABEL: smax_usat_trunc_db_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <16 x i32> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, ptr %res) {
; ALL-LABEL: smax_usat_trunc_qb_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    vpmovusqb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
  %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
  %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %x6 = trunc <8 x i64> %x5 to <8 x i8>
  store <8 x i8> %x6, ptr %res, align 1
  ret void
}

define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, ptr %res) {
; ALL-LABEL: smax_usat_trunc_qd_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    vpmovusqd %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
  %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
  %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %x6 = trunc <8 x i64> %x5 to <8 x i32>
  store <8 x i32> %x6, ptr %res, align 1
  ret void
}

define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, ptr %res) {
; ALL-LABEL: smax_usat_trunc_qw_512_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    vpmovusqw %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
  %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
  %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x6 = trunc <8 x i64> %x5 to <8 x i16>
  store <8 x i16> %x6, ptr %res, align 1
  ret void
}

define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
; ALL-LABEL: smax_usat_trunc_db_1024:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <32 x i32> %x5 to <32 x i8>
  ret <32 x i8> %x6
}

define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, ptr %p) {
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT:    vpmovusdb %zmm1, 16(%rdi)
; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <32 x i32> %x5 to <32 x i8>
  store <32 x i8>%x6, ptr %p, align 1
  ret void
}

define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
; ALL-LABEL: smax_usat_trunc_dw_512:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    vpmovusdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %x6 = trunc <16 x i32> %x5 to <16 x i16>
  ret <16 x i16> %x6
}

define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, ptr %res) {
; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SKX-NEXT:    vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SKX-NEXT:    vpmovwb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x1 = icmp slt <16 x i16> %i, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
  %x3 = icmp sgt <16 x i16> %x2, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, ptr %res) {
; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SKX-NEXT:    vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SKX-NEXT:    vpmovwb %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
  %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  store <16 x i8> %x6, ptr %res, align 1
  ret void
}

define void @ssat_trunc_db_1024_mem(<32 x i32> %i, ptr %p) {
; ALL-LABEL: ssat_trunc_db_1024_mem:
; ALL:       ## %bb.0:
; ALL-NEXT:    vpmovsdb %zmm1, 16(%rdi)
; ALL-NEXT:    vpmovsdb %zmm0, (%rdi)
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %x1 = icmp sgt <32 x i32> %i, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
  %x3 = icmp slt <32 x i32> %x2, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
  %x6 = trunc <32 x i32> %x5 to <32 x i8>
  store <32 x i8>%x6, ptr %p, align 1
  ret void
}