xref: /llvm-project/llvm/test/Transforms/InstCombine/X86/x86-pack.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3
4;
5; UNDEF Elts
6;
7
; sse2.packssdw.128 with both operands undef; the assertions expect the call to fold to an undef return value.
8define <8 x i16> @undef_packssdw_128() {
9; CHECK-LABEL: @undef_packssdw_128(
10; CHECK-NEXT:    ret <8 x i16> undef
11;
12  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
13  ret <8 x i16> %1
14}
15
; sse41.packusdw with both operands undef; the assertions expect the call to fold to an undef return value.
16define <8 x i16> @undef_packusdw_128() {
17; CHECK-LABEL: @undef_packusdw_128(
18; CHECK-NEXT:    ret <8 x i16> undef
19;
20  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
21  ret <8 x i16> %1
22}
23
; sse2.packsswb.128 with both operands undef; the assertions expect the call to fold to an undef return value.
24define <16 x i8> @undef_packsswb_128() {
25; CHECK-LABEL: @undef_packsswb_128(
26; CHECK-NEXT:    ret <16 x i8> undef
27;
28  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
29  ret <16 x i8> %1
30}
31
; sse2.packuswb.128 with both operands undef; the assertions expect the call to fold to an undef return value.
32define <16 x i8> @undef_packuswb_128() {
33; CHECK-LABEL: @undef_packuswb_128(
34; CHECK-NEXT:    ret <16 x i8> undef
35;
36  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
37  ret <16 x i8> %1
38}
39
; avx2.packssdw with both operands undef; the assertions expect the call to fold to an undef return value.
40define <16 x i16> @undef_packssdw_256() {
41; CHECK-LABEL: @undef_packssdw_256(
42; CHECK-NEXT:    ret <16 x i16> undef
43;
44  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
45  ret <16 x i16> %1
46}
47
; avx2.packusdw with both operands undef; the assertions expect the call to fold to an undef return value.
48define <16 x i16> @undef_packusdw_256() {
49; CHECK-LABEL: @undef_packusdw_256(
50; CHECK-NEXT:    ret <16 x i16> undef
51;
52  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
53  ret <16 x i16> %1
54}
55
; avx2.packsswb with both operands undef; the assertions expect the call to fold to an undef return value.
56define <32 x i8> @undef_packsswb_256() {
57; CHECK-LABEL: @undef_packsswb_256(
58; CHECK-NEXT:    ret <32 x i8> undef
59;
60  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
61  ret <32 x i8> %1
62}
63
; avx2.packuswb with both operands undef; the assertions expect the call to fold to an undef return value.
64define <32 x i8> @undef_packuswb_256() {
65; CHECK-LABEL: @undef_packuswb_256(
66; CHECK-NEXT:    ret <32 x i8> undef
67;
68  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
69  ret <32 x i8> %1
70}
71
; avx512.packssdw.512 with both operands undef; the assertions expect the call to fold to an undef return value.
72define <32 x i16> @undef_packssdw_512() {
73; CHECK-LABEL: @undef_packssdw_512(
74; CHECK-NEXT:    ret <32 x i16> undef
75;
76  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
77  ret <32 x i16> %1
78}
79
; avx512.packusdw.512 with both operands undef; the assertions expect the call to fold to an undef return value.
80define <32 x i16> @undef_packusdw_512() {
81; CHECK-LABEL: @undef_packusdw_512(
82; CHECK-NEXT:    ret <32 x i16> undef
83;
84  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
85  ret <32 x i16> %1
86}
87
; avx512.packsswb.512 with both operands undef; the assertions expect the call to fold to an undef return value.
88define <64 x i8> @undef_packsswb_512() {
89; CHECK-LABEL: @undef_packsswb_512(
90; CHECK-NEXT:    ret <64 x i8> undef
91;
92  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
93  ret <64 x i8> %1
94}
95
; avx512.packuswb.512 with both operands undef; the assertions expect the call to fold to an undef return value.
96define <64 x i8> @undef_packuswb_512() {
97; CHECK-LABEL: @undef_packuswb_512(
98; CHECK-NEXT:    ret <64 x i8> undef
99;
100  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
101  ret <64 x i8> %1
102}
103
104;
105; Constant Folding
106;
107
; Constant first operand <0, -1, 65536, -131072> and an all-zero second operand; the assertions expect the call
; to fold to the constant <0, -1, 32767, -32768, 0, 0, 0, 0>.
108define <8 x i16> @fold_packssdw_128() {
109; CHECK-LABEL: @fold_packssdw_128(
110; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
111;
112  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
113  ret <8 x i16> %1
114}
115
; First operand undef, second operand constant; the assertions expect a folded constant whose first four lanes
; are undef and whose last four lanes are <0, 0, -32768, -1>.
116define <8 x i16> @fold_packusdw_128() {
117; CHECK-LABEL: @fold_packusdw_128(
118; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
119;
120  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
121  ret <8 x i16> %1
122}
123
; All-zero first operand and undef second operand; the assertions expect a folded constant with eight zero lanes
; followed by eight undef lanes.
124define <16 x i8> @fold_packsswb_128() {
125; CHECK-LABEL: @fold_packsswb_128(
126; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
127;
128  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
129  ret <16 x i8> %1
130}
131
; Both operands are constant vectors; the assertions expect the sse2.packuswb.128 call to fold to a single
; constant <16 x i8> return value.
132define <16 x i8> @fold_packuswb_128() {
133; CHECK-LABEL: @fold_packuswb_128(
134; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
135;
136  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
137  ret <16 x i8> %1
138}
139
; Constant first operand and undef second operand; the assertions expect a folded constant that alternates
; groups of four folded values with groups of four undef lanes.
140define <16 x i16> @fold_packssdw_256() {
141; CHECK-LABEL: @fold_packssdw_256(
142; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
143;
144  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
145  ret <16 x i16> %1
146}
147
; Both operands are constant vectors; the assertions expect the avx2.packusdw call to fold to a single
; constant <16 x i16> return value.
148define <16 x i16> @fold_packusdw_256() {
149; CHECK-LABEL: @fold_packusdw_256(
150; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
151;
152  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
153  ret <16 x i16> %1
154}
155
; Undef first operand and all-zero second operand; the assertions expect a folded constant that alternates
; groups of eight undef lanes with groups of eight zero lanes.
156define <32 x i8> @fold_packsswb_256() {
157; CHECK-LABEL: @fold_packsswb_256(
158; CHECK-NEXT:    ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
159;
160  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
161  ret <32 x i8> %1
162}
163
; All-zero first operand and constant second operand; the assertions expect the avx2.packuswb call to fold to a
; single constant <32 x i8> return value.
164define <32 x i8> @fold_packuswb_256() {
165; CHECK-LABEL: @fold_packuswb_256(
166; CHECK-NEXT:    ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
167;
168  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
169  ret <32 x i8> %1
170}
171
; Constant first operand and undef second operand; the assertions expect a folded constant that alternates
; groups of four folded values with groups of four undef lanes across the whole <32 x i16> result.
172define <32 x i16> @fold_packssdw_512() {
173; CHECK-LABEL: @fold_packssdw_512(
174; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
175;
176  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
177  ret <32 x i16> %1
178}
179
; Both operands are constant vectors; the assertions expect the avx512.packusdw.512 call to fold to a single
; constant <32 x i16> return value.
180define <32 x i16> @fold_packusdw_512() {
181; CHECK-LABEL: @fold_packusdw_512(
182; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
183;
184  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
185  ret <32 x i16> %1
186}
187
; Undef first operand and all-zero second operand; the assertions expect a folded constant that alternates
; groups of eight undef lanes with groups of eight zero lanes across the whole <64 x i8> result.
188define <64 x i8> @fold_packsswb_512() {
189; CHECK-LABEL: @fold_packsswb_512(
190; CHECK-NEXT:    ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
191;
192  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
193  ret <64 x i8> %1
194}
195
; All-zero first operand and constant second operand; the assertions expect the avx512.packuswb.512 call to fold
; to a single constant <64 x i8> return value.
196define <64 x i8> @fold_packuswb_512() {
197; CHECK-LABEL: @fold_packuswb_512(
198; CHECK-NEXT:    ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
199;
200  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
201  ret <64 x i8> %1
202}
203
204;
205; Demanded Elts
206;
207
; The final shuffle reads only result elements 1 and 7. The assertions expect both input shuffles to be
; dropped, %a0 passed straight to the pack, the second operand replaced by poison, and the upper four
; lanes of the final shuffle mask turned into poison.
208define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
209; CHECK-LABEL: @elts_packssdw_128(
210; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> poison)
211; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 poison, i32 poison, i32 poison, i32 poison>
212; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
213;
214  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
215  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
216  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
217  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
218  ret <8 x i16> %4
219}
220
; Zero is inserted into element 0 of %a0 and element 3 of %a1, and the final shuffle leaves result lanes 0 and 7
; undefined. The assertions expect both insertelements to be dropped so the pack consumes %a0 and %a1 directly.
221define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
222; CHECK-LABEL: @elts_packusdw_128(
223; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
224; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
225; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
226;
227  %1 = insertelement <4 x i32> %a0, i32 0, i32 0
228  %2 = insertelement <4 x i32> %a1, i32 0, i32 3
229  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
230  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
231  ret <8 x i16> %4
232}
233
; Zero is inserted into element 0 of both operands and the final shuffle splats result elements 0 and 8.
; The assertions expect the whole function to fold to a zeroinitializer return value.
234define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
235; CHECK-LABEL: @elts_packsswb_128(
236; CHECK-NEXT:    ret <16 x i8> zeroinitializer
237;
238  %1 = insertelement <8 x i16> %a0, i16 0, i32 0
239  %2 = insertelement <8 x i16> %a1, i16 0, i32 0
240  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
241  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
242  ret <16 x i8> %4
243}
244
; Both pack operands are undef vectors with zero inserted at element 0, and the final shuffle reads only result
; elements 1 and 15. The assertions expect an undef return value. The %a0/%a1 parameters are not referenced in
; the body.
245define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
246; CHECK-LABEL: @elts_packuswb_128(
247; CHECK-NEXT:    ret <16 x i8> undef
248;
249  %1 = insertelement <8 x i16> undef, i16 0, i32 0
250  %2 = insertelement <8 x i16> undef, i16 0, i32 0
251  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
252  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
253  ret <16 x i8> %4
254}
255
; %a0 is shuffled with elements 0 and 1 swapped, %a1 is shuffled with a partially undef mask, and the final
; shuffle reads result elements 2, 3, 4, 7, 8, 11, 12 and 15. The assertions expect the pack to take %a0
; directly with a poison second operand, and the final mask to keep only elements 2, 3, 8 and 11.
256define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
257; CHECK-LABEL: @elts_packssdw_256(
258; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> poison)
259; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
260; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
261;
262  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
263  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
264  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
265  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
266  ret <16 x i16> %4
267}
268
; %a0 goes through an identity shuffle, %a1 is reversed, and the final shuffle reads only result elements 4-7
; and 12-15. The assertions expect the first pack operand to become poison while the reversing shuffle of %a1
; is kept.
269define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
270; CHECK-LABEL: @elts_packusdw_256(
271; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
272; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> poison, <8 x i32> [[TMP1]])
273; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison>
274; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
275;
276  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
277  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
278  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
279  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
280  ret <16 x i16> %4
281}
282
; Zero is inserted into element 0 of %a0 and element 8 of %a1, and the final shuffle splats result elements 0
; and 24. The assertions expect the whole function to fold to a zeroinitializer return value.
283define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
284; CHECK-LABEL: @elts_packsswb_256(
285; CHECK-NEXT:    ret <32 x i8> zeroinitializer
286;
287  %1 = insertelement <16 x i16> %a0, i16 0, i32 0
288  %2 = insertelement <16 x i16> %a1, i16 0, i32 8
289  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
290  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
291  ret <32 x i8> %4
292}
293
; The pack operands are undef vectors with zero inserted at element 1 (first) and element 0 (second), and the
; final shuffle splats result element 0. The assertions expect an undef return value. The %a0/%a1 parameters
; are not referenced in the body.
294define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
295; CHECK-LABEL: @elts_packuswb_256(
296; CHECK-NEXT:    ret <32 x i8> undef
297;
298  %1 = insertelement <16 x i16> undef, i16 0, i32 1
299  %2 = insertelement <16 x i16> undef, i16 0, i32 0
300  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
301  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
302  ret <32 x i8> %4
303}
304
; %a0 is shuffled with elements 0/1 and 8/9 swapped, %a1 is shuffled with a partially undef mask, and the final
; shuffle reads only a subset of the result lanes. The assertions expect the pack to take %a0 directly with a
; poison second operand, and the final mask to keep only elements 2, 3, 8, 11, 18, 19, 24 and 27.
305define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
306; CHECK-LABEL: @elts_packssdw_512(
307; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> poison)
308; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 poison, i32 poison, i32 27, i32 poison, i32 poison, i32 poison, i32 poison>
309; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
310;
311  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
312  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
313  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
314  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
315  ret <32 x i16> %4
316}
317
; %a0 goes through an identity shuffle, %a1 is reversed within each group of eight elements, and the final
; shuffle reads only result elements 4-7, 12-15, 20-23 and 28-31. The assertions expect the first pack operand
; to become poison while the shuffle of %a1 is kept.
318define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
319; CHECK-LABEL: @elts_packusdw_512(
320; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
321; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> poison, <16 x i32> [[TMP1]])
322; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison>
323; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
324;
325  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
326  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
327  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
328  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
329  ret <32 x i16> %4
330}
331
; Zero is inserted into elements 0 and 16 of %a0 and elements 8 and 24 of %a1, and the final shuffle splats
; result elements 0, 24, 32 and 56. The assertions expect the whole function to fold to a zeroinitializer
; return value.
332define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
333; CHECK-LABEL: @elts_packsswb_512(
334; CHECK-NEXT:    ret <64 x i8> zeroinitializer
335;
336  %1 = insertelement <32 x i16> %a0, i16 0, i32 0
337  %2 = insertelement <32 x i16> %a1, i16 0, i32 8
338  %3 = insertelement <32 x i16> %1, i16 0, i32 16
339  %4 = insertelement <32 x i16> %2, i16 0, i32 24
340  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
341  %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
342  ret <64 x i8> %6
343}
344
; The pack operands are undef vectors with zero inserted at element 1 (first) and element 0 (second), and the
; final shuffle splats result element 0. The assertions expect an undef return value. The %a0/%a1 parameters
; are not referenced in the body.
345define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
346; CHECK-LABEL: @elts_packuswb_512(
347; CHECK-NEXT:    ret <64 x i8> undef
348;
349  %1 = insertelement <32 x i16> undef, i16 0, i32 1
350  %2 = insertelement <32 x i16> undef, i16 0, i32 0
351  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
352  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
353  ret <64 x i8> %4
354}
355
356;
357; Truncation (without Saturation)
358;
359
; %a0 is arithmetic-shifted right by 17 and %a1 is masked with 15 before the signed dword pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
360define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
361; CHECK-LABEL: @trunc_packssdw_128(
362; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17)
363; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
364; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
365; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
366;
367  %1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
368  %2 = and  <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
369  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
370  ret <8 x i16> %3
371}
372
; %a0 is logical-shifted right by 17 and %a1 is masked with 15 before the unsigned dword pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
; The test now calls sse41.packusdw to match its name; it previously called the signed sse2.packssdw.128
; intrinsic, which left the unsigned pack uncovered here. The assertion lines were adjusted by hand;
; regenerate them with utils/update_test_checks.py to confirm.
373define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
374; CHECK-LABEL: @trunc_packusdw_128(
375; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17)
376; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
377; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
378; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
379;
380  %1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
381  %2 = and  <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
382  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
383  ret <8 x i16> %3
384}
385
; %a0 is arithmetic-shifted right by 15 and %a1 is masked with 1 before the signed word pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
386define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
387; CHECK-LABEL: @trunc_packsswb_128(
388; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15)
389; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
390; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
391; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
392;
393  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
394  %2 = and  <8 x i16> %a1, <i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1>
395  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
396  ret <16 x i8> %3
397}
398
; %a0 is logical-shifted right by 15 and %a1 is masked with 1 before the unsigned word pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
399define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
400; CHECK-LABEL: @trunc_packuswb_128(
401; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15)
402; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
403; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
404; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
405;
406  %1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
407  %2 = and  <8 x i16> %a1, <i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1>
408  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
409  ret <16 x i8> %3
410}
411
; %a0 is arithmetic-shifted right by 17 and %a1 by 23 before the signed dword pack. The assertions expect all
; three instructions to remain, with the constant vectors printed in splat form.
412define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
413; CHECK-LABEL: @trunc_packssdw_256(
414; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17)
415; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23)
416; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
417; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
418;
419  %1 = ashr <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
420  %2 = ashr <8 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
421  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
422  ret <16 x i16> %3
423}
424
; %a0 is logical-shifted right by 17 and %a1 is masked with 15 before the unsigned dword pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
; The test now calls avx2.packusdw to match its name; it previously called the signed avx2.packssdw
; intrinsic, which left the unsigned pack uncovered here. The assertion lines were adjusted by hand;
; regenerate them with utils/update_test_checks.py to confirm.
425define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
426; CHECK-LABEL: @trunc_packusdw_256(
427; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17)
428; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15)
429; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
430; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
431;
432  %1 = lshr <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
433  %2 = and  <8 x i32> %a1, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
434  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
435  ret <16 x i16> %3
436}
437
; %a0 is arithmetic-shifted right by 15 and %a1 is masked with 1 before the signed word pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
438define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
439; CHECK-LABEL: @trunc_packsswb_256(
440; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15)
441; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
442; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
443; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
444;
445  %1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
446  %2 = and  <16 x i16> %a1, <i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1>
447  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
448  ret <32 x i8> %3
449}
450
; %a0 is logical-shifted right by 15 and %a1 is masked with 1 before the unsigned word pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
451define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
452; CHECK-LABEL: @trunc_packuswb_256(
453; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15)
454; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
455; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
456; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
457;
458  %1 = lshr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
459  %2 = and  <16 x i16> %a1, <i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1>
460  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
461  ret <32 x i8> %3
462}
463
; %a0 is arithmetic-shifted right by 17 and %a1 by 23 before the signed dword pack. The assertions expect all
; three instructions to remain, with the constant vectors printed in splat form.
464define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
465; CHECK-LABEL: @trunc_packssdw_512(
466; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17)
467; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23)
468; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
469; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
470;
471  %1 = ashr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
472  %2 = ashr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
473  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
474  ret <32 x i16> %3
475}
476
; %a0 is logical-shifted right by 17 and %a1 is masked with 15 before the unsigned dword pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
; The test now calls avx512.packusdw.512 to match its name; it previously called the signed
; avx512.packssdw.512 intrinsic, which left the unsigned pack uncovered here. The assertion lines were
; adjusted by hand; regenerate them with utils/update_test_checks.py to confirm.
477define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
478; CHECK-LABEL: @trunc_packusdw_512(
479; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17)
480; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15)
481; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
482; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
483;
484  %1 = lshr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
485  %2 = and  <16 x i32> %a1, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
486  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
487  ret <32 x i16> %3
488}
489
; %a0 is arithmetic-shifted right by 15 and %a1 is masked with 1 before the signed word pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
490define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
491; CHECK-LABEL: @trunc_packsswb_512(
492; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15)
493; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
494; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
495; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
496;
497  %1 = ashr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
498  %2 = and  <32 x i16> %a1, <i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1>
499  %3 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %1, <32 x i16> %2)
500  ret <64 x i8> %3
501}
502
; %a0 is logical-shifted right by 15 and %a1 is masked with 1 before the unsigned word pack. The assertions
; expect all three instructions to remain, with the constant vectors printed in splat form.
503define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
504; CHECK-LABEL: @trunc_packuswb_512(
505; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15)
506; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
507; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
508; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
509;
510  %1 = lshr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
511  %2 = and  <32 x i16> %a1, <i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1, i16  1>
512  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
513  ret <64 x i8> %3
514}
515
516;
517; Signed Pack Comparison Results
518;
519
; Both pack operands are sign-extended results of vector icmp eq. The assertions expect the compares, the
; sign extensions and the sse2.packssdw.128 call to remain as written.
520define <8 x i16> @cmp_packssdw_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
521; CHECK-LABEL: @cmp_packssdw_128(
522; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[A0:%.*]], [[A1:%.*]]
523; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[A2:%.*]], [[A3:%.*]]
524; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
525; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
526; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]])
527; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
528;
529  %1 = icmp eq <4 x i32> %a0, %a1
530  %2 = icmp eq <4 x i32> %a2, %a3
531  %3 = sext <4 x i1> %1 to <4 x i32>
532  %4 = sext <4 x i1> %2 to <4 x i32>
533  %5 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %3, <4 x i32> %4)
534  ret <8 x i16> %5
535}
536
; Packs two sign-extended `icmp eq` results (<8 x i1> -> <8 x i16>, so each
; lane is 0 or -1) through llvm.x86.sse2.packsswb.128.
; The autogenerated assertions expect the same compare / sext / pack sequence
; in the output, i.e. the intrinsic call is retained.
537define <16 x i8> @cmp_packsswb_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
538; CHECK-LABEL: @cmp_packsswb_128(
539; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A0:%.*]], [[A1:%.*]]
540; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i16> [[A2:%.*]], [[A3:%.*]]
541; CHECK-NEXT:    [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
542; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
543; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]])
544; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
545;
546  %1 = icmp eq <8 x i16> %a0, %a1
547  %2 = icmp eq <8 x i16> %a2, %a3
548  %3 = sext <8 x i1> %1 to <8 x i16>
549  %4 = sext <8 x i1> %2 to <8 x i16>
550  %5 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %3, <8 x i16> %4)
551  ret <16 x i8> %5
552}
553
; Packs two sign-extended `icmp eq` results (<8 x i1> -> <8 x i32>, so each
; lane is 0 or -1) through llvm.x86.avx2.packssdw.
; The autogenerated assertions expect the same compare / sext / pack sequence
; in the output, i.e. the intrinsic call is retained.
554define <16 x i16> @cmp_packssdw_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
555; CHECK-LABEL: @cmp_packssdw_256(
556; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i32> [[A0:%.*]], [[A1:%.*]]
557; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i32> [[A2:%.*]], [[A3:%.*]]
558; CHECK-NEXT:    [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
559; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
560; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP3]], <8 x i32> [[TMP4]])
561; CHECK-NEXT:    ret <16 x i16> [[TMP5]]
562;
563  %1 = icmp eq <8 x i32> %a0, %a1
564  %2 = icmp eq <8 x i32> %a2, %a3
565  %3 = sext <8 x i1> %1 to <8 x i32>
566  %4 = sext <8 x i1> %2 to <8 x i32>
567  %5 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %3, <8 x i32> %4)
568  ret <16 x i16> %5
569}
570
; Packs two sign-extended `icmp eq` results (<16 x i1> -> <16 x i16>, so each
; lane is 0 or -1) through llvm.x86.avx2.packsswb.
; The autogenerated assertions expect the same compare / sext / pack sequence
; in the output, i.e. the intrinsic call is retained.
571define <32 x i8> @cmp_packsswb_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
572; CHECK-LABEL: @cmp_packsswb_256(
573; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i16> [[A0:%.*]], [[A1:%.*]]
574; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i16> [[A2:%.*]], [[A3:%.*]]
575; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i16>
576; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
577; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP3]], <16 x i16> [[TMP4]])
578; CHECK-NEXT:    ret <32 x i8> [[TMP5]]
579;
580  %1 = icmp eq <16 x i16> %a0, %a1
581  %2 = icmp eq <16 x i16> %a2, %a3
582  %3 = sext <16 x i1> %1 to <16 x i16>
583  %4 = sext <16 x i1> %2 to <16 x i16>
584  %5 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %3, <16 x i16> %4)
585  ret <32 x i8> %5
586}
587
; Packs two sign-extended `icmp eq` results (<16 x i1> -> <16 x i32>, so each
; lane is 0 or -1) through llvm.x86.avx512.packssdw.512.
; The autogenerated assertions expect the same compare / sext / pack sequence
; in the output, i.e. the intrinsic call is retained.
588define <32 x i16> @cmp_packssdw_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, <16 x i32> %a3) {
589; CHECK-LABEL: @cmp_packssdw_512(
590; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i32> [[A0:%.*]], [[A1:%.*]]
591; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i32> [[A2:%.*]], [[A3:%.*]]
592; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i32>
593; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i32>
594; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP3]], <16 x i32> [[TMP4]])
595; CHECK-NEXT:    ret <32 x i16> [[TMP5]]
596;
597  %1 = icmp eq <16 x i32> %a0, %a1
598  %2 = icmp eq <16 x i32> %a2, %a3
599  %3 = sext <16 x i1> %1 to <16 x i32>
600  %4 = sext <16 x i1> %2 to <16 x i32>
601  %5 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %3, <16 x i32> %4)
602  ret <32 x i16> %5
603}
604
; Packs two sign-extended `icmp eq` results (<32 x i1> -> <32 x i16>, so each
; lane is 0 or -1) through llvm.x86.avx512.packsswb.512.
; The autogenerated assertions expect the same compare / sext / pack sequence
; in the output, i.e. the intrinsic call is retained.
605define <64 x i8> @cmp_packsswb_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2, <32 x i16> %a3) {
606; CHECK-LABEL: @cmp_packsswb_512(
607; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <32 x i16> [[A0:%.*]], [[A1:%.*]]
608; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <32 x i16> [[A2:%.*]], [[A3:%.*]]
609; CHECK-NEXT:    [[TMP3:%.*]] = sext <32 x i1> [[TMP1]] to <32 x i16>
610; CHECK-NEXT:    [[TMP4:%.*]] = sext <32 x i1> [[TMP2]] to <32 x i16>
611; CHECK-NEXT:    [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP3]], <32 x i16> [[TMP4]])
612; CHECK-NEXT:    ret <64 x i8> [[TMP5]]
613;
614  %1 = icmp eq <32 x i16> %a0, %a1
615  %2 = icmp eq <32 x i16> %a2, %a3
616  %3 = sext <32 x i1> %1 to <32 x i16>
617  %4 = sext <32 x i1> %2 to <32 x i16>
618  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
619  ret <64 x i8> %5
620}
621
; Declarations of the x86 pack intrinsics used by this file, grouped by vector
; width. Each takes two source vectors and returns one vector with twice as
; many lanes of half the element width.
; 128-bit forms (SSE2 / SSE4.1).
622declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
623declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
624declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
625declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
626
; 256-bit forms (AVX2).
627declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
628declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
629declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
630declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
631
; 512-bit forms (AVX-512).
632declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
633declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
634declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
635declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone
636