1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
5define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
6; X86-LABEL: test_mask_expand_load_w_128:
7; X86:       # %bb.0:
8; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
10; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
11; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
12; X86-NEXT:    retl # encoding: [0xc3]
13;
14; X64-LABEL: test_mask_expand_load_w_128:
15; X64:       # %bb.0:
16; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
17; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
18; X64-NEXT:    retq # encoding: [0xc3]
19  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 %mask)
20  ret <8 x i16> %res
21}
22
23define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) {
24; X86-LABEL: test_maskz_expand_load_w_128:
25; X86:       # %bb.0:
26; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
27; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
28; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
29; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
30; X86-NEXT:    retl # encoding: [0xc3]
31;
32; X64-LABEL: test_maskz_expand_load_w_128:
33; X64:       # %bb.0:
34; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
35; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
36; X64-NEXT:    retq # encoding: [0xc3]
37  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> zeroinitializer, i8 %mask)
38  ret <8 x i16> %res
39}
40
41declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 %mask)
42
43define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) {
44; X86-LABEL: test_expand_load_w_128:
45; X86:       # %bb.0:
46; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
47; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
48; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
49; X86-NEXT:    retl # encoding: [0xc3]
50;
51; X64-LABEL: test_expand_load_w_128:
52; X64:       # %bb.0:
53; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
54; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
55; X64-NEXT:    retq # encoding: [0xc3]
56  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 -1)
57  ret <8 x i16> %res
58}
59
60define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
61; CHECK-LABEL: test_expand_w_128:
62; CHECK:       # %bb.0:
63; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
64  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
65  ret <8 x i16> %res
66}
67
68define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
69; X86-LABEL: test_mask_expand_w_128:
70; X86:       # %bb.0:
71; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
72; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
73; X86-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
74; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
75; X86-NEXT:    retl # encoding: [0xc3]
76;
77; X64-LABEL: test_mask_expand_w_128:
78; X64:       # %bb.0:
79; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
80; X64-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
81; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
82; X64-NEXT:    retq # encoding: [0xc3]
83  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
84  ret <8 x i16> %res
85}
86
87define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
88; X86-LABEL: test_maskz_expand_w_128:
89; X86:       # %bb.0:
90; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
91; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
92; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
93; X86-NEXT:    retl # encoding: [0xc3]
94;
95; X64-LABEL: test_maskz_expand_w_128:
96; X64:       # %bb.0:
97; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
98; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
99; X64-NEXT:    retq # encoding: [0xc3]
100  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
101  ret <8 x i16> %res
102}
103
104declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
105
106define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
107; X86-LABEL: test_mask_expand_load_b_128:
108; X86:       # %bb.0:
109; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
110; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
111; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
112; X86-NEXT:    retl # encoding: [0xc3]
113;
114; X64-LABEL: test_mask_expand_load_b_128:
115; X64:       # %bb.0:
116; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
117; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
118; X64-NEXT:    retq # encoding: [0xc3]
119  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 %mask)
120  ret <16 x i8> %res
121}
122
123define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) {
124; X86-LABEL: test_maskz_expand_load_b_128:
125; X86:       # %bb.0:
126; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
127; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
128; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
129; X86-NEXT:    retl # encoding: [0xc3]
130;
131; X64-LABEL: test_maskz_expand_load_b_128:
132; X64:       # %bb.0:
133; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
134; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
135; X64-NEXT:    retq # encoding: [0xc3]
136  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> zeroinitializer, i16 %mask)
137  ret <16 x i8> %res
138}
139
140declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 %mask)
141
142define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) {
143; X86-LABEL: test_expand_load_b_128:
144; X86:       # %bb.0:
145; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
146; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
147; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
148; X86-NEXT:    retl # encoding: [0xc3]
149;
150; X64-LABEL: test_expand_load_b_128:
151; X64:       # %bb.0:
152; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
153; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
154; X64-NEXT:    retq # encoding: [0xc3]
155  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 -1)
156  ret <16 x i8> %res
157}
158
159define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
160; CHECK-LABEL: test_expand_b_128:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
163  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
164  ret <16 x i8> %res
165}
166
167define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
168; X86-LABEL: test_mask_expand_b_128:
169; X86:       # %bb.0:
170; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
171; X86-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
172; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
173; X86-NEXT:    retl # encoding: [0xc3]
174;
175; X64-LABEL: test_mask_expand_b_128:
176; X64:       # %bb.0:
177; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
178; X64-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
179; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
180; X64-NEXT:    retq # encoding: [0xc3]
181  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
182  ret <16 x i8> %res
183}
184
185define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
186; X86-LABEL: test_maskz_expand_b_128:
187; X86:       # %bb.0:
188; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
189; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
190; X86-NEXT:    retl # encoding: [0xc3]
191;
192; X64-LABEL: test_maskz_expand_b_128:
193; X64:       # %bb.0:
194; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
195; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
196; X64-NEXT:    retq # encoding: [0xc3]
197  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
198  ret <16 x i8> %res
199}
200
201declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
202
203define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
204; X86-LABEL: test_mask_compress_store_w_128:
205; X86:       # %bb.0:
206; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
207; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
208; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
209; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
210; X86-NEXT:    retl # encoding: [0xc3]
211;
212; X64-LABEL: test_mask_compress_store_w_128:
213; X64:       # %bb.0:
214; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
215; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
216; X64-NEXT:    retq # encoding: [0xc3]
217  call void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 %mask)
218  ret void
219}
220
221declare void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 %mask)
222
223define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) {
224; X86-LABEL: test_compress_store_w_128:
225; X86:       # %bb.0:
226; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
227; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
228; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
229; X86-NEXT:    retl # encoding: [0xc3]
230;
231; X64-LABEL: test_compress_store_w_128:
232; X64:       # %bb.0:
233; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
234; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
235; X64-NEXT:    retq # encoding: [0xc3]
236  call void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 -1)
237  ret void
238}
239
240define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
241; X86-LABEL: test_mask_compress_w_128:
242; X86:       # %bb.0:
243; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
244; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
245; X86-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
246; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
247; X86-NEXT:    retl # encoding: [0xc3]
248;
249; X64-LABEL: test_mask_compress_w_128:
250; X64:       # %bb.0:
251; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
252; X64-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
253; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
254; X64-NEXT:    retq # encoding: [0xc3]
255  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
256  ret <8 x i16> %res
257}
258
259define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
260; X86-LABEL: test_maskz_compress_w_128:
261; X86:       # %bb.0:
262; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
263; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
264; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
265; X86-NEXT:    retl # encoding: [0xc3]
266;
267; X64-LABEL: test_maskz_compress_w_128:
268; X64:       # %bb.0:
269; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
270; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
271; X64-NEXT:    retq # encoding: [0xc3]
272  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
273  ret <8 x i16> %res
274}
275
276define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
277; CHECK-LABEL: test_compress_w_128:
278; CHECK:       # %bb.0:
279; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
280  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
281  ret <8 x i16> %res
282}
283
284declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
285
286define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
287; X86-LABEL: test_mask_compress_store_b_128:
288; X86:       # %bb.0:
289; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
290; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
291; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
292; X86-NEXT:    retl # encoding: [0xc3]
293;
294; X64-LABEL: test_mask_compress_store_b_128:
295; X64:       # %bb.0:
296; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
297; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
298; X64-NEXT:    retq # encoding: [0xc3]
299  call void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 %mask)
300  ret void
301}
302
303declare void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 %mask)
304
305define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) {
306; X86-LABEL: test_compress_store_b_128:
307; X86:       # %bb.0:
308; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
309; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
310; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
311; X86-NEXT:    retl # encoding: [0xc3]
312;
313; X64-LABEL: test_compress_store_b_128:
314; X64:       # %bb.0:
315; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
316; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
317; X64-NEXT:    retq # encoding: [0xc3]
318  call void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 -1)
319  ret void
320}
321
322define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
323; X86-LABEL: test_mask_compress_b_128:
324; X86:       # %bb.0:
325; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
326; X86-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
327; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
328; X86-NEXT:    retl # encoding: [0xc3]
329;
330; X64-LABEL: test_mask_compress_b_128:
331; X64:       # %bb.0:
332; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
333; X64-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
334; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
335; X64-NEXT:    retq # encoding: [0xc3]
336  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
337  ret <16 x i8> %res
338}
339
340define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
341; X86-LABEL: test_maskz_compress_b_128:
342; X86:       # %bb.0:
343; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
344; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
345; X86-NEXT:    retl # encoding: [0xc3]
346;
347; X64-LABEL: test_maskz_compress_b_128:
348; X64:       # %bb.0:
349; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
350; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
351; X64-NEXT:    retq # encoding: [0xc3]
352  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
353  ret <16 x i8> %res
354}
355
356define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
357; CHECK-LABEL: test_compress_b_128:
358; CHECK:       # %bb.0:
359; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
360  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
361  ret <16 x i8> %res
362}
363
364declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
365
366define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
367; X86-LABEL: test_mask_expand_load_w_256:
368; X86:       # %bb.0:
369; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
370; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
371; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
372; X86-NEXT:    retl # encoding: [0xc3]
373;
374; X64-LABEL: test_mask_expand_load_w_256:
375; X64:       # %bb.0:
376; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
377; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
378; X64-NEXT:    retq # encoding: [0xc3]
379  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 %mask)
380  ret <16 x i16> %res
381}
382
383define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) {
384; X86-LABEL: test_maskz_expand_load_w_256:
385; X86:       # %bb.0:
386; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
387; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
388; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
389; X86-NEXT:    retl # encoding: [0xc3]
390;
391; X64-LABEL: test_maskz_expand_load_w_256:
392; X64:       # %bb.0:
393; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
394; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
395; X64-NEXT:    retq # encoding: [0xc3]
396  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> zeroinitializer, i16 %mask)
397  ret <16 x i16> %res
398}
399
400declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 %mask)
401
402define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) {
403; X86-LABEL: test_expand_load_w_256:
404; X86:       # %bb.0:
405; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
406; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
407; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
408; X86-NEXT:    retl # encoding: [0xc3]
409;
410; X64-LABEL: test_expand_load_w_256:
411; X64:       # %bb.0:
412; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
413; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
414; X64-NEXT:    retq # encoding: [0xc3]
415  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 -1)
416  ret <16 x i16> %res
417}
418
419define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
420; CHECK-LABEL: test_expand_w_256:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
423  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
424  ret <16 x i16> %res
425}
426
427define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
428; X86-LABEL: test_mask_expand_w_256:
429; X86:       # %bb.0:
430; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
431; X86-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
432; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
433; X86-NEXT:    retl # encoding: [0xc3]
434;
435; X64-LABEL: test_mask_expand_w_256:
436; X64:       # %bb.0:
437; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
438; X64-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
439; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
440; X64-NEXT:    retq # encoding: [0xc3]
441  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
442  ret <16 x i16> %res
443}
444
445define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
446; X86-LABEL: test_maskz_expand_w_256:
447; X86:       # %bb.0:
448; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
449; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
450; X86-NEXT:    retl # encoding: [0xc3]
451;
452; X64-LABEL: test_maskz_expand_w_256:
453; X64:       # %bb.0:
454; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
455; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
456; X64-NEXT:    retq # encoding: [0xc3]
457  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
458  ret <16 x i16> %res
459}
460
461declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
462
463define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
464; X86-LABEL: test_mask_expand_load_b_256:
465; X86:       # %bb.0:
466; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
467; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
468; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
469; X86-NEXT:    retl # encoding: [0xc3]
470;
471; X64-LABEL: test_mask_expand_load_b_256:
472; X64:       # %bb.0:
473; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
474; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
475; X64-NEXT:    retq # encoding: [0xc3]
476  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 %mask)
477  ret <32 x i8> %res
478}
479
480define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) {
481; X86-LABEL: test_maskz_expand_load_b_256:
482; X86:       # %bb.0:
483; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
484; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
485; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
486; X86-NEXT:    retl # encoding: [0xc3]
487;
488; X64-LABEL: test_maskz_expand_load_b_256:
489; X64:       # %bb.0:
490; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
491; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
492; X64-NEXT:    retq # encoding: [0xc3]
493  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> zeroinitializer, i32 %mask)
494  ret <32 x i8> %res
495}
496
497declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 %mask)
498
499define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) {
500; X86-LABEL: test_expand_load_b_256:
501; X86:       # %bb.0:
502; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
503; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
504; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
505; X86-NEXT:    retl # encoding: [0xc3]
506;
507; X64-LABEL: test_expand_load_b_256:
508; X64:       # %bb.0:
509; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
510; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
511; X64-NEXT:    retq # encoding: [0xc3]
512  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 -1)
513  ret <32 x i8> %res
514}
515
516define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
517; CHECK-LABEL: test_expand_b_256:
518; CHECK:       # %bb.0:
519; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
520  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
521  ret <32 x i8> %res
522}
523
524define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
525; X86-LABEL: test_mask_expand_b_256:
526; X86:       # %bb.0:
527; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
528; X86-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
529; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
530; X86-NEXT:    retl # encoding: [0xc3]
531;
532; X64-LABEL: test_mask_expand_b_256:
533; X64:       # %bb.0:
534; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
535; X64-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
536; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
537; X64-NEXT:    retq # encoding: [0xc3]
538  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
539  ret <32 x i8> %res
540}
541
542define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
543; X86-LABEL: test_maskz_expand_b_256:
544; X86:       # %bb.0:
545; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
546; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
547; X86-NEXT:    retl # encoding: [0xc3]
548;
549; X64-LABEL: test_maskz_expand_b_256:
550; X64:       # %bb.0:
551; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
552; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
553; X64-NEXT:    retq # encoding: [0xc3]
554  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
555  ret <32 x i8> %res
556}
557
558declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
559
560define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
561; X86-LABEL: test_mask_compress_store_w_256:
562; X86:       # %bb.0:
563; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
564; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
565; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
566; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
567; X86-NEXT:    retl # encoding: [0xc3]
568;
569; X64-LABEL: test_mask_compress_store_w_256:
570; X64:       # %bb.0:
571; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
572; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
573; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
574; X64-NEXT:    retq # encoding: [0xc3]
575  call void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 %mask)
576  ret void
577}
578
579declare void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 %mask)
580
581define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) {
582; X86-LABEL: test_compress_store_w_256:
583; X86:       # %bb.0:
584; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
585; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
586; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
587; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
588; X86-NEXT:    retl # encoding: [0xc3]
589;
590; X64-LABEL: test_compress_store_w_256:
591; X64:       # %bb.0:
592; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
593; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
594; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
595; X64-NEXT:    retq # encoding: [0xc3]
596  call void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 -1)
597  ret void
598}
599
600define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
601; X86-LABEL: test_mask_compress_w_256:
602; X86:       # %bb.0:
603; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
604; X86-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
605; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
606; X86-NEXT:    retl # encoding: [0xc3]
607;
608; X64-LABEL: test_mask_compress_w_256:
609; X64:       # %bb.0:
610; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
611; X64-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
612; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
613; X64-NEXT:    retq # encoding: [0xc3]
614  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
615  ret <16 x i16> %res
616}
617
618define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
619; X86-LABEL: test_maskz_compress_w_256:
620; X86:       # %bb.0:
621; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
622; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
623; X86-NEXT:    retl # encoding: [0xc3]
624;
625; X64-LABEL: test_maskz_compress_w_256:
626; X64:       # %bb.0:
627; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
628; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
629; X64-NEXT:    retq # encoding: [0xc3]
630  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
631  ret <16 x i16> %res
632}
633
634define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
635; CHECK-LABEL: test_compress_w_256:
636; CHECK:       # %bb.0:
637; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
638  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
639  ret <16 x i16> %res
640}
641
642declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
643
644define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
645; X86-LABEL: test_mask_compress_store_b_256:
646; X86:       # %bb.0:
647; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
648; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
649; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
650; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
651; X86-NEXT:    retl # encoding: [0xc3]
652;
653; X64-LABEL: test_mask_compress_store_b_256:
654; X64:       # %bb.0:
655; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
656; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
657; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
658; X64-NEXT:    retq # encoding: [0xc3]
659  call void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 %mask)
660  ret void
661}
662
663declare void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 %mask)
664
665define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) {
666; X86-LABEL: test_compress_store_b_256:
667; X86:       # %bb.0:
668; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
669; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
670; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
671; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
672; X86-NEXT:    retl # encoding: [0xc3]
673;
674; X64-LABEL: test_compress_store_b_256:
675; X64:       # %bb.0:
676; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
677; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
678; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
679; X64-NEXT:    retq # encoding: [0xc3]
680  call void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 -1)
681  ret void
682}
683
684define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
685; X86-LABEL: test_mask_compress_b_256:
686; X86:       # %bb.0:
687; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
688; X86-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
689; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
690; X86-NEXT:    retl # encoding: [0xc3]
691;
692; X64-LABEL: test_mask_compress_b_256:
693; X64:       # %bb.0:
694; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
695; X64-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
696; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
697; X64-NEXT:    retq # encoding: [0xc3]
698  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
699  ret <32 x i8> %res
700}
701
702define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
703; X86-LABEL: test_maskz_compress_b_256:
704; X86:       # %bb.0:
705; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
706; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
707; X86-NEXT:    retl # encoding: [0xc3]
708;
709; X64-LABEL: test_maskz_compress_b_256:
710; X64:       # %bb.0:
711; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
712; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
713; X64-NEXT:    retq # encoding: [0xc3]
714  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
715  ret <32 x i8> %res
716}
717
718define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
719; CHECK-LABEL: test_compress_b_256:
720; CHECK:       # %bb.0:
721; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
722  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
723  ret <32 x i8> %res
724}
725
726declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
727
728define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
729; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
730; X86:       # %bb.0:
731; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
732; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
733; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
734; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
735; X86-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
736; X86-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
737; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
738; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
739; X86-NEXT:    retl # encoding: [0xc3]
740;
741; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
742; X64:       # %bb.0:
743; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
744; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
745; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
746; X64-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
747; X64-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
748; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
749; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
750; X64-NEXT:    retq # encoding: [0xc3]
751  %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
752  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1)
753  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4)
754  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
755  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res3, <4 x i32> %res1, 1
756  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res4, <4 x i32> %res2, 2
757  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
758}
759declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
760
761define { <8 x i32>, <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
762; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
763; X86:       # %bb.0:
764; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
765; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
766; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
767; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xe1,0x16]
768; X86-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x17]
769; X86-NEXT:    vpshldd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x71,0xd1,0x18]
770; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
771; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
772; X86-NEXT:    retl # encoding: [0xc3]
773;
774; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
775; X64:       # %bb.0:
776; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
777; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
778; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xe1,0x16]
779; X64-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x17]
780; X64-NEXT:    vpshldd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x71,0xd1,0x18]
781; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
782; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
783; X64-NEXT:    retq # encoding: [0xc3]
784  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
785  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
786  %res2 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 24, <8 x i32> zeroinitializer, i8 %x4)
787  %res3 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
788  %res4 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> }  %res3, <8 x i32> %res1, 1
789  %res5 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> }  %res4, <8 x i32> %res2, 2
790  ret { <8 x i32>, <8 x i32>, <8 x i32> } %res5
791}
792declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
793
794define { <2 x i64>, <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
795; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
796; X86:       # %bb.0:
797; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
798; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
799; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
800; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xe1,0x16]
801; X86-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x17]
802; X86-NEXT:    vpshldq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x71,0xd1,0x18]
803; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
804; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
805; X86-NEXT:    retl # encoding: [0xc3]
806;
807; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
808; X64:       # %bb.0:
809; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
810; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
811; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xe1,0x16]
812; X64-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x17]
813; X64-NEXT:    vpshldq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x71,0xd1,0x18]
814; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
815; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
816; X64-NEXT:    retq # encoding: [0xc3]
817  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
818  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
819  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 24, <2 x i64> zeroinitializer, i8 %x4)
820  %res3 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
821  %res4 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> }  %res3, <2 x i64> %res1, 1
822  %res5 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> }  %res4, <2 x i64> %res2, 2
823  ret { <2 x i64>, <2 x i64>, <2 x i64> } %res5
824}
825declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
826
827define { <4 x i64>, <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
828; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
829; X86:       # %bb.0:
830; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
831; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
832; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
833; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xe1,0x16]
834; X86-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x17]
835; X86-NEXT:    vpshldq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x71,0xd1,0x18]
836; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
837; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
838; X86-NEXT:    retl # encoding: [0xc3]
839;
840; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
841; X64:       # %bb.0:
842; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
843; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
844; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xe1,0x16]
845; X64-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x17]
846; X64-NEXT:    vpshldq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x71,0xd1,0x18]
847; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
848; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
849; X64-NEXT:    retq # encoding: [0xc3]
850  %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
851  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
852  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 24, <4 x i64> zeroinitializer, i8 %x4)
853  %res3 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
854  %res4 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> }  %res3, <4 x i64> %res1, 1
855  %res5 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> }  %res4, <4 x i64> %res2, 2
856  ret { <4 x i64>, <4 x i64>, <4 x i64> } %res5
857}
858declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
859
860define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
861; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
862; X86:       # %bb.0:
863; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
864; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
865; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
866; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xe1,0x06]
867; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x07]
868; X86-NEXT:    vpshldw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x70,0xd1,0x08]
869; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
870; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
871; X86-NEXT:    retl # encoding: [0xc3]
872;
873; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
874; X64:       # %bb.0:
875; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
876; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
877; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xe1,0x06]
878; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x07]
879; X64-NEXT:    vpshldw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x70,0xd1,0x08]
880; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
881; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
882; X64-NEXT:    retq # encoding: [0xc3]
883  %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
884  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
885  %res2 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 8, <8 x i16> zeroinitializer, i8 %x4)
886  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
887  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
888  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
889  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
890}
891declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
892
893define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
894; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
895; X86:       # %bb.0:
896; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
897; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
898; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xe1,0x06]
899; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x07]
900; X86-NEXT:    vpshldw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x70,0xd1,0x08]
901; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
902; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
903; X86-NEXT:    retl # encoding: [0xc3]
904;
905; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
906; X64:       # %bb.0:
907; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
908; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
909; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xe1,0x06]
910; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x07]
911; X64-NEXT:    vpshldw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x70,0xd1,0x08]
912; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
913; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
914; X64-NEXT:    retq # encoding: [0xc3]
915  %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
916  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1)
917  %res2 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 8, <16 x i16> zeroinitializer, i16 %x4)
918  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
919  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
920  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
921  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
922}
923declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
924
925define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
926; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
927; X86:       # %bb.0:
928; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
929; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
930; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
931; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
932; X86-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
933; X86-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
934; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
935; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
936; X86-NEXT:    retl # encoding: [0xc3]
937;
938; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
939; X64:       # %bb.0:
940; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
941; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
942; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
943; X64-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
944; X64-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
945; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
946; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
947; X64-NEXT:    retq # encoding: [0xc3]
948  %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
949  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1)
950  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4)
951  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
952  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res3, <4 x i32> %res1, 1
953  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res4, <4 x i32> %res2, 2
954  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
955}
956declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
957
958define { <8 x i32>, <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
959; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
960; X86:       # %bb.0:
961; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
962; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
963; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
964; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xe1,0x16]
965; X86-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x17]
966; X86-NEXT:    vpshrdd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x73,0xd1,0x18]
967; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
968; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
969; X86-NEXT:    retl # encoding: [0xc3]
970;
971; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
972; X64:       # %bb.0:
973; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
974; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
975; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xe1,0x16]
976; X64-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x17]
977; X64-NEXT:    vpshrdd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x73,0xd1,0x18]
978; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
979; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
980; X64-NEXT:    retq # encoding: [0xc3]
981  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
982  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
983  %res2 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 24, <8 x i32> zeroinitializer, i8 %x4)
984  %res3 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
985  %res4 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> }  %res3, <8 x i32> %res1, 1
986  %res5 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> }  %res4, <8 x i32> %res2, 2
987  ret { <8 x i32>, <8 x i32>, <8 x i32> } %res5
988}
989declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
990
991define { <2 x i64>, <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
992; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
993; X86:       # %bb.0:
994; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
995; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
996; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
997; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xe1,0x16]
998; X86-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x17]
999; X86-NEXT:    vpshrdq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x73,0xd1,0x18]
1000; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1001; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1002; X86-NEXT:    retl # encoding: [0xc3]
1003;
1004; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
1005; X64:       # %bb.0:
1006; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1007; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1008; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xe1,0x16]
1009; X64-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x17]
1010; X64-NEXT:    vpshrdq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x73,0xd1,0x18]
1011; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1012; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1013; X64-NEXT:    retq # encoding: [0xc3]
1014  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
1015  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
1016  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 24, <2 x i64> zeroinitializer, i8 %x4)
1017  %res3 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
1018  %res4 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> }  %res3, <2 x i64> %res1, 1
1019  %res5 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> }  %res4, <2 x i64> %res2, 2
1020  ret { <2 x i64>, <2 x i64>, <2 x i64> } %res5
1021}
1022declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
1023
1024define { <4 x i64>, <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1025; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
1026; X86:       # %bb.0:
1027; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1028; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1029; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1030; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xe1,0x16]
1031; X86-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x17]
1032; X86-NEXT:    vpshrdq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x73,0xd1,0x18]
1033; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1034; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1035; X86-NEXT:    retl # encoding: [0xc3]
1036;
1037; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
1038; X64:       # %bb.0:
1039; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1040; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1041; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xe1,0x16]
1042; X64-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x17]
1043; X64-NEXT:    vpshrdq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x73,0xd1,0x18]
1044; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1045; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1046; X64-NEXT:    retq # encoding: [0xc3]
1047  %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
1048  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
1049  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 24, <4 x i64> zeroinitializer, i8 %x4)
1050  %res3 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
1051  %res4 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> }  %res3, <4 x i64> %res1, 1
1052  %res5 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> }  %res4, <4 x i64> %res2, 2
1053  ret { <4 x i64>, <4 x i64>, <4 x i64> } %res5
1054}
1055declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
1056
1057define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1058; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
1059; X86:       # %bb.0:
1060; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1061; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1062; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1063; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xe1,0x06]
1064; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x07]
1065; X86-NEXT:    vpshrdw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x72,0xd1,0x08]
1066; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1067; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1068; X86-NEXT:    retl # encoding: [0xc3]
1069;
1070; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
1071; X64:       # %bb.0:
1072; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1073; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1074; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xe1,0x06]
1075; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x07]
1076; X64-NEXT:    vpshrdw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x72,0xd1,0x08]
1077; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1078; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1079; X64-NEXT:    retq # encoding: [0xc3]
1080  %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
1081  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
1082  %res2 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 8, <8 x i16> zeroinitializer, i8 %x4)
1083  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
1084  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
1085  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
1086  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
1087}
1088declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
1089
1090define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1091; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
1092; X86:       # %bb.0:
1093; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1094; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1095; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xe1,0x06]
1096; X86-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x07]
1097; X86-NEXT:    vpshrdw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x72,0xd1,0x08]
1098; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1099; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1100; X86-NEXT:    retl # encoding: [0xc3]
1101;
1102; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
1103; X64:       # %bb.0:
1104; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1105; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1106; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xe1,0x06]
1107; X64-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x07]
1108; X64-NEXT:    vpshrdw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x72,0xd1,0x08]
1109; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1110; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1111; X64-NEXT:    retq # encoding: [0xc3]
1112  %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
1113  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1)
1114  %res2 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 8, <16 x i16> zeroinitializer, i16 %x4)
1115  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
1116  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
1117  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
1118  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
1119}
1120declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
1121
1122define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128_2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
1123; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
1124; X86:       # %bb.0:
1125; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1126; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1127; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1128; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
1129; X86-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
1130; X86-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
1131; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1132; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1133; X86-NEXT:    retl # encoding: [0xc3]
1134;
1135; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
1136; X64:       # %bb.0:
1137; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1138; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1139; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
1140; X64-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
1141; X64-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
1142; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1143; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1144; X64-NEXT:    retq # encoding: [0xc3]
1145  %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1146  %2 = bitcast i8 %x4 to <8 x i1>
1147  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1148  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
1149  %4 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23)
1150  %5 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24)
1151  %6 = bitcast i8 %x4 to <8 x i1>
1152  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1153  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
1154  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
1155  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res3, <4 x i32> %4, 1
1156  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res4, <4 x i32> %7, 2
1157  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
1158}
1159declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)
1160
1161define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
1162; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
1163; X86:       # %bb.0:
1164; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1165; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1166; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
1167; X86-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
1168; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1169; X86-NEXT:    retl # encoding: [0xc3]
1170;
1171; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
1172; X64:       # %bb.0:
1173; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1174; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
1175; X64-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
1176; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1177; X64-NEXT:    retq # encoding: [0xc3]
1178  %1 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1179  %2 = bitcast i8 %x4 to <8 x i1>
1180  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
1181  %4 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23)
1182  %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
1183  %res3 = insertvalue { <8 x i32>, <8 x i32> }  %res2, <8 x i32> %4, 1
1184  ret { <8 x i32>, <8 x i32> } %res3
1185}
1186declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)
1187
1188define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
1189; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
1190; X86:       # %bb.0:
1191; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1192; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1193; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
1194; X86-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
1195; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1196; X86-NEXT:    retl # encoding: [0xc3]
1197;
1198; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
1199; X64:       # %bb.0:
1200; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1201; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
1202; X64-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
1203; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1204; X64-NEXT:    retq # encoding: [0xc3]
1205  %1 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1206  %2 = bitcast i8 %x4 to <8 x i1>
1207  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1208  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
1209  %4 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23)
1210  %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
1211  %res3 = insertvalue { <2 x i64>, <2 x i64> }  %res2, <2 x i64> %4, 1
1212  ret { <2 x i64>, <2 x i64> } %res3
1213}
1214declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32)
1215
1216define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1217; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2:
1218; X86:       # %bb.0:
1219; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1220; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1221; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
1222; X86-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
1223; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1224; X86-NEXT:    retl # encoding: [0xc3]
1225;
1226; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2:
1227; X64:       # %bb.0:
1228; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1229; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
1230; X64-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
1231; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1232; X64-NEXT:    retq # encoding: [0xc3]
1233  %1 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1234  %2 = bitcast i8 %x4 to <8 x i1>
1235  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1236  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
1237  %4 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23)
1238  %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
1239  %res3 = insertvalue { <4 x i64>, <4 x i64> }  %res2, <4 x i64> %4, 1
1240  ret { <4 x i64>, <4 x i64> } %res3
1241}
1242declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32)
1243
1244define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1245; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2:
1246; X86:       # %bb.0:
1247; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1248; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1249; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
1250; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
1251; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1252; X86-NEXT:    retl # encoding: [0xc3]
1253;
1254; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2:
1255; X64:       # %bb.0:
1256; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1257; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
1258; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
1259; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1260; X64-NEXT:    retq # encoding: [0xc3]
1261  %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1262  %2 = bitcast i8 %x4 to <8 x i1>
1263  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
1264  %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7)
1265  %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
1266  %res3 = insertvalue { <8 x i16>, <8 x i16> }  %res2, <8 x i16> %4, 1
1267  ret { <8 x i16>, <8 x i16> } %res3
1268}
1269declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32)
1270
1271define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1272; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2:
1273; X86:       # %bb.0:
1274; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1275; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
1276; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
1277; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1278; X86-NEXT:    retl # encoding: [0xc3]
1279;
1280; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2:
1281; X64:       # %bb.0:
1282; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1283; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
1284; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
1285; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1286; X64-NEXT:    retq # encoding: [0xc3]
1287  %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1288  %2 = bitcast i16 %x4 to <16 x i1>
1289  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1290  %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7)
1291  %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
1292  %res3 = insertvalue { <16 x i16>, <16 x i16> }  %res2, <16 x i16> %4, 1
1293  ret { <16 x i16>, <16 x i16> } %res3
1294}
1295declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32)
1296
1297define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128_2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
1298; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2:
1299; X86:       # %bb.0:
1300; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1301; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1302; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1303; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
1304; X86-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
1305; X86-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
1306; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1307; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1308; X86-NEXT:    retl # encoding: [0xc3]
1309;
1310; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2:
1311; X64:       # %bb.0:
1312; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1313; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1314; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
1315; X64-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
1316; X64-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
1317; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1318; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1319; X64-NEXT:    retq # encoding: [0xc3]
1320  %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1321  %2 = bitcast i8 %x4 to <8 x i1>
1322  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1323  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
1324  %4 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23)
1325  %5 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24)
1326  %6 = bitcast i8 %x4 to <8 x i1>
1327  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1328  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
1329  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
1330  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res3, <4 x i32> %4, 1
1331  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> }  %res4, <4 x i32> %7, 2
1332  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
1333}
1334declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32)
1335
1336define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
1337; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2:
1338; X86:       # %bb.0:
1339; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1340; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1341; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
1342; X86-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
1343; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1344; X86-NEXT:    retl # encoding: [0xc3]
1345;
1346; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2:
1347; X64:       # %bb.0:
1348; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1349; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
1350; X64-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
1351; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1352; X64-NEXT:    retq # encoding: [0xc3]
1353  %1 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1354  %2 = bitcast i8 %x4 to <8 x i1>
1355  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
1356  %4 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23)
1357  %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
1358  %res3 = insertvalue { <8 x i32>, <8 x i32> }  %res2, <8 x i32> %4, 1
1359  ret { <8 x i32>, <8 x i32> } %res3
1360}
1361declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32)
1362
1363define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
1364; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2:
1365; X86:       # %bb.0:
1366; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1367; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1368; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
1369; X86-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
1370; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1371; X86-NEXT:    retl # encoding: [0xc3]
1372;
1373; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2:
1374; X64:       # %bb.0:
1375; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1376; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
1377; X64-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
1378; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1379; X64-NEXT:    retq # encoding: [0xc3]
1380  %1 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1381  %2 = bitcast i8 %x4 to <8 x i1>
1382  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1383  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
1384  %4 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23)
1385  %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
1386  %res3 = insertvalue { <2 x i64>, <2 x i64> }  %res2, <2 x i64> %4, 1
1387  ret { <2 x i64>, <2 x i64> } %res3
1388}
1389declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32)
1390
1391define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1392; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2:
1393; X86:       # %bb.0:
1394; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1395; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1396; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
1397; X86-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
1398; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1399; X86-NEXT:    retl # encoding: [0xc3]
1400;
1401; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2:
1402; X64:       # %bb.0:
1403; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1404; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
1405; X64-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
1406; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1407; X64-NEXT:    retq # encoding: [0xc3]
1408  %1 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1409  %2 = bitcast i8 %x4 to <8 x i1>
1410  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1411  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
1412  %4 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23)
1413  %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
1414  %res3 = insertvalue { <4 x i64>, <4 x i64> }  %res2, <4 x i64> %4, 1
1415  ret { <4 x i64>, <4 x i64> } %res3
1416}
1417declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32)
1418
1419define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1420; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2:
1421; X86:       # %bb.0:
1422; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1423; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1424; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
1425; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
1426; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1427; X86-NEXT:    retl # encoding: [0xc3]
1428;
1429; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2:
1430; X64:       # %bb.0:
1431; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1432; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
1433; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
1434; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1435; X64-NEXT:    retq # encoding: [0xc3]
1436  %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1437  %2 = bitcast i8 %x4 to <8 x i1>
1438  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
1439  %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7)
1440  %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
1441  %res3 = insertvalue { <8 x i16>, <8 x i16> }  %res2, <8 x i16> %4, 1
1442  ret { <8 x i16>, <8 x i16> } %res3
1443}
1444declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32)
1445
1446define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1447; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2:
1448; X86:       # %bb.0:
1449; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1450; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1451; X86-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
1452; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1453; X86-NEXT:    retl # encoding: [0xc3]
1454;
1455; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2:
1456; X64:       # %bb.0:
1457; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1458; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1459; X64-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
1460; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1461; X64-NEXT:    retq # encoding: [0xc3]
1462  %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1463  %2 = bitcast i16 %x4 to <16 x i1>
1464  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1465  %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7)
1466  %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
1467  %res3 = insertvalue { <16 x i16>, <16 x i16> }  %res2, <16 x i16> %4, 1
1468  ret { <16 x i16>, <16 x i16> } %res3
1469}
1470declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32)
1471
1472declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1473declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1474
1475define <8 x i32>@test_int_x86_avx512_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1476; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_256:
1477; CHECK:       # %bb.0:
1478; CHECK-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2]
1479; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1480  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
1481  ret <8 x i32> %res
1482}
1483
1484define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
1485; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1486; X86:       # %bb.0:
1487; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1488; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1489; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1490; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1491; X86-NEXT:    vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00]
1492; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
1493; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1494; X86-NEXT:    retl # encoding: [0xc3]
1495;
1496; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1497; X64:       # %bb.0:
1498; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1499; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1500; X64-NEXT:    vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07]
1501; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
1502; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1503; X64-NEXT:    retq # encoding: [0xc3]
1504  %x2 = load <8 x i32>, ptr %x2p
1505  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
1506  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
1507  %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
1508  %res3 = insertvalue { <8 x i32>, <8 x i32> }  %res2, <8 x i32> %res1, 1
1509  ret { <8 x i32>, <8 x i32> } %res3
1510}
1511
1512declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1513declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1514
1515define <4 x i32>@test_int_x86_avx512_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
1516; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_128:
1517; CHECK:       # %bb.0:
1518; CHECK-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2]
1519; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1520  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
1521  ret <4 x i32> %res
1522}
1523
1524define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
1525; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1526; X86:       # %bb.0:
1527; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1528; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1529; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1530; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1531; X86-NEXT:    vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00]
1532; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
1533; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1534; X86-NEXT:    retl # encoding: [0xc3]
1535;
1536; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1537; X64:       # %bb.0:
1538; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1539; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1540; X64-NEXT:    vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07]
1541; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
1542; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1543; X64-NEXT:    retq # encoding: [0xc3]
1544  %x2 = load <4 x i32>, ptr %x2p
1545  %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
1546  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
1547  %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
1548  %res3 = insertvalue { <4 x i32>, <4 x i32> }  %res2, <4 x i32> %res1, 1
1549  ret { <4 x i32>, <4 x i32> } %res3
1550}
1551
1552declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1553declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1554
1555define <4 x i64>@test_int_x86_avx512_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1556; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_256:
1557; CHECK:       # %bb.0:
1558; CHECK-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2]
1559; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1560  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
1561  ret <4 x i64> %res
1562}
1563
1564define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
1565; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1566; X86:       # %bb.0:
1567; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1568; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1569; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1570; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1571; X86-NEXT:    vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00]
1572; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
1573; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1574; X86-NEXT:    retl # encoding: [0xc3]
1575;
1576; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1577; X64:       # %bb.0:
1578; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1579; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1580; X64-NEXT:    vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07]
1581; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
1582; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1583; X64-NEXT:    retq # encoding: [0xc3]
1584  %x2 = load <4 x i64>, ptr %x2p
1585  %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
1586  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8  %x3)
1587  %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
1588  %res3 = insertvalue { <4 x i64>, <4 x i64> }  %res2, <4 x i64> %res1, 1
1589  ret { <4 x i64>, <4 x i64> } %res3
1590}
1591
1592declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1593declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1594
1595define <2 x i64>@test_int_x86_avx512_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1596; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_128:
1597; CHECK:       # %bb.0:
1598; CHECK-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2]
1599; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1600  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
1601  ret <2 x i64> %res
1602}
1603
1604define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
1605; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1606; X86:       # %bb.0:
1607; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1608; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1609; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1610; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1611; X86-NEXT:    vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00]
1612; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
1613; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1614; X86-NEXT:    retl # encoding: [0xc3]
1615;
1616; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1617; X64:       # %bb.0:
1618; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1619; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1620; X64-NEXT:    vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07]
1621; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
1622; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1623; X64-NEXT:    retq # encoding: [0xc3]
1624  %x2 = load <2 x i64>, ptr %x2p
1625  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
1626  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8  %x3)
1627  %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
1628  %res3 = insertvalue { <2 x i64>, <2 x i64> }  %res2, <2 x i64> %res1, 1
1629  ret { <2 x i64>, <2 x i64> } %res3
1630}
1631
1632declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1633declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1634
1635define <16 x i16>@test_int_x86_avx512_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1636; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_256:
1637; CHECK:       # %bb.0:
1638; CHECK-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2]
1639; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1640  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1641  ret <16 x i16> %res
1642}
1643
1644define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
1645; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1646; X86:       # %bb.0:
1647; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1648; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1649; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1650; X86-NEXT:    vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00]
1651; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
1652; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1653; X86-NEXT:    retl # encoding: [0xc3]
1654;
1655; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1656; X64:       # %bb.0:
1657; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1658; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1659; X64-NEXT:    vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07]
1660; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
1661; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1662; X64-NEXT:    retq # encoding: [0xc3]
1663  %x2 = load <16 x i16>, ptr %x2p
1664  %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1665  %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16  %x3)
1666  %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
1667  %res3 = insertvalue { <16 x i16>, <16 x i16> }  %res2, <16 x i16> %res1, 1
1668  ret { <16 x i16>, <16 x i16> } %res3
1669}
1670
1671declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1672declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1673
1674define <8 x i16>@test_int_x86_avx512_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1675; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_128:
1676; CHECK:       # %bb.0:
1677; CHECK-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2]
1678; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1679  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1680  ret <8 x i16> %res
1681}
1682
1683define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
1684; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1685; X86:       # %bb.0:
1686; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1687; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1688; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1689; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1690; X86-NEXT:    vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00]
1691; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
1692; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1693; X86-NEXT:    retl # encoding: [0xc3]
1694;
1695; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1696; X64:       # %bb.0:
1697; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1698; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1699; X64-NEXT:    vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07]
1700; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
1701; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1702; X64-NEXT:    retq # encoding: [0xc3]
1703  %x2 = load <8 x i16>, ptr %x2p
1704  %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1705  %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8  %x3)
1706  %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
1707  %res3 = insertvalue { <8 x i16>, <8 x i16> }  %res2, <8 x i16> %res1, 1
1708  ret { <8 x i16>, <8 x i16> } %res3
1709}
1710
1711declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1712declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1713
1714define <8 x i32>@test_int_x86_avx512_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1715; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_256:
1716; CHECK:       # %bb.0:
1717; CHECK-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2]
1718; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1719  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
1720  ret <8 x i32> %res
1721}
1722
1723define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
1724; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1725; X86:       # %bb.0:
1726; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1727; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1728; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1729; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1730; X86-NEXT:    vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00]
1731; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
1732; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1733; X86-NEXT:    retl # encoding: [0xc3]
1734;
1735; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1736; X64:       # %bb.0:
1737; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1738; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1739; X64-NEXT:    vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07]
1740; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
1741; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1742; X64-NEXT:    retq # encoding: [0xc3]
1743  %x2 = load <8 x i32>, ptr %x2p
1744  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
1745  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
1746  %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
1747  %res3 = insertvalue { <8 x i32>, <8 x i32> }  %res2, <8 x i32> %res1, 1
1748  ret { <8 x i32>, <8 x i32> } %res3
1749}
1750
1751declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1752declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1753
1754define <4 x i32>@test_int_x86_avx512_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
1755; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_128:
1756; CHECK:       # %bb.0:
1757; CHECK-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2]
1758; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1759  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
1760  ret <4 x i32> %res
1761}
1762
1763define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
1764; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1765; X86:       # %bb.0:
1766; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1767; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1768; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1769; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1770; X86-NEXT:    vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00]
1771; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
1772; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1773; X86-NEXT:    retl # encoding: [0xc3]
1774;
1775; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1776; X64:       # %bb.0:
1777; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1778; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1779; X64-NEXT:    vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07]
1780; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
1781; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1782; X64-NEXT:    retq # encoding: [0xc3]
1783  %x2 = load <4 x i32>, ptr %x2p
1784  %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
1785  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
1786  %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
1787  %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1
1788  ret { <4 x i32>, <4 x i32> } %res3
1789}
1790
1791declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1792declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1793
1794define <4 x i64> @test_int_x86_avx512_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1795; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_256:
1796; CHECK:       # %bb.0:
1797; CHECK-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2]
1798; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1799  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
1800  ret <4 x i64> %res
1801}
1802
1803define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
1804; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1805; X86:       # %bb.0:
1806; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1807; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1808; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1809; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1810; X86-NEXT:    vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00]
1811; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
1812; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1813; X86-NEXT:    retl # encoding: [0xc3]
1814;
1815; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1816; X64:       # %bb.0:
1817; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1818; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1819; X64-NEXT:    vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07]
1820; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
1821; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1822; X64-NEXT:    retq # encoding: [0xc3]
1823  %x2 = load <4 x i64>, ptr %x2p
1824  %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
1825  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3)
1826  %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
1827  %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %res1, 1
1828  ret { <4 x i64>, <4 x i64> } %res3
1829}
1830
1831declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1832declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1833
1834define <2 x i64> @test_int_x86_avx512_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1835; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_128:
1836; CHECK:       # %bb.0:
1837; CHECK-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2]
1838; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1839  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
1840  ret <2 x i64> %res
1841}
1842
1843define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
1844; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1845; X86:       # %bb.0:
1846; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1847; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1848; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1849; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1850; X86-NEXT:    vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00]
1851; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
1852; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1853; X86-NEXT:    retl # encoding: [0xc3]
1854;
1855; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1856; X64:       # %bb.0:
1857; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1858; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1859; X64-NEXT:    vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07]
1860; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
1861; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1862; X64-NEXT:    retq # encoding: [0xc3]
1863  %x2 = load <2 x i64>, ptr %x2p
1864  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
1865  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3)
1866  %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
1867  %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %res1, 1
1868  ret { <2 x i64>, <2 x i64> } %res3
1869}
1870
1871declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1872declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1873
1874define <16 x i16> @test_int_x86_avx512_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1875; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_256:
1876; CHECK:       # %bb.0:
1877; CHECK-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2]
1878; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1879  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1880  ret <16 x i16> %res
1881}
1882
1883define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
1884; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1885; X86:       # %bb.0:
1886; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1887; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1888; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1889; X86-NEXT:    vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00]
1890; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
1891; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1892; X86-NEXT:    retl # encoding: [0xc3]
1893;
1894; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1895; X64:       # %bb.0:
1896; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1897; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1898; X64-NEXT:    vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07]
1899; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
1900; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1901; X64-NEXT:    retq # encoding: [0xc3]
1902  %x2 = load <16 x i16>, ptr %x2p
1903  %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1904  %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3)
1905  %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
1906  %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %res1, 1
1907  ret { <16 x i16>, <16 x i16> } %res3
1908}
1909
1910declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1911declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1912
1913define <8 x i16> @test_int_x86_avx512_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1914; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_128:
1915; CHECK:       # %bb.0:
1916; CHECK-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2]
1917; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1918  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1919  ret <8 x i16> %res
1920}
1921
1922define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
1923; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1924; X86:       # %bb.0:
1925; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1926; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1927; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1928; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1929; X86-NEXT:    vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00]
1930; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
1931; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1932; X86-NEXT:    retl # encoding: [0xc3]
1933;
1934; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1935; X64:       # %bb.0:
1936; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1937; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1938; X64-NEXT:    vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07]
1939; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
1940; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1941; X64-NEXT:    retq # encoding: [0xc3]
1942  %x2 = load <8 x i16>, ptr %x2p
1943  %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1944  %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3)
1945  %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
1946  %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %res1, 1
1947  ret { <8 x i16>, <8 x i16> } %res3
1948}
1949