; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
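; This file exercises the AVX512VBMI2 + AVX512VL lowering of the masked
; expand/compress intrinsics (llvm.masked.expandload, llvm.masked.compressstore,
; llvm.x86.avx512.mask.expand/compress) and of the funnel-shift intrinsics that
; select to the VPSHLD/VPSHRD concat-shift instructions, for 128-bit and
; 256-bit vectors, checking instruction selection and MC encodings on both
; 32-bit and 64-bit x86.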

define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> %data)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> zeroinitializer)
  ret <8 x i16> %2
}

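; An all-true mask still selects the masked VPEXPANDW form below; the all-ones
; mask register is materialized with kxnorw %k0, %k0, %k1. At the C level the
; merge-masked expand load above corresponds to (a sketch, assuming
; AVX512VBMI2+VL and <immintrin.h>):
;   __m128i r = _mm_mask_expandloadu_epi16(data, mask, addr);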
define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %data)
  ret <8 x i16> %1
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> %data)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %data)
  ret <16 x i8> %1
}

define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}

define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> %1)
  ret void
}

define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}

define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> %1)
  ret void
}

define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}

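; The 256-bit variants below mirror the 128-bit tests, with 16-bit masks loaded
; via kmovw and 32-bit masks via kmovd. Note the vzeroupper emitted before
; returning from the void compress-store tests, which use YMM registers but
; return no vector value.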
define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> %data)
  ret <16 x i16> %2
}

define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> zeroinitializer)
  ret <16 x i16> %2
}

define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %data)
  ret <16 x i16> %1
}

define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}

define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> %data)
  ret <32 x i8> %2
}

define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> zeroinitializer)
  ret <32 x i8> %2
}

define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %data)
  ret <32 x i8> %1
}

define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}

define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> %1)
  ret void
}

define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}

define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> %1)
  ret void
}

define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}

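; The vpshld/vpshrd tests below check that llvm.fshl/llvm.fshr with a
; splat-constant shift amount select to the immediate concat-shift forms
; VPSHLDD/Q/W and VPSHRDD/Q/W, with merge- and zero-masking applied via a
; select on the bitcast mask. At the C level the merge-masked form corresponds
; to (a sketch, assuming AVX512VBMI2+VL and <immintrin.h>):
;   __m128i r = _mm_mask_shldi_epi32(src, k, a, b, 22);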
724define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
725; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
726; X86:       # %bb.0:
727; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
728; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
729; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
730; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
731; X86-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
732; X86-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
733; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
734; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
735; X86-NEXT:    retl # encoding: [0xc3]
736;
737; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
738; X64:       # %bb.0:
739; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
740; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
741; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
742; X64-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
743; X64-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
744; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
745; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
746; X64-NEXT:    retq # encoding: [0xc3]
747  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
748  %2 = bitcast i8 %x4 to <8 x i1>
749  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
750  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
751  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
752  %5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
753  %6 = bitcast i8 %x4 to <8 x i1>
754  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
755  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
756  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
757  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
758  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
759  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
760}
761
762define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
763; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
764; X86:       # %bb.0:
765; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
766; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
767; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
768; X86-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
769; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
770; X86-NEXT:    retl # encoding: [0xc3]
771;
772; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
773; X64:       # %bb.0:
774; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
775; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
776; X64-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
777; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
778; X64-NEXT:    retq # encoding: [0xc3]
779  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
780  %2 = bitcast i8 %x4 to <8 x i1>
781  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
782  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
783  %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
784  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
785  ret { <8 x i32>, <8 x i32> } %6
786}
787
788define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
789; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
790; X86:       # %bb.0:
791; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
792; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
793; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
794; X86-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
795; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
796; X86-NEXT:    retl # encoding: [0xc3]
797;
798; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
799; X64:       # %bb.0:
800; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
801; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
802; X64-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
803; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
804; X64-NEXT:    retq # encoding: [0xc3]
805  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 22, i64 22>)
806  %2 = bitcast i8 %x4 to <8 x i1>
807  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
808  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
809  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 23, i64 23>)
810  %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
811  %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1
812  ret { <2 x i64>, <2 x i64> } %6
813}
814
815define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
816; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
817; X86:       # %bb.0:
818; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
819; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
820; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
821; X86-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
822; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
823; X86-NEXT:    retl # encoding: [0xc3]
824;
825; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
826; X64:       # %bb.0:
827; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
828; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
829; X64-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
830; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
831; X64-NEXT:    retq # encoding: [0xc3]
832  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
833  %2 = bitcast i8 %x4 to <8 x i1>
834  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
835  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
836  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
837  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
838  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
839  ret { <4 x i64>, <4 x i64> } %6
840}
841
842define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
843; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
844; X86:       # %bb.0:
845; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
846; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
847; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
848; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
849; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
850; X86-NEXT:    retl # encoding: [0xc3]
851;
852; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
853; X64:       # %bb.0:
854; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
855; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
856; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
857; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
858; X64-NEXT:    retq # encoding: [0xc3]
859  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
860  %2 = bitcast i8 %x4 to <8 x i1>
861  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
862  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
863  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
864  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
865  ret { <8 x i16>, <8 x i16> } %6
866}
867
868define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
869; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
870; X86:       # %bb.0:
871; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
872; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
873; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
874; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
875; X86-NEXT:    retl # encoding: [0xc3]
876;
877; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
878; X64:       # %bb.0:
879; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
880; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
881; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
882; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
883; X64-NEXT:    retq # encoding: [0xc3]
884  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
885  %2 = bitcast i16 %x4 to <16 x i1>
886  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
887  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
888  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
889  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
890  ret { <16 x i16>, <16 x i16> } %6
891}
892
893define { <4 x i32>, <4 x i32>,  <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
894; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
895; X86:       # %bb.0:
896; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
897; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
898; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
899; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
900; X86-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
901; X86-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
902; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
903; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
904; X86-NEXT:    retl # encoding: [0xc3]
905;
906; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
907; X64:       # %bb.0:
908; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
909; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
910; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
911; X64-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
912; X64-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
913; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
914; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
915; X64-NEXT:    retq # encoding: [0xc3]
916  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
917  %2 = bitcast i8 %x4 to <8 x i1>
918  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
919  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
920  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
921  %5 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
922  %6 = bitcast i8 %x4 to <8 x i1>
923  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
924  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
925  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
926  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
927  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
928  ret { <4 x i32>, <4 x i32>,  <4 x i32> } %res5
929}
930
931define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
932; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
933; X86:       # %bb.0:
934; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
935; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
936; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
937; X86-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
938; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
939; X86-NEXT:    retl # encoding: [0xc3]
940;
941; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
942; X64:       # %bb.0:
943; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
944; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
945; X64-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
946; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
947; X64-NEXT:    retq # encoding: [0xc3]
948  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
949  %2 = bitcast i8 %x4 to <8 x i1>
950  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
951  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
952  %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
953  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
954  ret { <8 x i32>, <8 x i32> } %6
955}
956
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 23, i64 23>)
  %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1
  ret { <2 x i64>, <2 x i64> } %6
}

define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
  ret { <4 x i64>, <4 x i64> } %6
}

define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
  ret { <8 x i16>, <8 x i16> } %6
}

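; Note: the 256-bit word form has 16 lanes, so the mask is a full i16; on X86
; it is loaded straight into a k-register with kmovw rather than the
; movzbl+kmovd sequence used for the i8 masks above.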
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
  ret { <16 x i16>, <16 x i16> } %6
}

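; Note: the VPSHRDV tests below exercise the variable-count form, where the
; per-element shift amounts come from a register or memory operand rather
; than an immediate. The first result merges into the accumulator %x0 under
; the mask; the second is zero-masked, matching the {z} forms in the asm.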
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00]
; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07]
; X64-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %res3 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32> } %res3, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %res4
}

define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00]
; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07]
; X64-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32> } %res3, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %res4
}

define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00]
; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07]
; X64-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %res3 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %res4 = insertvalue { <4 x i64>, <4 x i64> } %res3, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %res4
}

define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00]
; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07]
; X64-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %res3 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64> } %res3, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %res4
}

define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00]
; X86-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07]
; X64-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}

define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00]
; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07]
; X64-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}

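; Note: the VPSHLDV tests below are the left-shifting counterpart, modeled
; with fshl: fshl(%a, %b, %c) yields the high N bits of
; (concat(%a, %b) << %c), so the accumulator %x0 is now the first funnel
; operand rather than the second.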
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00]
; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07]
; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %7 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %8 = insertvalue { <8 x i32>, <8 x i32> } %7, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %8
}

define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00]
; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07]
; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %7 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %8 = insertvalue { <4 x i32>, <4 x i32> } %7, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %8
}

define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00]
; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07]
; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %7 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %8 = insertvalue { <4 x i64>, <4 x i64> } %7, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %8
}

define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00]
; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07]
; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %7 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %8 = insertvalue { <2 x i64>, <2 x i64> } %7, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %8
}

define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00]
; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07]
; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}

define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00]
; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07]
; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}

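; Declarations for the generic funnel-shift intrinsics used above and for the
; masked expand/compress intrinsics exercised earlier in this file.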
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)