; Source: /llvm-project/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

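; 512-bit word expand tests. @llvm.masked.expandload.v32i16 reads consecutive
; i16 elements from %addr and expands them into the destination lanes whose
; mask bit is set; merge masking keeps %data in the other lanes, and {z}
; zeroes them. For example, with mask 0b...0101 the first two memory words
; land in lanes 0 and 2. An all-true mask is materialized with
; kxnord %k0, %k0, %k1 (kxnor of a register with itself is all ones), and a
; fully-true @llvm.x86.avx512.mask.expand with undef passthru folds to a no-op.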
define <32 x i16> @test_mask_expand_load_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %addr, <32 x i1> %1, <32 x i16> %data)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_load_w_512(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %addr, <32 x i1> %1, <32 x i16> zeroinitializer)
  ret <32 x i16> %2
}

define <32 x i16> @test_expand_load_w_512(ptr %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> %data)
  ret <32 x i16> %1
}

define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

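; Byte variants of the expand tests above: 64 x i8 lanes, so the mask is an
; i64 moved with kmovq (or built with kxnorq for the all-true case) and the
; selected instruction is VPEXPANDB.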
define <64 x i8> @test_mask_expand_load_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %addr, <64 x i1> %1, <64 x i8> %data)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_load_b_512(ptr %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %addr, <64 x i1> %1, <64 x i8> zeroinitializer)
  ret <64 x i8> %2
}

define <64 x i8> @test_expand_load_b_512(ptr %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i8> %data)
  ret <64 x i8> %1
}

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

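; Compress tests, the inverse operation: @llvm.masked.compressstore.v32i16
; packs the selected word lanes of %data contiguously into memory (VPCOMPRESSW
; with a memory destination), and @llvm.x86.avx512.mask.compress packs them
; into the low lanes of the result register. The store forms end in
; vzeroupper, presumably to avoid AVX/SSE transition penalties after touching
; the upper vector state.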
define void @test_mask_compress_store_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr %addr, <32 x i1> %1)
  ret void
}

define void @test_compress_store_w_512(ptr %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

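; Byte variants of the compress tests above, driven by a 64-bit lane mask and
; selecting VPCOMPRESSB.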
define void @test_mask_compress_store_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %addr, <64 x i1> %1)
  ret void
}

define void @test_compress_store_b_512(ptr %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

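; VPSHLD{D,Q,W} immediate concat-shift tests. A funnel shift with a splatted
; constant amount, fshl(%x0, %x1, C), yields the high element-width bits of
; the concatenation (%x0:%x1) shifted left by C, which matches VPSHLD's
; "concatenate and shift left" semantics, so the backend selects the
; immediate form. Each test returns a pair so both the merge-masked and the
; unmasked encodings are checked.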
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
  ret { <8 x i64>, <8 x i64> } %6
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}

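; VPSHRD{D,Q,W} immediate tests: the right funnel shift with swapped operands,
; fshr(%x1, %x0, C), yields the low element-width bits of (%x1:%x0) shifted
; right by C and selects the immediate VPSHRD form.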
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
  ret { <8 x i64>, <8 x i64> } %6
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}

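; VPSHRDV* variable concat-shift tests: the per-lane shift amounts come from a
; vector, loaded from %x2p for the merge-masked result and taken from %x4 for
; the zero-masked result, so both the load-folded and register forms are
; checked.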
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x00]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x07]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %7 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %8 = insertvalue { <16 x i32>, <16 x i32> } %7, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %8
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x00]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x07]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, ptr %x2p
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %8 = insertvalue { <8 x i64>, <8 x i64> } %7, <8 x i64> %6, 1
  ret { <8 x i64>, <8 x i64> } %8
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x00]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x07]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, ptr %x2p
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %8 = insertvalue { <32 x i16>, <32 x i16> } %7, <32 x i16> %6, 1
  ret { <32 x i16>, <32 x i16> } %8
}

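; VPSHLDV* variable left concat-shift tests, mirroring the VPSHRDV tests above
; but using fshl with the unswapped operand order.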
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x00]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x07]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %7 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %8 = insertvalue { <16 x i32>, <16 x i32> } %7, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %8
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x00]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x07]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, ptr %x2p
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %8 = insertvalue { <8 x i64>, <8 x i64> } %7, <8 x i64> %6, 1
  ret { <8 x i64>, <8 x i64> } %8
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x00]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x07]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, ptr %x2p
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %8 = insertvalue { <32 x i16>, <32 x i16> } %7, <32 x i16> %6, 1
  ret { <32 x i16>, <32 x i16> } %8
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)