xref: /llvm-project/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
; Merging masked expand-load of 32 x i16: upgraded intrinsic should select vpexpandw
; from memory with {%k1} merge-masking into %data.
5define <32 x i16> @test_mask_expand_load_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
6; X86-LABEL: test_mask_expand_load_w_512:
7; X86:       # %bb.0:
8; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
10; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
11; X86-NEXT:    retl # encoding: [0xc3]
12;
13; X64-LABEL: test_mask_expand_load_w_512:
14; X64:       # %bb.0:
15; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
16; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
17; X64-NEXT:    retq # encoding: [0xc3]
18  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> %data, i32 %mask)
19  ret <32 x i16> %res
20}
21
; Zeroing masked expand-load of 32 x i16: zeroinitializer passthru should select the
; {%k1} {z} form of vpexpandw.
22define <32 x i16> @test_maskz_expand_load_w_512(ptr %addr, i32 %mask) {
23; X86-LABEL: test_maskz_expand_load_w_512:
24; X86:       # %bb.0:
25; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
26; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
27; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
28; X86-NEXT:    retl # encoding: [0xc3]
29;
30; X64-LABEL: test_maskz_expand_load_w_512:
31; X64:       # %bb.0:
32; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
33; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
34; X64-NEXT:    retq # encoding: [0xc3]
35  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> zeroinitializer, i32 %mask)
36  ret <32 x i16> %res
37}
38
39declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> %data, i32 %mask)
40
; All-ones mask (-1) expand-load of 32 x i16: lowering materializes the mask via
; kxnord k0,k0 rather than folding to a plain vector load.
41define <32 x i16> @test_expand_load_w_512(ptr %addr, <32 x i16> %data) {
42; X86-LABEL: test_expand_load_w_512:
43; X86:       # %bb.0:
44; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
45; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
46; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
47; X86-NEXT:    retl # encoding: [0xc3]
48;
49; X64-LABEL: test_expand_load_w_512:
50; X64:       # %bb.0:
51; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
52; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
53; X64-NEXT:    retq # encoding: [0xc3]
54  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> %data, i32 -1)
55  ret <32 x i16> %res
56}
57
; Register expand with all-ones mask is the identity: the whole call should fold away,
; leaving only the return.
58define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
59; CHECK-LABEL: test_expand_w_512:
60; CHECK:       # %bb.0:
61; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
62  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
63  ret <32 x i16> %res
64}
65
; Merging register-to-register vpexpandw into %passthru (zmm1), then moved back to zmm0
; for the return value.
66define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
67; X86-LABEL: test_mask_expand_w_512:
68; X86:       # %bb.0:
69; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
70; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
71; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
72; X86-NEXT:    retl # encoding: [0xc3]
73;
74; X64-LABEL: test_mask_expand_w_512:
75; X64:       # %bb.0:
76; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
77; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
78; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
79; X64-NEXT:    retq # encoding: [0xc3]
80  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
81  ret <32 x i16> %res
82}
83
; Zeroing register-to-register vpexpandw ({%k1} {z}) from zeroinitializer passthru.
84define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
85; X86-LABEL: test_maskz_expand_w_512:
86; X86:       # %bb.0:
87; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
88; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
89; X86-NEXT:    retl # encoding: [0xc3]
90;
91; X64-LABEL: test_maskz_expand_w_512:
92; X64:       # %bb.0:
93; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
94; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
95; X64-NEXT:    retq # encoding: [0xc3]
96  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
97  ret <32 x i16> %res
98}
99
100declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
101
; Byte variant of the merging expand-load: 64 x i8 with a 64-bit mask, so kmovq is used
; to load %k1 before vpexpandb.
102define <64 x i8> @test_mask_expand_load_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
103; X86-LABEL: test_mask_expand_load_b_512:
104; X86:       # %bb.0:
105; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
106; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
107; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
108; X86-NEXT:    retl # encoding: [0xc3]
109;
110; X64-LABEL: test_mask_expand_load_b_512:
111; X64:       # %bb.0:
112; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
113; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
114; X64-NEXT:    retq # encoding: [0xc3]
115  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> %data, i64 %mask)
116  ret <64 x i8> %res
117}
118
; Zeroing masked expand-load of 64 x i8: zeroinitializer passthru selects the
; {%k1} {z} form of vpexpandb.
119define <64 x i8> @test_maskz_expand_load_b_512(ptr %addr, i64 %mask) {
120; X86-LABEL: test_maskz_expand_load_b_512:
121; X86:       # %bb.0:
122; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
123; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
124; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
125; X86-NEXT:    retl # encoding: [0xc3]
126;
127; X64-LABEL: test_maskz_expand_load_b_512:
128; X64:       # %bb.0:
129; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
130; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
131; X64-NEXT:    retq # encoding: [0xc3]
132  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> zeroinitializer, i64 %mask)
133  ret <64 x i8> %res
134}
135
136declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> %data, i64 %mask)
137
; All-ones mask (-1) expand-load of 64 x i8: mask is materialized with kxnorq k0,k0.
138define <64 x i8> @test_expand_load_b_512(ptr %addr, <64 x i8> %data) {
139; X86-LABEL: test_expand_load_b_512:
140; X86:       # %bb.0:
141; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
142; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
143; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
144; X86-NEXT:    retl # encoding: [0xc3]
145;
146; X64-LABEL: test_expand_load_b_512:
147; X64:       # %bb.0:
148; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
149; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
150; X64-NEXT:    retq # encoding: [0xc3]
151  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> %data, i64 -1)
152  ret <64 x i8> %res
153}
154
; Register expand of 64 x i8 with all-ones mask is the identity and folds to a bare return.
155define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
156; CHECK-LABEL: test_expand_b_512:
157; CHECK:       # %bb.0:
158; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
159  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
160  ret <64 x i8> %res
161}
162
; Merging register-to-register vpexpandb into %passthru (zmm1), then moved to zmm0
; for the return value.
163define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
164; X86-LABEL: test_mask_expand_b_512:
165; X86:       # %bb.0:
166; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
167; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
168; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
169; X86-NEXT:    retl # encoding: [0xc3]
170;
171; X64-LABEL: test_mask_expand_b_512:
172; X64:       # %bb.0:
173; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
174; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
175; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
176; X64-NEXT:    retq # encoding: [0xc3]
177  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
178  ret <64 x i8> %res
179}
180
; Zeroing register-to-register vpexpandb ({%k1} {z}) from zeroinitializer passthru.
181define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
182; X86-LABEL: test_maskz_expand_b_512:
183; X86:       # %bb.0:
184; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
185; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
186; X86-NEXT:    retl # encoding: [0xc3]
187;
188; X64-LABEL: test_maskz_expand_b_512:
189; X64:       # %bb.0:
190; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
191; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
192; X64-NEXT:    retq # encoding: [0xc3]
193  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
194  ret <64 x i8> %res
195}
196
197declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
198
; Masked compress-store of 32 x i16 to memory; the store variant also emits vzeroupper
; before returning.
199define void @test_mask_compress_store_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
200; X86-LABEL: test_mask_compress_store_w_512:
201; X86:       # %bb.0:
202; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
203; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
204; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
205; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
206; X86-NEXT:    retl # encoding: [0xc3]
207;
208; X64-LABEL: test_mask_compress_store_w_512:
209; X64:       # %bb.0:
210; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
211; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
212; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
213; X64-NEXT:    retq # encoding: [0xc3]
214  call void @llvm.x86.avx512.mask.compress.store.w.512(ptr %addr, <32 x i16> %data, i32 %mask)
215  ret void
216}
217
218declare void @llvm.x86.avx512.mask.compress.store.w.512(ptr %addr, <32 x i16> %data, i32 %mask)
219
; All-ones mask (-1) compress-store of 32 x i16: mask materialized via kxnord k0,k0
; rather than folding to a plain vector store.
220define void @test_compress_store_w_512(ptr %addr, <32 x i16> %data) {
221; X86-LABEL: test_compress_store_w_512:
222; X86:       # %bb.0:
223; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
224; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
225; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
226; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
227; X86-NEXT:    retl # encoding: [0xc3]
228;
229; X64-LABEL: test_compress_store_w_512:
230; X64:       # %bb.0:
231; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
232; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
233; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
234; X64-NEXT:    retq # encoding: [0xc3]
235  call void @llvm.x86.avx512.mask.compress.store.w.512(ptr %addr, <32 x i16> %data, i32 -1)
236  ret void
237}
238
; Merging register-to-register vpcompressw into %passthru (zmm1), then moved to zmm0
; for the return value.
239define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
240; X86-LABEL: test_mask_compress_w_512:
241; X86:       # %bb.0:
242; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
243; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
244; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
245; X86-NEXT:    retl # encoding: [0xc3]
246;
247; X64-LABEL: test_mask_compress_w_512:
248; X64:       # %bb.0:
249; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
250; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
251; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
252; X64-NEXT:    retq # encoding: [0xc3]
253  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
254  ret <32 x i16> %res
255}
256
; Zeroing register-to-register vpcompressw ({%k1} {z}) from zeroinitializer passthru.
257define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
258; X86-LABEL: test_maskz_compress_w_512:
259; X86:       # %bb.0:
260; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
261; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
262; X86-NEXT:    retl # encoding: [0xc3]
263;
264; X64-LABEL: test_maskz_compress_w_512:
265; X64:       # %bb.0:
266; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
267; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
268; X64-NEXT:    retq # encoding: [0xc3]
269  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
270  ret <32 x i16> %res
271}
272
; Register compress of 32 x i16 with all-ones mask is the identity and folds to a bare return.
273define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
274; CHECK-LABEL: test_compress_w_512:
275; CHECK:       # %bb.0:
276; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
277  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
278  ret <32 x i16> %res
279}
280
281declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
282
; Masked compress-store of 64 x i8 (64-bit mask via kmovq), with vzeroupper before return.
283define void @test_mask_compress_store_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
284; X86-LABEL: test_mask_compress_store_b_512:
285; X86:       # %bb.0:
286; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
287; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
288; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
289; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
290; X86-NEXT:    retl # encoding: [0xc3]
291;
292; X64-LABEL: test_mask_compress_store_b_512:
293; X64:       # %bb.0:
294; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
295; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
296; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
297; X64-NEXT:    retq # encoding: [0xc3]
298  call void @llvm.x86.avx512.mask.compress.store.b.512(ptr %addr, <64 x i8> %data, i64 %mask)
299  ret void
300}
301
302declare void @llvm.x86.avx512.mask.compress.store.b.512(ptr %addr, <64 x i8> %data, i64 %mask)
303
; All-ones mask (-1) compress-store of 64 x i8: mask materialized via kxnorq k0,k0.
304define void @test_compress_store_b_512(ptr %addr, <64 x i8> %data) {
305; X86-LABEL: test_compress_store_b_512:
306; X86:       # %bb.0:
307; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
308; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
309; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
310; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
311; X86-NEXT:    retl # encoding: [0xc3]
312;
313; X64-LABEL: test_compress_store_b_512:
314; X64:       # %bb.0:
315; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
316; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
317; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
318; X64-NEXT:    retq # encoding: [0xc3]
319  call void @llvm.x86.avx512.mask.compress.store.b.512(ptr %addr, <64 x i8> %data, i64 -1)
320  ret void
321}
322
; Merging register-to-register vpcompressb into %passthru (zmm1), then moved to zmm0
; for the return value.
323define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
324; X86-LABEL: test_mask_compress_b_512:
325; X86:       # %bb.0:
326; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
327; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
328; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
329; X86-NEXT:    retl # encoding: [0xc3]
330;
331; X64-LABEL: test_mask_compress_b_512:
332; X64:       # %bb.0:
333; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
334; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
335; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
336; X64-NEXT:    retq # encoding: [0xc3]
337  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
338  ret <64 x i8> %res
339}
340
; Zeroing register-to-register vpcompressb ({%k1} {z}) from zeroinitializer passthru.
341define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
342; X86-LABEL: test_maskz_compress_b_512:
343; X86:       # %bb.0:
344; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
345; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
346; X86-NEXT:    retl # encoding: [0xc3]
347;
348; X64-LABEL: test_maskz_compress_b_512:
349; X64:       # %bb.0:
350; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
351; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
352; X64-NEXT:    retq # encoding: [0xc3]
353  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
354  ret <64 x i8> %res
355}
356
; Register compress of 64 x i8 with all-ones mask is the identity and folds to a bare return.
357define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
358; CHECK-LABEL: test_compress_b_512:
359; CHECK:       # %bb.0:
360; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
361  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
362  ret <64 x i8> %res
363}
364
365declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
366
; Upgraded masked vpshldd (concat-shift-left, dword): returns both the masked result
; (merged into %x3 under i16 %x4) and the unmasked result in one struct.
367define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
368; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
369; X86:       # %bb.0:
370; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
371; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
372; X86-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
373; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
374; X86-NEXT:    retl # encoding: [0xc3]
375;
376; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
377; X64:       # %bb.0:
378; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
379; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
380; X64-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
381; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
382; X64-NEXT:    retq # encoding: [0xc3]
383  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
384  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23, <16 x i32> %x3, i16 -1)
385  %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0
386  %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1
387  ret { <16 x i32>, <16 x i32> } %res3
388}
389declare <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
390
; Upgraded masked vpshldq (concat-shift-left, qword): i8 mask is zero-extended via movzbl
; on X86 before kmovd; both masked and unmasked results are returned.
391define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
392; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
393; X86:       # %bb.0:
394; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
395; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
396; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
397; X86-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
398; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
399; X86-NEXT:    retl # encoding: [0xc3]
400;
401; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
402; X64:       # %bb.0:
403; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
404; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
405; X64-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
406; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
407; X64-NEXT:    retq # encoding: [0xc3]
408  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
409  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23, <8 x i64> %x3, i8 -1)
410  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
411  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
412  ret { <8 x i64>, <8 x i64> } %res3
413}
414declare <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
415
; Upgraded masked vpshldw (concat-shift-left, word) with small immediates 6/7;
; returns masked and unmasked results in one struct.
416define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
417; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
418; X86:       # %bb.0:
419; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
420; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
421; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
422; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
423; X86-NEXT:    retl # encoding: [0xc3]
424;
425; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
426; X64:       # %bb.0:
427; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
428; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
429; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
430; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
431; X64-NEXT:    retq # encoding: [0xc3]
432  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 %x4)
433  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7, <32 x i16> %x3, i32 -1)
434  %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
435  %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1
436  ret { <32 x i16>, <32 x i16> } %res3
437}
438declare <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)
439
; Upgraded masked vpshrdd (concat-shift-right, dword): masked and unmasked results
; returned together, mirroring the vpshld_d test above.
440define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
441; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
442; X86:       # %bb.0:
443; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
444; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
445; X86-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
446; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
447; X86-NEXT:    retl # encoding: [0xc3]
448;
449; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
450; X64:       # %bb.0:
451; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
452; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
453; X64-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
454; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
455; X64-NEXT:    retq # encoding: [0xc3]
456  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
457  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23, <16 x i32> %x3, i16 -1)
458  %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0
459  %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1
460  ret { <16 x i32>, <16 x i32> } %res3
461}
462declare <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
463
; Upgraded masked vpshrdq (concat-shift-right, qword): i8 mask zero-extended via movzbl
; on X86; both masked and unmasked results returned.
464define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
465; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
466; X86:       # %bb.0:
467; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
468; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
469; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
470; X86-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
471; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
472; X86-NEXT:    retl # encoding: [0xc3]
473;
474; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
475; X64:       # %bb.0:
476; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
477; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
478; X64-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
479; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
480; X64-NEXT:    retq # encoding: [0xc3]
481  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
482  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23, <8 x i64> %x3, i8 -1)
483  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
484  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
485  ret { <8 x i64>, <8 x i64> } %res3
486}
487declare <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
488
; Upgraded masked vpshrdw (concat-shift-right, word) with immediates 6/7;
; masked and unmasked results returned together.
489define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
490; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
491; X86:       # %bb.0:
492; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
493; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
494; X86-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
495; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
496; X86-NEXT:    retl # encoding: [0xc3]
497;
498; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
499; X64:       # %bb.0:
500; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
501; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
502; X64-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
503; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
504; X64-NEXT:    retq # encoding: [0xc3]
505  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 %x4)
506  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7, <32 x i16> %x3, i32 -1)
507  %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
508  %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1
509  ret { <32 x i16>, <32 x i16> } %res3
510}
511declare <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)
512
; Same codegen as vpshld_d_512 but expressed with the new unmasked intrinsic plus an
; explicit bitcast-to-<16 x i1> select; should lower identically.
513define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshld_d_512_2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
514; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512_2:
515; X86:       # %bb.0:
516; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
517; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
518; X86-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
519; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
520; X86-NEXT:    retl # encoding: [0xc3]
521;
522; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512_2:
523; X64:       # %bb.0:
524; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
525; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
526; X64-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
527; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
528; X64-NEXT:    retq # encoding: [0xc3]
529  %1 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
530  %2 = bitcast i16 %x4 to <16 x i1>
531  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
532  %4 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23)
533  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
534  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
535  ret { <16 x i32>, <16 x i32> } %6
536}
537declare <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32>, <16 x i32>, i32)
538
; Same codegen as vpshld_q_512 but via the new unmasked intrinsic plus an explicit
; bitcast-to-<8 x i1> select; should lower identically.
539define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshld_q_512_2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
540; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512_2:
541; X86:       # %bb.0:
542; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
543; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
544; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
545; X86-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
546; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
547; X86-NEXT:    retl # encoding: [0xc3]
548;
549; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512_2:
550; X64:       # %bb.0:
551; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
552; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
553; X64-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
554; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
555; X64-NEXT:    retq # encoding: [0xc3]
556  %1 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
557  %2 = bitcast i8 %x4 to <8 x i1>
558  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
559  %4 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23)
560  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
561  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
562  ret { <8 x i64>, <8 x i64> } %6
563}
564declare <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64>, <8 x i64>, i32)
565
; Upgrade test for the immediate-form VPSHLDW (concat-and-shift-left, word
; elements) intrinsic: the imm 6 result is merge-masked into %x3 under the
; i32 mask %x4, the imm 7 result is unmasked; both are returned in a struct.
define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshld_w_512_2(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512_2:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}
590declare <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16>, <32 x i16>, i32)
591
; Upgrade test for the immediate-form VPSHRDD (concat-and-shift-right, dword
; elements) intrinsic: the imm 22 result is merge-masked into %x3 under the
; i16 mask %x4, the imm 23 result is unmasked; both are returned in a struct.
define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrd_d_512_2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512_2:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}
616declare <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32>, <16 x i32>, i32)
617
; Upgrade test for the immediate-form VPSHRDQ (concat-and-shift-right, qword
; elements) intrinsic: the imm 22 result is merge-masked into %x3 under the
; i8 mask %x4, the imm 23 result is unmasked; both are returned in a struct.
define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshrd_q_512_2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512_2:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23)
  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
  ret { <8 x i64>, <8 x i64> } %6
}
643declare <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>, <8 x i64>, i32)
644
; Upgrade test for the immediate-form VPSHRDW (concat-and-shift-right, word
; elements) intrinsic: the imm 6 result is merge-masked into %x3 under the
; i32 mask %x4, the imm 7 result is unmasked; both are returned in a struct.
define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshrd_w_512_2(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512_2:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}
669declare <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16>, <32 x i16>, i32)
670
671declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
672declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
673
; The legacy mask intrinsic with an all-ones mask must lower to the plain
; (unmasked) VPSHRDVD instruction.
define <16 x i32>@test_int_x86_avx512_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}
682
; Upgrade test for the variable-count VPSHRDVD intrinsics: merge-masked form
; with a memory source loaded from %x2p, and zero-masked form with a register
; source %x4, both under the i16 mask %x3; both results are returned.
define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x00]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x07]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, ptr %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3)
  %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0
  %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1
  ret { <16 x i32>, <16 x i32> } %res3
}
709
710declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
711declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
712
; The legacy mask intrinsic with an all-ones mask must lower to the plain
; (unmasked) VPSHRDVQ instruction.
define <8 x i64>@test_int_x86_avx512_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}
721
; Upgrade test for the variable-count VPSHRDVQ intrinsics: merge-masked form
; with a memory source loaded from %x2p, and zero-masked form with a register
; source %x4, both under the i8 mask %x3; both results are returned.
define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x00]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x07]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, ptr %x2p
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8  %x3)
  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
  ret { <8 x i64>, <8 x i64> } %res3
}
749
750declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
751declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
752
; The legacy mask intrinsic with an all-ones mask must lower to the plain
; (unmasked) VPSHRDVW instruction.
define <32 x i16>@test_int_x86_avx512_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}
761
; Upgrade test for the variable-count VPSHRDVW intrinsics: merge-masked form
; with a memory source loaded from %x2p, and zero-masked form with a register
; source %x4, both under the i32 mask %x3; both results are returned.
define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x00]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x07]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, ptr %x2p
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32  %x3)
  %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
  %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1
  ret { <32 x i16>, <32 x i16> } %res3
}
788
789declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
790declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
791
; The legacy mask intrinsic with an all-ones mask must lower to the plain
; (unmasked) VPSHLDVD instruction.
define <16 x i32>@test_int_x86_avx512_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}
800
; Upgrade test for the variable-count VPSHLDVD intrinsics: merge-masked form
; with a memory source loaded from %x2p, and zero-masked form with a register
; source %x4, both under the i16 mask %x3; both results are returned.
define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x00]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x07]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, ptr %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3)
  %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0
  %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1
  ret { <16 x i32>, <16 x i32> } %res3
}
827
828declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
829declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
830
; The legacy mask intrinsic with an all-ones mask must lower to the plain
; (unmasked) VPSHLDVQ instruction.
define <8 x i64>@test_int_x86_avx512_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}
839
; Upgrade test for the variable-count VPSHLDVQ intrinsics: merge-masked form
; with a memory source loaded from %x2p, and zero-masked form with a register
; source %x4, both under the i8 mask %x3; both results are returned.
define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x00]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x07]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, ptr %x2p
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8  %x3)
  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
  ret { <8 x i64>, <8 x i64> } %res3
}
867
868declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
869declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
870
; The legacy mask intrinsic with an all-ones mask must lower to the plain
; (unmasked) VPSHLDVW instruction.
define <32 x i16>@test_int_x86_avx512_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  ret <32 x i16> %res
}
879
; Upgrade test for the variable-count VPSHLDVW intrinsics: merge-masked form
; with a memory source loaded from %x2p, and zero-masked form with a register
; source %x4, both under the i32 mask %x3; both results are returned.
define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x00]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x07]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, ptr %x2p
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32  %x3)
  %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
  %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1
  ret { <32 x i16>, <32 x i16> } %res3
}
906