; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(ptr %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, ptr %__P, <32 x i1> %1)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(ptr %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, ptr %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %1, <32 x i16> %0)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %0, <32 x i16> zeroinitializer)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)