xref: /llvm-project/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll (revision b5d35feacb7246573c6a4ab2bddc4919a4228ed5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
; kadd.d lowering test. Two 32-bit masks are produced with vptestmw, combined
; with kaddd, and the eq-zero compare of the bitcast result is expected to fold
; into kortestd + sete. Same code on both targets (i32 fits a GPR everywhere).
5define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind {
6; CHECK-LABEL: test_int_x86_avx512_kadd_d:
7; CHECK:       # %bb.0: # %entry
8; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
9; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
10; CHECK-NEXT:    kaddd %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfd,0x4a,0xc1]
11; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
12; CHECK-NEXT:    kortestd %k0, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc0]
13; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
14; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
15; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
16entry:
17  %0 = icmp ne <32 x i16> %A, zeroinitializer
18  %1 = icmp ne <32 x i16> %B, zeroinitializer
19  %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1)
20  %3 = bitcast <32 x i1> %2 to i32
21  %4 = icmp eq i32 %3, 0
22  %5 = zext i1 %4 to i32
23  ret i32 %5
24}
25declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>)
26
; kadd.q lowering test. On 32-bit X86 there is no 64-bit kortest of a GPR-sized
; result, so the i64 zero test is split: kshiftrq extracts the high 32 mask
; bits and kortestd ORs both halves. X64 can use kortestq directly.
27define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind {
28; X86-LABEL: test_int_x86_avx512_kadd_q:
29; X86:       # %bb.0: # %entry
30; X86-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
31; X86-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
32; X86-NEXT:    kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
33; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
34; X86-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
35; X86-NEXT:    kortestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc1]
36; X86-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
37; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
38; X86-NEXT:    retl # encoding: [0xc3]
39;
40; X64-LABEL: test_int_x86_avx512_kadd_q:
41; X64:       # %bb.0: # %entry
42; X64-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
43; X64-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
44; X64-NEXT:    kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
45; X64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
46; X64-NEXT:    kortestq %k0, %k0 # encoding: [0xc4,0xe1,0xf8,0x98,0xc0]
47; X64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
48; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
49; X64-NEXT:    retq # encoding: [0xc3]
50entry:
51  %0 = icmp ne <64 x i8> %A, zeroinitializer
52  %1 = icmp ne <64 x i8> %B, zeroinitializer
53  %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1)
54  %3 = bitcast <64 x i1> %2 to i64
55  %4 = icmp eq i64 %3, 0
56  %5 = zext i1 %4 to i32
57  ret i32 %5
58}
59declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
60
; ktestc.d lowering test. The intrinsic maps to ktestd; the "carry" result is
; materialized with setb into a zeroed eax.
61define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
62; CHECK-LABEL: test_x86_avx512_ktestc_d:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
65; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
66; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
67; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
68; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
69; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
70; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
71  %1 = icmp ne <32 x i16> %A, zeroinitializer
72  %2 = icmp ne <32 x i16> %B, zeroinitializer
73  %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
74  ret i32 %res
75}
76declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone
77
; ktestz.d lowering test. Same ktestd as the ktestc variant, but the "zero"
; result is read with sete instead of setb.
78define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
79; CHECK-LABEL: test_x86_avx512_ktestz_d:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
82; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
83; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
84; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
85; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
86; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
87; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
88  %1 = icmp ne <32 x i16> %A, zeroinitializer
89  %2 = icmp ne <32 x i16> %B, zeroinitializer
90  %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
91  ret i32 %res
92}
93declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone
94
; ktestc.q lowering test. 64-bit masks from vptestmb are tested with ktestq;
; carry result read via setb.
95define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
96; CHECK-LABEL: test_x86_avx512_ktestc_q:
97; CHECK:       # %bb.0:
98; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
99; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
100; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
101; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
102; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
103; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
104; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
105  %1 = icmp ne <64 x i8> %A, zeroinitializer
106  %2 = icmp ne <64 x i8> %B, zeroinitializer
107  %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
108  ret i32 %res
109}
110declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone
111
; ktestz.q lowering test. Same ktestq as the ktestc variant, zero result read
; via sete.
112define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
113; CHECK-LABEL: test_x86_avx512_ktestz_q:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
116; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
117; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
118; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
119; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
120; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
121; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
122  %1 = icmp ne <64 x i8> %A, zeroinitializer
123  %2 = icmp ne <64 x i8> %B, zeroinitializer
124  %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
125  ret i32 %res
126}
127declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone
128
; packssdw.512, reg/reg, unmasked: expects a bare vpackssdw.
129define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
130; CHECK-LABEL: test_mask_packs_epi32_rr_512:
131; CHECK:       # %bb.0:
132; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
133; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
134  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
135  ret <32 x i16> %1
136}
137
; packssdw.512, reg/reg, merge-masked: select on the i32 mask folds into the
; {%k1} form writing into the passthru register, followed by a move to zmm0.
; The mask arrives on the stack on X86 (kmovd load) vs. in edi on X64.
138define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
139; X86-LABEL: test_mask_packs_epi32_rrk_512:
140; X86:       # %bb.0:
141; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
142; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
143; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
144; X86-NEXT:    retl # encoding: [0xc3]
145;
146; X64-LABEL: test_mask_packs_epi32_rrk_512:
147; X64:       # %bb.0:
148; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
149; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
150; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
151; X64-NEXT:    retq # encoding: [0xc3]
152  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
153  %2 = bitcast i32 %mask to <32 x i1>
154  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
155  ret <32 x i16> %3
156}
157
; packssdw.512, reg/reg, zero-masked: select against zeroinitializer folds
; into the {%k1} {z} form.
158define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
159; X86-LABEL: test_mask_packs_epi32_rrkz_512:
160; X86:       # %bb.0:
161; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
162; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
163; X86-NEXT:    retl # encoding: [0xc3]
164;
165; X64-LABEL: test_mask_packs_epi32_rrkz_512:
166; X64:       # %bb.0:
167; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
168; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
169; X64-NEXT:    retq # encoding: [0xc3]
170  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
171  %2 = bitcast i32 %mask to <32 x i1>
172  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
173  ret <32 x i16> %3
174}
175
; packssdw.512, reg/mem, unmasked: the load of %b folds into the vpackssdw
; memory operand (stack-passed pointer on X86, rdi on X64).
176define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) {
177; X86-LABEL: test_mask_packs_epi32_rm_512:
178; X86:       # %bb.0:
179; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
180; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
181; X86-NEXT:    retl # encoding: [0xc3]
182;
183; X64-LABEL: test_mask_packs_epi32_rm_512:
184; X64:       # %bb.0:
185; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
186; X64-NEXT:    retq # encoding: [0xc3]
187  %b = load <16 x i32>, ptr %ptr_b
188  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
189  ret <32 x i16> %1
190}
191
; packssdw.512, reg/mem, merge-masked: folded load plus {%k1} merge into the
; passthru register.
192define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
193; X86-LABEL: test_mask_packs_epi32_rmk_512:
194; X86:       # %bb.0:
195; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
196; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
197; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
198; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
199; X86-NEXT:    retl # encoding: [0xc3]
200;
201; X64-LABEL: test_mask_packs_epi32_rmk_512:
202; X64:       # %bb.0:
203; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
204; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
205; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
206; X64-NEXT:    retq # encoding: [0xc3]
207  %b = load <16 x i32>, ptr %ptr_b
208  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
209  %2 = bitcast i32 %mask to <32 x i1>
210  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
211  ret <32 x i16> %3
212}
213
; packssdw.512, reg/mem, zero-masked: folded load plus {%k1} {z}.
214define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) {
215; X86-LABEL: test_mask_packs_epi32_rmkz_512:
216; X86:       # %bb.0:
217; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
218; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
219; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
220; X86-NEXT:    retl # encoding: [0xc3]
221;
222; X64-LABEL: test_mask_packs_epi32_rmkz_512:
223; X64:       # %bb.0:
224; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
225; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
226; X64-NEXT:    retq # encoding: [0xc3]
227  %b = load <16 x i32>, ptr %ptr_b
228  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
229  %2 = bitcast i32 %mask to <32 x i1>
230  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
231  ret <32 x i16> %3
232}
233
; packssdw.512, broadcast operand, unmasked: the scalar load + splat
; shufflevector folds into the (mem){1to16} embedded-broadcast form.
234define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) {
235; X86-LABEL: test_mask_packs_epi32_rmb_512:
236; X86:       # %bb.0:
237; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
238; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
239; X86-NEXT:    retl # encoding: [0xc3]
240;
241; X64-LABEL: test_mask_packs_epi32_rmb_512:
242; X64:       # %bb.0:
243; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
244; X64-NEXT:    retq # encoding: [0xc3]
245  %q = load i32, ptr %ptr_b
246  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
247  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
248  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
249  ret <32 x i16> %1
250}
251
; packssdw.512, broadcast operand, merge-masked: {1to16} broadcast combined
; with {%k1} merge into passthru.
252define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
253; X86-LABEL: test_mask_packs_epi32_rmbk_512:
254; X86:       # %bb.0:
255; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
256; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
257; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
258; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
259; X86-NEXT:    retl # encoding: [0xc3]
260;
261; X64-LABEL: test_mask_packs_epi32_rmbk_512:
262; X64:       # %bb.0:
263; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
264; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
265; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
266; X64-NEXT:    retq # encoding: [0xc3]
267  %q = load i32, ptr %ptr_b
268  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
269  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
270  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
271  %2 = bitcast i32 %mask to <32 x i1>
272  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
273  ret <32 x i16> %3
274}
275
; packssdw.512, broadcast operand, zero-masked: {1to16} plus {%k1} {z}.
276define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) {
277; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
278; X86:       # %bb.0:
279; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
280; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
281; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
282; X86-NEXT:    retl # encoding: [0xc3]
283;
284; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
285; X64:       # %bb.0:
286; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
287; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
288; X64-NEXT:    retq # encoding: [0xc3]
289  %q = load i32, ptr %ptr_b
290  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
291  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
292  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
293  %2 = bitcast i32 %mask to <32 x i1>
294  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
295  ret <32 x i16> %3
296}
297
298declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
299
; packsswb.512, reg/reg, unmasked: expects a bare vpacksswb.
300define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
301; CHECK-LABEL: test_mask_packs_epi16_rr_512:
302; CHECK:       # %bb.0:
303; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
304; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
305  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
306  ret <64 x i8> %1
307}
308
; packsswb.512, reg/reg, merge-masked: i64 mask needs kmovq (stack load on
; X86, rdi on X64) before the {%k1} merge into passthru.
309define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
310; X86-LABEL: test_mask_packs_epi16_rrk_512:
311; X86:       # %bb.0:
312; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
313; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
314; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
315; X86-NEXT:    retl # encoding: [0xc3]
316;
317; X64-LABEL: test_mask_packs_epi16_rrk_512:
318; X64:       # %bb.0:
319; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
320; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
321; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
322; X64-NEXT:    retq # encoding: [0xc3]
323  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
324  %2 = bitcast i64 %mask to <64 x i1>
325  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
326  ret <64 x i8> %3
327}
328
; packsswb.512, reg/reg, zero-masked: kmovq of the i64 mask plus {%k1} {z}.
329define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
330; X86-LABEL: test_mask_packs_epi16_rrkz_512:
331; X86:       # %bb.0:
332; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
333; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
334; X86-NEXT:    retl # encoding: [0xc3]
335;
336; X64-LABEL: test_mask_packs_epi16_rrkz_512:
337; X64:       # %bb.0:
338; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
339; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
340; X64-NEXT:    retq # encoding: [0xc3]
341  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
342  %2 = bitcast i64 %mask to <64 x i1>
343  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
344  ret <64 x i8> %3
345}
346
; packsswb.512, reg/mem, unmasked: load of %b folds into the memory operand.
347define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
348; X86-LABEL: test_mask_packs_epi16_rm_512:
349; X86:       # %bb.0:
350; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
351; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
352; X86-NEXT:    retl # encoding: [0xc3]
353;
354; X64-LABEL: test_mask_packs_epi16_rm_512:
355; X64:       # %bb.0:
356; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
357; X64-NEXT:    retq # encoding: [0xc3]
358  %b = load <32 x i16>, ptr %ptr_b
359  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
360  ret <64 x i8> %1
361}
362
; packsswb.512, reg/mem, merge-masked: folded load plus kmovq'd i64 mask and
; {%k1} merge into passthru.
363define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) {
364; X86-LABEL: test_mask_packs_epi16_rmk_512:
365; X86:       # %bb.0:
366; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
367; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
368; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
369; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
370; X86-NEXT:    retl # encoding: [0xc3]
371;
372; X64-LABEL: test_mask_packs_epi16_rmk_512:
373; X64:       # %bb.0:
374; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
375; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
376; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
377; X64-NEXT:    retq # encoding: [0xc3]
378  %b = load <32 x i16>, ptr %ptr_b
379  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
380  %2 = bitcast i64 %mask to <64 x i1>
381  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
382  ret <64 x i8> %3
383}
384
; packsswb.512, reg/mem, zero-masked: folded load plus {%k1} {z}.
385define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) {
386; X86-LABEL: test_mask_packs_epi16_rmkz_512:
387; X86:       # %bb.0:
388; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
389; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
390; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
391; X86-NEXT:    retl # encoding: [0xc3]
392;
393; X64-LABEL: test_mask_packs_epi16_rmkz_512:
394; X64:       # %bb.0:
395; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
396; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
397; X64-NEXT:    retq # encoding: [0xc3]
398  %b = load <32 x i16>, ptr %ptr_b
399  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
400  %2 = bitcast i64 %mask to <64 x i1>
401  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
402  ret <64 x i8> %3
403}
404
405declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
406
407
; packusdw.512, reg/reg, unmasked: expects a bare vpackusdw.
408define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
409; CHECK-LABEL: test_mask_packus_epi32_rr_512:
410; CHECK:       # %bb.0:
411; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
412; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
413  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
414  ret <32 x i16> %1
415}
416
; packusdw.512, reg/reg, merge-masked: {%k1} merge into passthru, then move
; to zmm0.
417define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
418; X86-LABEL: test_mask_packus_epi32_rrk_512:
419; X86:       # %bb.0:
420; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
421; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
422; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
423; X86-NEXT:    retl # encoding: [0xc3]
424;
425; X64-LABEL: test_mask_packus_epi32_rrk_512:
426; X64:       # %bb.0:
427; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
428; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
429; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
430; X64-NEXT:    retq # encoding: [0xc3]
431  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
432  %2 = bitcast i32 %mask to <32 x i1>
433  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
434  ret <32 x i16> %3
435}
436
; packusdw.512, reg/reg, zero-masked: {%k1} {z}.
437define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
438; X86-LABEL: test_mask_packus_epi32_rrkz_512:
439; X86:       # %bb.0:
440; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
441; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
442; X86-NEXT:    retl # encoding: [0xc3]
443;
444; X64-LABEL: test_mask_packus_epi32_rrkz_512:
445; X64:       # %bb.0:
446; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
447; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
448; X64-NEXT:    retq # encoding: [0xc3]
449  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
450  %2 = bitcast i32 %mask to <32 x i1>
451  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
452  ret <32 x i16> %3
453}
454
; packusdw.512, reg/mem, unmasked: load of %b folds into the memory operand.
455define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) {
456; X86-LABEL: test_mask_packus_epi32_rm_512:
457; X86:       # %bb.0:
458; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
459; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
460; X86-NEXT:    retl # encoding: [0xc3]
461;
462; X64-LABEL: test_mask_packus_epi32_rm_512:
463; X64:       # %bb.0:
464; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
465; X64-NEXT:    retq # encoding: [0xc3]
466  %b = load <16 x i32>, ptr %ptr_b
467  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
468  ret <32 x i16> %1
469}
470
; packusdw.512, reg/mem, merge-masked: folded load plus {%k1} merge.
471define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
472; X86-LABEL: test_mask_packus_epi32_rmk_512:
473; X86:       # %bb.0:
474; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
475; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
476; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
477; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
478; X86-NEXT:    retl # encoding: [0xc3]
479;
480; X64-LABEL: test_mask_packus_epi32_rmk_512:
481; X64:       # %bb.0:
482; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
483; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
484; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
485; X64-NEXT:    retq # encoding: [0xc3]
486  %b = load <16 x i32>, ptr %ptr_b
487  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
488  %2 = bitcast i32 %mask to <32 x i1>
489  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
490  ret <32 x i16> %3
491}
492
; packusdw.512, reg/mem, zero-masked: folded load plus {%k1} {z}.
493define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) {
494; X86-LABEL: test_mask_packus_epi32_rmkz_512:
495; X86:       # %bb.0:
496; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
497; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
498; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
499; X86-NEXT:    retl # encoding: [0xc3]
500;
501; X64-LABEL: test_mask_packus_epi32_rmkz_512:
502; X64:       # %bb.0:
503; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
504; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
505; X64-NEXT:    retq # encoding: [0xc3]
506  %b = load <16 x i32>, ptr %ptr_b
507  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
508  %2 = bitcast i32 %mask to <32 x i1>
509  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
510  ret <32 x i16> %3
511}
512
; packusdw.512, broadcast operand, unmasked: scalar load + splat folds into
; the (mem){1to16} embedded-broadcast form.
513define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) {
514; X86-LABEL: test_mask_packus_epi32_rmb_512:
515; X86:       # %bb.0:
516; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
517; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
518; X86-NEXT:    retl # encoding: [0xc3]
519;
520; X64-LABEL: test_mask_packus_epi32_rmb_512:
521; X64:       # %bb.0:
522; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
523; X64-NEXT:    retq # encoding: [0xc3]
524  %q = load i32, ptr %ptr_b
525  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
526  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
527  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
528  ret <32 x i16> %1
529}
530
; packusdw.512, broadcast operand, merge-masked: {1to16} plus {%k1} merge.
531define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
532; X86-LABEL: test_mask_packus_epi32_rmbk_512:
533; X86:       # %bb.0:
534; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
535; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
536; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
537; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
538; X86-NEXT:    retl # encoding: [0xc3]
539;
540; X64-LABEL: test_mask_packus_epi32_rmbk_512:
541; X64:       # %bb.0:
542; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
543; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
544; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
545; X64-NEXT:    retq # encoding: [0xc3]
546  %q = load i32, ptr %ptr_b
547  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
548  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
549  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
550  %2 = bitcast i32 %mask to <32 x i1>
551  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
552  ret <32 x i16> %3
553}
554
; packusdw.512, broadcast operand, zero-masked: {1to16} plus {%k1} {z}.
555define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) {
556; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
557; X86:       # %bb.0:
558; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
559; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
560; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
561; X86-NEXT:    retl # encoding: [0xc3]
562;
563; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
564; X64:       # %bb.0:
565; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
566; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
567; X64-NEXT:    retq # encoding: [0xc3]
568  %q = load i32, ptr %ptr_b
569  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
570  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
571  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
572  %2 = bitcast i32 %mask to <32 x i1>
573  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
574  ret <32 x i16> %3
575}
576
577declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
578
; packuswb.512, reg/reg, unmasked: expects a bare vpackuswb.
579define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
580; CHECK-LABEL: test_mask_packus_epi16_rr_512:
581; CHECK:       # %bb.0:
582; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
583; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
584  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
585  ret <64 x i8> %1
586}
587
; packuswb.512, reg/reg, merge-masked: kmovq'd i64 mask plus {%k1} merge into
; passthru, then move to zmm0.
588define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
589; X86-LABEL: test_mask_packus_epi16_rrk_512:
590; X86:       # %bb.0:
591; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
592; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
593; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
594; X86-NEXT:    retl # encoding: [0xc3]
595;
596; X64-LABEL: test_mask_packus_epi16_rrk_512:
597; X64:       # %bb.0:
598; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
599; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
600; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
601; X64-NEXT:    retq # encoding: [0xc3]
602  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
603  %2 = bitcast i64 %mask to <64 x i1>
604  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
605  ret <64 x i8> %3
606}
607
608define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
609; X86-LABEL: test_mask_packus_epi16_rrkz_512:
610; X86:       # %bb.0:
611; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
612; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
613; X86-NEXT:    retl # encoding: [0xc3]
614;
615; X64-LABEL: test_mask_packus_epi16_rrkz_512:
616; X64:       # %bb.0:
617; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
618; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
619; X64-NEXT:    retq # encoding: [0xc3]
620  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
621  %2 = bitcast i64 %mask to <64 x i1>
622  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
623  ret <64 x i8> %3
624}
625
626define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
627; X86-LABEL: test_mask_packus_epi16_rm_512:
628; X86:       # %bb.0:
629; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
630; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
631; X86-NEXT:    retl # encoding: [0xc3]
632;
633; X64-LABEL: test_mask_packus_epi16_rm_512:
634; X64:       # %bb.0:
635; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
636; X64-NEXT:    retq # encoding: [0xc3]
637  %b = load <32 x i16>, ptr %ptr_b
638  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
639  ret <64 x i8> %1
640}
641
642define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) {
643; X86-LABEL: test_mask_packus_epi16_rmk_512:
644; X86:       # %bb.0:
645; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
646; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
647; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
648; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
649; X86-NEXT:    retl # encoding: [0xc3]
650;
651; X64-LABEL: test_mask_packus_epi16_rmk_512:
652; X64:       # %bb.0:
653; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
654; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
655; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
656; X64-NEXT:    retq # encoding: [0xc3]
657  %b = load <32 x i16>, ptr %ptr_b
658  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
659  %2 = bitcast i64 %mask to <64 x i1>
660  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
661  ret <64 x i8> %3
662}
663
664define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) {
665; X86-LABEL: test_mask_packus_epi16_rmkz_512:
666; X86:       # %bb.0:
667; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
668; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
669; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
670; X86-NEXT:    retl # encoding: [0xc3]
671;
672; X64-LABEL: test_mask_packus_epi16_rmkz_512:
673; X64:       # %bb.0:
674; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
675; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
676; X64-NEXT:    retq # encoding: [0xc3]
677  %b = load <32 x i16>, ptr %ptr_b
678  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
679  %2 = bitcast i64 %mask to <64 x i1>
680  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
681  ret <64 x i8> %3
682}
683
684declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
685
; "t2var" naming: the index vector %x0 is passed as the *second* argument of the
; vpermi2var intrinsic; unmasked, ISel is free to pick the vpermi2w form.
define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x75,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  ret <32 x i16> %1
}

; Merge-masked with pass-through %x1 (the data operand): that forces the vpermt2w
; form, whose destination register holds the data operand being merged into.
define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  ret <32 x i16> %3
}

; Zero-masked: no pass-through to preserve, so vpermi2w {%k1} {z} works in place.
define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Shared two-source word-permute intrinsic used by both the t2var and i2var tests.
declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)
734
; "i2var" naming: the index vector is the *first* call argument %x0; unmasked this
; lowers to vpermt2w (destination register may be clobbered freely).
define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  ret <32 x i16> %1
}

; Merge-masked with pass-through %x1 (the index operand): forces the vpermi2w form,
; whose destination holds the index vector being merged into.
define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  ret <32 x i16> %3
}
763
; Byte-average intrinsic (lowered to vpavgb below).
declare <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8>, <64 x i8>)

; Unmasked: expect a bare vpavgb.
define <64 x i8> @test_int_x86_avx512_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pavg_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpavgb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1)
  ret <64 x i8> %1
}

; Merge-masked: i64-mask select with %x2 folds into vpavgb {%k1} on zmm2.
define <64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pavg_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pavg_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i64 %x3 to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2
  ret <64 x i8> %3
}
794
; Word-average intrinsic (lowered to vpavgw below).
declare <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16>, <32 x i16>)

; Unmasked: expect a bare vpavgw.
define <32 x i16> @test_int_x86_avx512_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pavg_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpavgw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked: i32-mask select with %x2 folds into vpavgw {%k1} on zmm2.
define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pavg_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pavg_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
825
; Byte-shuffle intrinsic (lowered to vpshufb below).
declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)

; Unmasked: expect a bare vpshufb.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  ret <64 x i8> %res
}

; Merge-masked: i64-mask select with %x2 folds into vpshufb {%k1} on zmm2.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
  ret <64 x i8> %res2
}

; Zero-masked: select against zeroinitializer folds into vpshufb {%k1} {z}.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
  ret <64 x i8> %res2
}
874
; Unsigned high-half word multiply intrinsic (lowered to vpmulhuw below).
declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)

; Unmasked: expect a bare vpmulhuw.
define <32 x i16> @test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked: i32-mask select with %x2 folds into vpmulhuw {%k1} on zmm2.
define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
905
; Signed high-half word multiply intrinsic (lowered to vpmulhw below).
declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>)

; Unmasked: expect a bare vpmulhw.
define <32 x i16> @test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmulh_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked: i32-mask select with %x2 folds into vpmulhw {%k1} on zmm2.
define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
936
; Rounded-scaled word multiply intrinsic (lowered to vpmulhrsw below).
declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>)

; Unmasked: expect a bare vpmulhrsw.
define <32 x i16> @test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked: i32-mask select with %x2 folds into vpmulhrsw {%k1} on zmm2.
define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
967
; Plain IR trunc <32 x i16> -> <32 x i8> lowers to vpmovwb (zmm -> ymm).
define <32 x i8>@test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0) {
; CHECK-LABEL: test_int_x86_avx512_pmov_wb_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = trunc <32 x i16> %x0 to <32 x i8>
  ret <32 x i8> %1
}

; Merge-masked truncate: select with %x1 folds into vpmovwb {%k1} on ymm1.
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = trunc <32 x i16> %x0 to <32 x i8>
  %2 = bitcast i32 %x2 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1
  ret <32 x i8> %3
}

; Zero-masked truncate: select against zeroinitializer gives vpmovwb {%k1} {z}.
define <32 x i8>@test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = trunc <32 x i16> %x0 to <32 x i8>
  %2 = bitcast i32 %x2 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

; Truncate-to-memory intrinsic: mask operand selects which bytes are stored.
declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr %ptr, <32 x i16>, i32)

; Calls the store intrinsic twice: all-ones mask (-1) gives the unmasked store,
; then the variable mask gives the {%k1}-predicated store to the same address.
define void @test_int_x86_avx512_mask_pmov_wb_mem_512(ptr %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00]
; X86-NEXT:    vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07]
; X64-NEXT:    vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
1038
; Signed-saturating word->byte truncate intrinsic; third operand is the i32 mask
; (-1 = unmasked). Lowered to vpmovswb below.
declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)

; All-ones mask: expect a bare vpmovswb (pass-through %x1 is irrelevant and dropped).
define <32 x i8>@test_int_x86_avx512_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  ret <32 x i8> %res
}

; Variable mask with pass-through %x1: vpmovswb {%k1} merges into ymm1.
define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  ret <32 x i8> %res
}

; Zeroinitializer pass-through selects the {%k1} {z} form.
define <32 x i8>@test_int_x86_avx512_maskz_pmovs_wb_512(<32 x i16> %x0, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  ret <32 x i8> %res
}

; Signed-saturating truncate-to-memory variant.
declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr %ptr, <32 x i16>, i32)

; Unmasked (-1) store followed by a {%k1}-predicated store to the same address.
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(ptr %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00]
; X86-NEXT:    vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07]
; X64-NEXT:    vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
1107
; Unsigned-saturating word->byte truncate intrinsic; third operand is the i32 mask
; (-1 = unmasked). Lowered to vpmovuswb below.
declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)

; All-ones mask: expect a bare vpmovuswb.
define <32 x i8>@test_int_x86_avx512_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  ret <32 x i8> %res
}

; Variable mask with pass-through %x1: vpmovuswb {%k1} merges into ymm1.
define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  ret <32 x i8> %res
}

; Zeroinitializer pass-through selects the {%k1} {z} form.
define <32 x i8>@test_int_x86_avx512_maskz_pmovus_wb_512(<32 x i16> %x0, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovuswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  ret <32 x i8> %res
}

; Unsigned-saturating truncate-to-memory variant.
declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr %ptr, <32 x i16>, i32)

; Unmasked (-1) store followed by a {%k1}-predicated store to the same address.
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(ptr %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00]
; X86-NEXT:    vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07]
; X64-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
1176
; Byte multiply-add intrinsic producing words (lowered to vpmaddubsw below).
declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)

; Unmasked: expect a bare vpmaddubsw.
define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  ret <32 x i16> %1
}

; Merge-masked: i32-mask select with %x2 folds into vpmaddubsw {%k1} on zmm2.
define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
1207
1208declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
1209
; Unmasked pmaddwd: intrinsic lowers to a single vpmaddwd.
1210define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1) {
1211; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_512:
1212; CHECK:       # %bb.0:
1213; CHECK-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xc1]
1214; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1215  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
1216  ret <16 x i32> %1
1217}
1218
; Merge-masked pmaddwd: i16 mask (16 dword lanes) selects result vs. %x2; expects
; vpmaddwd {%k1} with the mask coming via kmovw (X86 stack) / kmovd %edi (X64).
1219define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
1220; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1221; X86:       # %bb.0:
1222; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1223; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
1224; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1225; X86-NEXT:    retl # encoding: [0xc3]
1226;
1227; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1228; X64:       # %bb.0:
1229; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1230; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
1231; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1232; X64-NEXT:    retq # encoding: [0xc3]
1233  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
1234  %2 = bitcast i16 %x3 to <16 x i1>
1235  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
1236  ret <16 x i32> %3
1237}
1238
1239declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)
1240
; dbpsadbw with three mask flavors in one function: merge-masked (imm 2), zero-masked
; (imm 3), and unmasked (imm 4); results are returned in a struct so all three survive.
1241define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
1242; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1243; X86:       # %bb.0:
1244; X86-NEXT:    vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
1245; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1246; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02]
1247; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
1248; X86-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04]
1249; X86-NEXT:    vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4]
1250; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
1251; X86-NEXT:    retl # encoding: [0xc3]
1252;
1253; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1254; X64:       # %bb.0:
1255; X64-NEXT:    vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
1256; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1257; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02]
1258; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
1259; X64-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04]
1260; X64-NEXT:    vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4]
1261; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
1262; X64-NEXT:    retq # encoding: [0xc3]
1263  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
1264  %2 = bitcast i32 %x4 to <32 x i1>
1265  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
1266  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3)
1267  %5 = bitcast i32 %x4 to <32 x i1>
1268  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
1269  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4)
1270  %res1 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
1271  %res2 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> }  %res1, <32 x i16> %6, 1
1272  %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> }  %res2, <32 x i16> %7, 2
1273  ret { <32 x i16>, <32 x i16>, <32 x i16> } %res3
1274}
1275
1276declare  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
1277
; psadbw (no masked form exists): two independent calls lower to two vpsadbw
; instructions; both results are kept alive via the returned struct.
1278define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
1279; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
1280; CHECK:       # %bb.0:
1281; CHECK-NEXT:    vpsadbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xd9]
1282; CHECK-NEXT:    vpsadbw %zmm2, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xca]
1283; CHECK-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1284; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1285  %res0 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
1286  %res1 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
1287  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
1288  %res3 = insertvalue { <8 x i64>, <8 x i64> }  %res2, <8 x i64> %res1, 1
1289  ret { <8 x i64>, <8 x i64> } %res3
1291
1292declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone
1293
; psrlv with all-constant operands: not fully constant-folded; both vectors come from
; the constant pool (absolute address on X86, RIP-relative on X64) feeding vpsrlvw.
1294define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
1295; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
1296; X86:       # %bb.0:
1297; X86-NEXT:    vpmovsxbw {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1298; X86-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x20,0x05,A,A,A,A]
1299; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
1300; X86-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
1301; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
1302; X86-NEXT:    retl # encoding: [0xc3]
1303;
1304; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
1305; X64:       # %bb.0:
1306; X64-NEXT:    vpmovsxbw {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1307; X64-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x20,0x05,A,A,A,A]
1308; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1309; X64-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
1310; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1311; X64-NEXT:    retq # encoding: [0xc3]
1312  %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,  i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
1313  ret <32 x i16> %res1
1314}
1315
; Unmasked variable word shift-right-logical: lowers to a single vpsrlvw.
1316define <32 x i16>@test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1) {
1317; CHECK-LABEL: test_int_x86_avx512_psrlv32hi:
1318; CHECK:       # %bb.0:
1319; CHECK-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xc1]
1320; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1321  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1322  ret <32 x i16> %1
1323}
1324
; Merge-masked psrlv: select against passthru %x2 folds into vpsrlvw {%k1}.
1325define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1326; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
1327; X86:       # %bb.0:
1328; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1329; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
1330; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1331; X86-NEXT:    retl # encoding: [0xc3]
1332;
1333; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
1334; X64:       # %bb.0:
1335; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1336; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
1337; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1338; X64-NEXT:    retq # encoding: [0xc3]
1339  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1340  %2 = bitcast i32 %x3 to <32 x i1>
1341  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1342  ret <32 x i16> %3
1343}
1344
; Zero-masked psrlv: select against zeroinitializer folds into vpsrlvw {%k1} {z}.
1345define <32 x i16>@test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
1346; X86-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
1347; X86:       # %bb.0:
1348; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1349; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
1350; X86-NEXT:    retl # encoding: [0xc3]
1351;
1352; X64-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
1353; X64:       # %bb.0:
1354; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1355; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
1356; X64-NEXT:    retq # encoding: [0xc3]
1357  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1358  %2 = bitcast i32 %x3 to <32 x i1>
1359  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
1360  ret <32 x i16> %3
1361}
1362
1363declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>)
1364
; Unmasked variable word shift-right-arithmetic: lowers to a single vpsravw.
1365define <32 x i16>@test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1) {
1366; CHECK-LABEL: test_int_x86_avx512_psrav32_hi:
1367; CHECK:       # %bb.0:
1368; CHECK-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xc1]
1369; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1370  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
1371  ret <32 x i16> %1
1372}
1373
; Merge-masked psrav: select against passthru %x2 folds into vpsravw {%k1}.
1374define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1375; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
1376; X86:       # %bb.0:
1377; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1378; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
1379; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1380; X86-NEXT:    retl # encoding: [0xc3]
1381;
1382; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
1383; X64:       # %bb.0:
1384; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1385; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
1386; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1387; X64-NEXT:    retq # encoding: [0xc3]
1388  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
1389  %2 = bitcast i32 %x3 to <32 x i1>
1390  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1391  ret <32 x i16> %3
1392}
1393
; Zero-masked psrav: select against zeroinitializer folds into vpsravw {%k1} {z}.
1394define <32 x i16>@test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
1395; X86-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
1396; X86:       # %bb.0:
1397; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1398; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
1399; X86-NEXT:    retl # encoding: [0xc3]
1400;
1401; X64-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
1402; X64:       # %bb.0:
1403; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1404; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
1405; X64-NEXT:    retq # encoding: [0xc3]
1406  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
1407  %2 = bitcast i32 %x3 to <32 x i1>
1408  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
1409  ret <32 x i16> %3
1410}
1411
; psrav with constant operands (mask args unused): constants materialize from the
; constant pool (vbroadcasti32x4 of a repeating 4-element pattern) feeding vpsravw.
1412define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1413; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
1414; X86:       # %bb.0:
1415; X86-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
1416; X86-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x5a,0x05,A,A,A,A]
1417; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
1418; X86-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1419; X86-NEXT:    vpsravw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
1420; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
1421; X86-NEXT:    retl # encoding: [0xc3]
1422;
1423; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
1424; X64:       # %bb.0:
1425; X64-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
1426; X64-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x5a,0x05,A,A,A,A]
1427; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1428; X64-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1429; X64-NEXT:    vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
1430; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1431; X64-NEXT:    retq # encoding: [0xc3]
1432  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>)
1433  ret <32 x i16> %1
1434}
1435
; Unmasked variable word shift-left: lowers to a single vpsllvw.
1436define <32 x i16>@test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1) {
1437; CHECK-LABEL: test_int_x86_avx512_psllv32hi:
1438; CHECK:       # %bb.0:
1439; CHECK-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xc1]
1440; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1441  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1442  ret <32 x i16> %1
1443}
1444
; Merge-masked psllv: select against passthru %x2 folds into vpsllvw {%k1}.
1445define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1446; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
1447; X86:       # %bb.0:
1448; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1449; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
1450; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1451; X86-NEXT:    retl # encoding: [0xc3]
1452;
1453; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
1454; X64:       # %bb.0:
1455; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1456; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
1457; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1458; X64-NEXT:    retq # encoding: [0xc3]
1459  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1460  %2 = bitcast i32 %x3 to <32 x i1>
1461  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1462  ret <32 x i16> %3
1463}
1464
; Zero-masked psllv: select against zeroinitializer folds into vpsllvw {%k1} {z}.
1465define <32 x i16>@test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
1466; X86-LABEL: test_int_x86_avx512_maskz_psllv32hi:
1467; X86:       # %bb.0:
1468; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1469; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
1470; X86-NEXT:    retl # encoding: [0xc3]
1471;
1472; X64-LABEL: test_int_x86_avx512_maskz_psllv32hi:
1473; X64:       # %bb.0:
1474; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1475; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
1476; X64-NEXT:    retq # encoding: [0xc3]
1477  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1478  %2 = bitcast i32 %x3 to <32 x i1>
1479  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
1480  ret <32 x i16> %3
1481}
1482
1483declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
1484
; Unmasked word permute: lowers to a single vpermw (note reversed operand order:
; the index vector %x1 is the vpermw source-selector operand).
1485define <32 x i16>@test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1) {
1486; CHECK-LABEL: test_int_x86_avx512_permvar_hi_512:
1487; CHECK:       # %bb.0:
1488; CHECK-NEXT:    vpermw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xc0]
1489; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1490  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
1491  ret <32 x i16> %1
1492}
1493
; Merge-masked permvar: select against passthru %x2 folds into vpermw {%k1}.
1494define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1495; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
1496; X86:       # %bb.0:
1497; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1498; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
1499; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1500; X86-NEXT:    retl # encoding: [0xc3]
1501;
1502; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
1503; X64:       # %bb.0:
1504; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1505; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
1506; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1507; X64-NEXT:    retq # encoding: [0xc3]
1508  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
1509  %2 = bitcast i32 %x3 to <32 x i1>
1510  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1511  ret <32 x i16> %3
1512}
1513
; Zero-masked permvar: select against zeroinitializer folds into vpermw {%k1} {z}.
1514define <32 x i16>@test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
1515; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
1516; X86:       # %bb.0:
1517; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1518; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
1519; X86-NEXT:    retl # encoding: [0xc3]
1520;
1521; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
1522; X64:       # %bb.0:
1523; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1524; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
1525; X64-NEXT:    retq # encoding: [0xc3]
1526  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
1527  %2 = bitcast i32 %x3 to <32 x i1>
1528  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
1529  ret <32 x i16> %3
1530}
1531
; Unmasked psll.w (xmm shift-count form): lowers to a single vpsllw.
1532define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1533; CHECK-LABEL: test_x86_avx512_psll_w_512:
1534; CHECK:       # %bb.0:
1535; CHECK-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
1536; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1537  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1538  ret <32 x i16> %res
1539}
; Merge-masked psll.w: select against %passthru folds into vpsllw {%k1}.
1540define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1541; X86-LABEL: test_x86_avx512_mask_psll_w_512:
1542; X86:       # %bb.0:
1543; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1544; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1545; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1546; X86-NEXT:    retl # encoding: [0xc3]
1547;
1548; X64-LABEL: test_x86_avx512_mask_psll_w_512:
1549; X64:       # %bb.0:
1550; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1551; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1552; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1553; X64-NEXT:    retq # encoding: [0xc3]
1554  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1555  %mask.cast = bitcast i32 %mask to <32 x i1>
1556  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1557  ret <32 x i16> %res2
1558}
; Zero-masked psll.w: select against zeroinitializer folds into vpsllw {%k1} {z}.
1559define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1560; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
1561; X86:       # %bb.0:
1562; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1563; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1564; X86-NEXT:    retl # encoding: [0xc3]
1565;
1566; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
1567; X64:       # %bb.0:
1568; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1569; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1570; X64-NEXT:    retq # encoding: [0xc3]
1571  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1572  %mask.cast = bitcast i32 %mask to <32 x i1>
1573  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1574  ret <32 x i16> %res2
1575}
1576declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone
1577
1578
; psllv with all-constant operands: both vectors are loaded from the constant pool
; (absolute on X86, RIP-relative on X64) and fed to vpsllvw; not constant-folded.
1579define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
1580; X86-LABEL: test_x86_avx512_psllv_w_512_const:
1581; X86:       # %bb.0:
1582; X86-NEXT:    vpmovsxbw {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1583; X86-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x20,0x05,A,A,A,A]
1584; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
1585; X86-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
1586; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
1587; X86-NEXT:    retl # encoding: [0xc3]
1588;
1589; X64-LABEL: test_x86_avx512_psllv_w_512_const:
1590; X64:       # %bb.0:
1591; X64-NEXT:    vpmovsxbw {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1592; X64-NEXT:    # encoding: [0x62,0xf2,0x7d,0x48,0x20,0x05,A,A,A,A]
1593; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1594; X64-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
1595; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1596; X64-NEXT:    retq # encoding: [0xc3]
1597  %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4,  i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,  i16 1, i16 1, i16 -1>)
1598  ret <32 x i16> %res1
1599}
1600declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) nounwind readnone
1601
; Unmasked pslli.w (immediate shift form): lowers to vpsllw $7.
1602define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
1603; CHECK-LABEL: test_x86_avx512_pslli_w_512:
1604; CHECK:       # %bb.0:
1605; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
1606; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1607  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1608  ret <32 x i16> %res
1609}
; Merge-masked pslli.w: select against %passthru folds into vpsllw $7 {%k1}.
1610define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1611; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
1612; X86:       # %bb.0:
1613; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1614; X86-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
1615; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1616; X86-NEXT:    retl # encoding: [0xc3]
1617;
1618; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
1619; X64:       # %bb.0:
1620; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1621; X64-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
1622; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1623; X64-NEXT:    retq # encoding: [0xc3]
1624  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1625  %mask.cast = bitcast i32 %mask to <32 x i1>
1626  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1627  ret <32 x i16> %res2
1628}
; Zero-masked pslli.w: select against zeroinitializer folds into vpsllw $7 {%k1} {z}.
1629define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
1630; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
1631; X86:       # %bb.0:
1632; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1633; X86-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
1634; X86-NEXT:    retl # encoding: [0xc3]
1635;
1636; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
1637; X64:       # %bb.0:
1638; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1639; X64-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
1640; X64-NEXT:    retq # encoding: [0xc3]
1641  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1642  %mask.cast = bitcast i32 %mask to <32 x i1>
1643  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1644  ret <32 x i16> %res2
1645}
1646declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone
1647
1648
; Unmasked psra.w (xmm shift-count form): lowers to a single vpsraw.
1649define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1650; CHECK-LABEL: test_x86_avx512_psra_w_512:
1651; CHECK:       # %bb.0:
1652; CHECK-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
1653; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1654  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1655  ret <32 x i16> %res
1656}
; Merge-masked psra.w: select against %passthru folds into vpsraw {%k1}.
1657define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1658; X86-LABEL: test_x86_avx512_mask_psra_w_512:
1659; X86:       # %bb.0:
1660; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1661; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1662; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1663; X86-NEXT:    retl # encoding: [0xc3]
1664;
1665; X64-LABEL: test_x86_avx512_mask_psra_w_512:
1666; X64:       # %bb.0:
1667; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1668; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1669; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1670; X64-NEXT:    retq # encoding: [0xc3]
1671  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1672  %mask.cast = bitcast i32 %mask to <32 x i1>
1673  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1674  ret <32 x i16> %res2
1675}
; Zero-masked psra.w: select against zeroinitializer folds into vpsraw {%k1} {z}.
1676define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1677; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
1678; X86:       # %bb.0:
1679; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1680; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1681; X86-NEXT:    retl # encoding: [0xc3]
1682;
1683; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
1684; X64:       # %bb.0:
1685; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1686; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1687; X64-NEXT:    retq # encoding: [0xc3]
1688  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1689  %mask.cast = bitcast i32 %mask to <32 x i1>
1690  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1691  ret <32 x i16> %res2
1692}
1693declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone
1694
1695
; Unmasked psrai.w (immediate shift form): lowers to vpsraw $7.
1696define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
1697; CHECK-LABEL: test_x86_avx512_psrai_w_512:
1698; CHECK:       # %bb.0:
1699; CHECK-NEXT:    vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
1700; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1701  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1702  ret <32 x i16> %res
1703}
; Merge-masked psrai.w: select against %passthru folds into vpsraw $7 {%k1}.
1704define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1705; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
1706; X86:       # %bb.0:
1707; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1708; X86-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
1709; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1710; X86-NEXT:    retl # encoding: [0xc3]
1711;
1712; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
1713; X64:       # %bb.0:
1714; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1715; X64-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
1716; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1717; X64-NEXT:    retq # encoding: [0xc3]
1718  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1719  %mask.cast = bitcast i32 %mask to <32 x i1>
1720  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1721  ret <32 x i16> %res2
1722}
; Zero-masked psrai.w (%passthru parameter is intentionally unused): select against
; zeroinitializer folds into vpsraw $7 {%k1} {z}.
1723define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1724; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
1725; X86:       # %bb.0:
1726; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1727; X86-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
1728; X86-NEXT:    retl # encoding: [0xc3]
1729;
1730; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
1731; X64:       # %bb.0:
1732; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1733; X64-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
1734; X64-NEXT:    retq # encoding: [0xc3]
1735  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1736  %mask.cast = bitcast i32 %mask to <32 x i1>
1737  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1738  ret <32 x i16> %res2
1739}
1740declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone
1741
1742
; Unmasked logical shift-right by vector count: llvm.x86.avx512.psrl.w.512
; takes the shift amount in an <8 x i16> xmm operand and should select a
; single unmasked `vpsrlw %xmm1, %zmm0, %zmm0`.
define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked psrl.w.512: intrinsic + select-with-passthru should fold into
; `vpsrlw {%k1}` writing into the passthru register (zmm2), then a move of the
; merged value to zmm0. X86 loads the mask from the stack; X64 takes it in %edi.
define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ; i32 -> <32 x i1> bitcast + select is the canonical IR pattern for merge masking.
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked psrl.w.512: select against zeroinitializer should fold into a
; single zeroing-masked `vpsrlw {%k1} {z}` in place, with no extra move.
define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ; select against zeroinitializer is the canonical IR pattern for zero masking.
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
1787declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone
1788
; Load folding: when the <8 x i16> shift-count operand comes from memory, the
; load should fold into the vpsrlw memory form rather than a separate vmovdqa.
; X86 first loads the pointer argument from the stack; X64 has it in %rdi.
define <32 x i16> @test_x86_avx512_psrl_w_512_load(<32 x i16> %a0, ptr %p) {
; X86-LABEL: test_x86_avx512_psrl_w_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsrlw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrl_w_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <8 x i16>, ptr %p
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
1804
; Unmasked logical shift-right by immediate: llvm.x86.avx512.psrli.w.512 with
; imm 7 should select a single unmasked `vpsrlw $7`.
define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked psrli.w.512: intrinsic + select-with-passthru should fold into
; `vpsrlw $7 {%k1}` writing into the passthru register, then a move of the
; merged value to zmm0. X86 loads the mask from the stack; X64 takes it in %edi.
define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ; i32 -> <32 x i1> bitcast + select is the canonical IR pattern for merge masking.
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked psrli.w.512: select against zeroinitializer should fold into a
; single zeroing-masked `vpsrlw $7 {%k1} {z}` in place, with no extra move.
define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ; select against zeroinitializer is the canonical IR pattern for zero masking.
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
1849declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone
1850