; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x5c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpblendmb %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0xcb]
; X86-NEXT:    vmovdqu8 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd3]
; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8]
; X64-NEXT:    vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastb %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xcf]
; X64-NEXT:    vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0
  %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }  %res3, <16 x i8> %res1, 1
  %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }  %res4, <16 x i8> %res2, 2
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5
}


declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x5c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpblendmw %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0xcb]
; X86-NEXT:    vmovdqu16 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd3]
; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8]
; X64-NEXT:    vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastw %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xcf]
; X64-NEXT:    vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}


declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)

define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x5c,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpblendmb %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0xcb]
; X86-NEXT:    vmovdqu8 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd3]
; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8]
; X64-NEXT:    vpbroadcastb %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastb %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xcf]
; X64-NEXT:    vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
  %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0
  %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }  %res3, <32 x i8> %res1, 1
  %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }  %res4, <32 x i8> %res2, 2
  ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5
}



declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x5c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpblendmw %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0xcb]
; X86-NEXT:    vmovdqu16 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd3]
; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8]
; X64-NEXT:    vpbroadcastw %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastw %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xcf]
; X64-NEXT:    vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_pbroadcastb_256(<16 x i8> %x0, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_pbroadcastb_128(<16 x i8> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pbroadcastw_256(<8 x i16> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pbroadcastw_128(<8 x i16> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_mask_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_maskz_pbroadcastb_512(<16 x i8> %x0, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_mask_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_maskz_pbroadcastw_512(<8 x i16> %x0, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare void @llvm.x86.avx512.mask.storeu.b.128(ptr, <16 x i8>, i16)

define void@test_int_x86_avx512_mask_storeu_b_128(ptr %ptr1, ptr %ptr2, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.128(ptr %ptr1, <16 x i8> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.128(ptr %ptr2, <16 x i8> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.b.256(ptr, <32 x i8>, i32)

define void@test_int_x86_avx512_mask_storeu_b_256(ptr %ptr1, ptr %ptr2, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.256(ptr %ptr1, <32 x i8> %x1, i32 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.256(ptr %ptr2, <32 x i8> %x1, i32 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.128(ptr, <8 x i16>, i8)

define void@test_int_x86_avx512_mask_storeu_w_128(ptr %ptr1, ptr %ptr2, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X86-NEXT:    vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.128(ptr %ptr1, <8 x i16> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.128(ptr %ptr2, <8 x i16> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.256(ptr, <16 x i16>, i16)

define void@test_int_x86_avx512_mask_storeu_w_256(ptr %ptr1, ptr %ptr2, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.256(ptr %ptr1, <16 x i16> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.256(ptr %ptr2, <16 x i16> %x1, i16 -1)
  ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_loadu_w_128(ptr %ptr, ptr %ptr2, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X86-NEXT:    vpblendmw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x08]
; X86-NEXT:    vmovdqu16 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmw (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x0e]
; X64-NEXT:    vmovdqu16 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr, <8 x i16> %x1, i8 -1)
    %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr2, <8 x i16> %res0, i8 %mask)
    %res2 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr, <8 x i16> zeroinitializer, i8 %mask)
    %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
    %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
    %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
    ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_loadu_w_256(ptr %ptr, ptr %ptr2, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vpblendmw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x08]
; X86-NEXT:    vmovdqu16 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmw (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x0e]
; X64-NEXT:    vmovdqu16 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr, <16 x i16> %x1, i16 -1)
    %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr2, <16 x i16> %res0, i16 %mask)
    %res2 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr, <16 x i16> zeroinitializer, i16 %mask)
    %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
    %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
    %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
    ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr, <16 x i8>, i16)

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_loadu_b_128(ptr %ptr, ptr %ptr2, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vpblendmb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x08]
; X86-NEXT:    vmovdqu8 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmb (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x0e]
; X64-NEXT:    vmovdqu8 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr, <16 x i8> %x1, i16 -1)
    %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr2, <16 x i8> %res0, i16 %mask)
    %res2 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr, <16 x i8> zeroinitializer, i16 %mask)
    %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0
    %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }  %res3, <16 x i8> %res1, 1
    %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> }  %res4, <16 x i8> %res2, 2
    ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5
}

declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr, <32 x i8>, i32)

define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_loadu_b_256(ptr %ptr, ptr %ptr2, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vpblendmb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x08]
; X86-NEXT:    vmovdqu8 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmb (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x0e]
; X64-NEXT:    vmovdqu8 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr2, <32 x i8> %res0, i32 %mask)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr, <32 x i8> zeroinitializer, i32 %mask)
    %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0
    %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }  %res3, <32 x i8> %res1, 1
    %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> }  %res4, <32 x i8> %res2, 2
    ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5
}

declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x02]
; CHECK-NEXT:    # xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; X86-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_palignr_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; X64-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_palignr_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_palignr_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xc1,0x02]
; CHECK-NEXT:    # ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
; X86-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_palignr_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
; X64-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_palignr_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_palignr_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
  ret <32 x i8> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pshufh_w_128(<8 x i16> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pshufh_w_256(<16 x i16> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[3,0,0,0,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; X86-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; X64-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pshufl_w_128(<8 x i16> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pshufl_w_256(<16 x i16> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpeq_b_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_b_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)

define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpeq_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)

define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpgt_b_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_b_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)

define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
1027  ret i16 %res
1028}
1029
1030define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1031; X86-LABEL: test_mask_pcmpgt_w_256:
1032; X86:       # %bb.0:
1033; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
1034; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1035; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1036; X86-NEXT:    # kill: def $ax killed $ax killed $eax
1037; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1038; X86-NEXT:    retl # encoding: [0xc3]
1039;
1040; X64-LABEL: test_mask_pcmpgt_w_256:
1041; X64:       # %bb.0:
1042; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
1043; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1044; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1045; X64-NEXT:    # kill: def $ax killed $ax killed $eax
1046; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1047; X64-NEXT:    retq # encoding: [0xc3]
1048  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
1049  ret i16 %res
1050}
1051
1052declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
1053
1054define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
1055; CHECK-LABEL: test_pcmpeq_b_128:
1056; CHECK:       # %bb.0:
1057; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1058; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1059; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
1060; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1061  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
1062  ret i16 %res
1063}
1064
1065define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
1066; X86-LABEL: test_mask_pcmpeq_b_128:
1067; X86:       # %bb.0:
1068; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1069; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1070; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1071; X86-NEXT:    # kill: def $ax killed $ax killed $eax
1072; X86-NEXT:    retl # encoding: [0xc3]
1073;
1074; X64-LABEL: test_mask_pcmpeq_b_128:
1075; X64:       # %bb.0:
1076; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1077; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1078; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1079; X64-NEXT:    # kill: def $ax killed $ax killed $eax
1080; X64-NEXT:    retq # encoding: [0xc3]
1081  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
1082  ret i16 %res
1083}
1084
1085declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
1086
1087define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
1088; CHECK-LABEL: test_pcmpeq_w_128:
1089; CHECK:       # %bb.0:
1090; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1091; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1092; CHECK-NEXT:    # kill: def $al killed $al killed $eax
1093; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1094  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
1095  ret i8 %res
1096}
1097
1098define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1099; X86-LABEL: test_mask_pcmpeq_w_128:
1100; X86:       # %bb.0:
1101; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1102; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1103; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1104; X86-NEXT:    # kill: def $al killed $al killed $eax
1105; X86-NEXT:    retl # encoding: [0xc3]
1106;
1107; X64-LABEL: test_mask_pcmpeq_w_128:
1108; X64:       # %bb.0:
1109; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1110; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1111; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
1112; X64-NEXT:    # kill: def $al killed $al killed $eax
1113; X64-NEXT:    retq # encoding: [0xc3]
1114  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
1115  ret i8 %res
1116}
1117
1118declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
1119
1120define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
1121; CHECK-LABEL: test_pcmpgt_b_128:
1122; CHECK:       # %bb.0:
1123; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1124; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1125; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
1126; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1127  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
1128  ret i16 %res
1129}
1130
1131define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
1132; X86-LABEL: test_mask_pcmpgt_b_128:
1133; X86:       # %bb.0:
1134; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1135; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1136; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1137; X86-NEXT:    # kill: def $ax killed $ax killed $eax
1138; X86-NEXT:    retl # encoding: [0xc3]
1139;
1140; X64-LABEL: test_mask_pcmpgt_b_128:
1141; X64:       # %bb.0:
1142; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1143; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1144; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1145; X64-NEXT:    # kill: def $ax killed $ax killed $eax
1146; X64-NEXT:    retq # encoding: [0xc3]
1147  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
1148  ret i16 %res
1149}
1150
1151declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
1152
1153define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
1154; CHECK-LABEL: test_pcmpgt_w_128:
1155; CHECK:       # %bb.0:
1156; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1157; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1158; CHECK-NEXT:    # kill: def $al killed $al killed $eax
1159; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1160  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
1161  ret i8 %res
1162}
1163
1164define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1165; X86-LABEL: test_mask_pcmpgt_w_128:
1166; X86:       # %bb.0:
1167; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1168; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1169; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1170; X86-NEXT:    # kill: def $al killed $al killed $eax
1171; X86-NEXT:    retl # encoding: [0xc3]
1172;
1173; X64-LABEL: test_mask_pcmpgt_w_128:
1174; X64:       # %bb.0:
1175; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1176; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1177; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
1178; X64-NEXT:    # kill: def $al killed $al killed $eax
1179; X64-NEXT:    retq # encoding: [0xc3]
1180  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
1181  ret i8 %res
1182}
1183
1184declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
1185
1186declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
1187
1188define <16 x i8>@test_int_x86_avx512_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
1189; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_128:
1190; CHECK:       # %bb.0:
1191; CHECK-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
1192; CHECK-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1193; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1194  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
1195  ret <16 x i8> %res
1196}
1197
1198define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1199; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
1200; X86:       # %bb.0:
1201; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1202; X86-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
1203; X86-NEXT:    # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1204; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1205; X86-NEXT:    retl # encoding: [0xc3]
1206;
1207; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
1208; X64:       # %bb.0:
1209; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1210; X64-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
1211; X64-NEXT:    # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1212; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1213; X64-NEXT:    retq # encoding: [0xc3]
1214  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
1215  ret <16 x i8> %res
1216}
1217
1218declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
1219
1220define <16 x i8>@test_int_x86_avx512_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
1221; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_128:
1222; CHECK:       # %bb.0:
1223; CHECK-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
1224; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1225; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1226  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
1227  ret <16 x i8> %res
1228}
1229
1230define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1231; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
1232; X86:       # %bb.0:
1233; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1234; X86-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
1235; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1236; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1237; X86-NEXT:    retl # encoding: [0xc3]
1238;
1239; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
1240; X64:       # %bb.0:
1241; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1242; X64-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
1243; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1244; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1245; X64-NEXT:    retq # encoding: [0xc3]
1246  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
1247  ret <16 x i8> %res
1248}
1249
1250declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
1251
1252define <32 x i8>@test_int_x86_avx512_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
1253; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_256:
1254; CHECK:       # %bb.0:
1255; CHECK-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xc1]
1256; CHECK-NEXT:    # ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1257; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1258  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
1259  ret <32 x i8> %res
1260}
1261
1262define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1263; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
1264; X86:       # %bb.0:
1265; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1266; X86-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
1267; X86-NEXT:    # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1268; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1269; X86-NEXT:    retl # encoding: [0xc3]
1270;
1271; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
1272; X64:       # %bb.0:
1273; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1274; X64-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
1275; X64-NEXT:    # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1276; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1277; X64-NEXT:    retq # encoding: [0xc3]
1278  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
1279  ret <32 x i8> %res
1280}
1281
1282declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
1283
1284define <32 x i8>@test_int_x86_avx512_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
1285; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_256:
1286; CHECK:       # %bb.0:
1287; CHECK-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xc1]
1288; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1289; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1290  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
1291  ret <32 x i8> %res
1292}
1293
1294define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1295; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
1296; X86:       # %bb.0:
1297; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1298; X86-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
1299; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1300; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1301; X86-NEXT:    retl # encoding: [0xc3]
1302;
1303; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
1304; X64:       # %bb.0:
1305; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1306; X64-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
1307; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1308; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1309; X64-NEXT:    retq # encoding: [0xc3]
1310  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
1311  ret <32 x i8> %res
1312}
1313
1314declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1315
1316define <8 x i16>@test_int_x86_avx512_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1317; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_128:
1318; CHECK:       # %bb.0:
1319; CHECK-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
1320; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1321; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1322  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1323  ret <8 x i16> %res
1324}
1325
1326define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1327; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
1328; X86:       # %bb.0:
1329; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1330; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1331; X86-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
1332; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1333; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1334; X86-NEXT:    retl # encoding: [0xc3]
1335;
1336; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
1337; X64:       # %bb.0:
1338; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1339; X64-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
1340; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1341; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1342; X64-NEXT:    retq # encoding: [0xc3]
1343  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1344  ret <8 x i16> %res
1345}
1346
1347declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1348
1349define <8 x i16>@test_int_x86_avx512_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1350; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_128:
1351; CHECK:       # %bb.0:
1352; CHECK-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
1353; CHECK-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1354; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1355  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1356  ret <8 x i16> %res
1357}
1358
1359define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1360; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
1361; X86:       # %bb.0:
1362; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1363; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1364; X86-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
1365; X86-NEXT:    # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1366; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1367; X86-NEXT:    retl # encoding: [0xc3]
1368;
1369; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
1370; X64:       # %bb.0:
1371; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1372; X64-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
1373; X64-NEXT:    # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1374; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1375; X64-NEXT:    retq # encoding: [0xc3]
1376  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1377  ret <8 x i16> %res
1378}
1379
1380declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1381
1382define <16 x i16>@test_int_x86_avx512_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1383; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_256:
1384; CHECK:       # %bb.0:
1385; CHECK-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xc1]
1386; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1387; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1388  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1389  ret <16 x i16> %res
1390}
1391
1392define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1393; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
1394; X86:       # %bb.0:
1395; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1396; X86-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
1397; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1398; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1399; X86-NEXT:    retl # encoding: [0xc3]
1400;
1401; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
1402; X64:       # %bb.0:
1403; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1404; X64-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
1405; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1406; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1407; X64-NEXT:    retq # encoding: [0xc3]
1408  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1409  ret <16 x i16> %res
1410}
1411
1412declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1413
1414define <16 x i16>@test_int_x86_avx512_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1415; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_256:
1416; CHECK:       # %bb.0:
1417; CHECK-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xc1]
1418; CHECK-NEXT:    # ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1419; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1420  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1421  ret <16 x i16> %res
1422}
1423
1424define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1425; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
1426; X86:       # %bb.0:
1427; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1428; X86-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
1429; X86-NEXT:    # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1430; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1431; X86-NEXT:    retl # encoding: [0xc3]
1432;
1433; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
1434; X64:       # %bb.0:
1435; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1436; X64-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
1437; X64-NEXT:    # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1438; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1439; X64-NEXT:    retq # encoding: [0xc3]
1440  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1441  ret <16 x i16> %res
1442}
1443
1444define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1445; CHECK-LABEL: test_mask_add_epi16_rr_128:
1446; CHECK:       # %bb.0:
1447; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
1448; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1449  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1450  ret <8 x i16> %res
1451}
1452
1453define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1454; X86-LABEL: test_mask_add_epi16_rrk_128:
1455; X86:       # %bb.0:
1456; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1457; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1458; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1459; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1460; X86-NEXT:    retl # encoding: [0xc3]
1461;
1462; X64-LABEL: test_mask_add_epi16_rrk_128:
1463; X64:       # %bb.0:
1464; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1465; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1466; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1467; X64-NEXT:    retq # encoding: [0xc3]
1468  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1469  ret <8 x i16> %res
1470}
1471
1472define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1473; X86-LABEL: test_mask_add_epi16_rrkz_128:
1474; X86:       # %bb.0:
1475; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1476; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1477; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1478; X86-NEXT:    retl # encoding: [0xc3]
1479;
1480; X64-LABEL: test_mask_add_epi16_rrkz_128:
1481; X64:       # %bb.0:
1482; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1483; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1484; X64-NEXT:    retq # encoding: [0xc3]
1485  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1486  ret <8 x i16> %res
1487}
1488
1489define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
1490; X86-LABEL: test_mask_add_epi16_rm_128:
1491; X86:       # %bb.0:
1492; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1493; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00]
1494; X86-NEXT:    retl # encoding: [0xc3]
1495;
1496; X64-LABEL: test_mask_add_epi16_rm_128:
1497; X64:       # %bb.0:
1498; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07]
1499; X64-NEXT:    retq # encoding: [0xc3]
1500  %b = load <8 x i16>, ptr %ptr_b
1501  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1502  ret <8 x i16> %res
1503}
1504
1505define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
1506; X86-LABEL: test_mask_add_epi16_rmk_128:
1507; X86:       # %bb.0:
1508; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1509; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1510; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1511; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08]
1512; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1513; X86-NEXT:    retl # encoding: [0xc3]
1514;
1515; X64-LABEL: test_mask_add_epi16_rmk_128:
1516; X64:       # %bb.0:
1517; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1518; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
1519; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1520; X64-NEXT:    retq # encoding: [0xc3]
1521  %b = load <8 x i16>, ptr %ptr_b
1522  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1523  ret <8 x i16> %res
1524}
1525
1526define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
1527; X86-LABEL: test_mask_add_epi16_rmkz_128:
1528; X86:       # %bb.0:
1529; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1530; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1531; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1532; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00]
1533; X86-NEXT:    retl # encoding: [0xc3]
1534;
1535; X64-LABEL: test_mask_add_epi16_rmkz_128:
1536; X64:       # %bb.0:
1537; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1538; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
1539; X64-NEXT:    retq # encoding: [0xc3]
1540  %b = load <8 x i16>, ptr %ptr_b
1541  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1542  ret <8 x i16> %res
1543}
1544
1545declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1546
1547define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1548; CHECK-LABEL: test_mask_add_epi16_rr_256:
1549; CHECK:       # %bb.0:
1550; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
1551; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1552  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1553  ret <16 x i16> %res
1554}
1555
1556define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1557; X86-LABEL: test_mask_add_epi16_rrk_256:
1558; X86:       # %bb.0:
1559; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1560; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1561; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1562; X86-NEXT:    retl # encoding: [0xc3]
1563;
1564; X64-LABEL: test_mask_add_epi16_rrk_256:
1565; X64:       # %bb.0:
1566; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1567; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1568; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1569; X64-NEXT:    retq # encoding: [0xc3]
1570  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1571  ret <16 x i16> %res
1572}
1573
1574define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1575; X86-LABEL: test_mask_add_epi16_rrkz_256:
1576; X86:       # %bb.0:
1577; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1578; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1579; X86-NEXT:    retl # encoding: [0xc3]
1580;
1581; X64-LABEL: test_mask_add_epi16_rrkz_256:
1582; X64:       # %bb.0:
1583; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1584; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1585; X64-NEXT:    retq # encoding: [0xc3]
1586  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1587  ret <16 x i16> %res
1588}
1589
1590define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
1591; X86-LABEL: test_mask_add_epi16_rm_256:
1592; X86:       # %bb.0:
1593; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1594; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00]
1595; X86-NEXT:    retl # encoding: [0xc3]
1596;
1597; X64-LABEL: test_mask_add_epi16_rm_256:
1598; X64:       # %bb.0:
1599; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07]
1600; X64-NEXT:    retq # encoding: [0xc3]
1601  %b = load <16 x i16>, ptr %ptr_b
1602  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1603  ret <16 x i16> %res
1604}
1605
1606define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
1607; X86-LABEL: test_mask_add_epi16_rmk_256:
1608; X86:       # %bb.0:
1609; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1610; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1611; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08]
1612; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1613; X86-NEXT:    retl # encoding: [0xc3]
1614;
1615; X64-LABEL: test_mask_add_epi16_rmk_256:
1616; X64:       # %bb.0:
1617; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1618; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
1619; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1620; X64-NEXT:    retq # encoding: [0xc3]
1621  %b = load <16 x i16>, ptr %ptr_b
1622  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1623  ret <16 x i16> %res
1624}
1625
1626define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
1627; X86-LABEL: test_mask_add_epi16_rmkz_256:
1628; X86:       # %bb.0:
1629; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1630; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1631; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00]
1632; X86-NEXT:    retl # encoding: [0xc3]
1633;
1634; X64-LABEL: test_mask_add_epi16_rmkz_256:
1635; X64:       # %bb.0:
1636; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1637; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
1638; X64-NEXT:    retq # encoding: [0xc3]
1639  %b = load <16 x i16>, ptr %ptr_b
1640  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1641  ret <16 x i16> %res
1642}
1643
1644declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1645
1646define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1647; CHECK-LABEL: test_mask_sub_epi16_rr_128:
1648; CHECK:       # %bb.0:
1649; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
1650; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1651  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1652  ret <8 x i16> %res
1653}
1654
1655define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1656; X86-LABEL: test_mask_sub_epi16_rrk_128:
1657; X86:       # %bb.0:
1658; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1659; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1660; X86-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1661; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1662; X86-NEXT:    retl # encoding: [0xc3]
1663;
1664; X64-LABEL: test_mask_sub_epi16_rrk_128:
1665; X64:       # %bb.0:
1666; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1667; X64-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1668; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1669; X64-NEXT:    retq # encoding: [0xc3]
1670  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1671  ret <8 x i16> %res
1672}
1673
1674define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1675; X86-LABEL: test_mask_sub_epi16_rrkz_128:
1676; X86:       # %bb.0:
1677; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1678; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1679; X86-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1680; X86-NEXT:    retl # encoding: [0xc3]
1681;
1682; X64-LABEL: test_mask_sub_epi16_rrkz_128:
1683; X64:       # %bb.0:
1684; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1685; X64-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1686; X64-NEXT:    retq # encoding: [0xc3]
1687  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1688  ret <8 x i16> %res
1689}
1690
1691define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
1692; X86-LABEL: test_mask_sub_epi16_rm_128:
1693; X86:       # %bb.0:
1694; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1695; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00]
1696; X86-NEXT:    retl # encoding: [0xc3]
1697;
1698; X64-LABEL: test_mask_sub_epi16_rm_128:
1699; X64:       # %bb.0:
1700; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07]
1701; X64-NEXT:    retq # encoding: [0xc3]
1702  %b = load <8 x i16>, ptr %ptr_b
1703  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1704  ret <8 x i16> %res
1705}
1706
1707define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
1708; X86-LABEL: test_mask_sub_epi16_rmk_128:
1709; X86:       # %bb.0:
1710; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1711; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1712; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1713; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08]
1714; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1715; X86-NEXT:    retl # encoding: [0xc3]
1716;
1717; X64-LABEL: test_mask_sub_epi16_rmk_128:
1718; X64:       # %bb.0:
1719; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1720; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
1721; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1722; X64-NEXT:    retq # encoding: [0xc3]
1723  %b = load <8 x i16>, ptr %ptr_b
1724  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1725  ret <8 x i16> %res
1726}
1727
1728define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
1729; X86-LABEL: test_mask_sub_epi16_rmkz_128:
1730; X86:       # %bb.0:
1731; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1732; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1733; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1734; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00]
1735; X86-NEXT:    retl # encoding: [0xc3]
1736;
1737; X64-LABEL: test_mask_sub_epi16_rmkz_128:
1738; X64:       # %bb.0:
1739; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1740; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
1741; X64-NEXT:    retq # encoding: [0xc3]
1742  %b = load <8 x i16>, ptr %ptr_b
1743  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1744  ret <8 x i16> %res
1745}
1746
1747declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1748
1749define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1750; CHECK-LABEL: test_mask_sub_epi16_rr_256:
1751; CHECK:       # %bb.0:
1752; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
1753; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1754  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1755  ret <16 x i16> %res
1756}
1757
1758define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1759; X86-LABEL: test_mask_sub_epi16_rrk_256:
1760; X86:       # %bb.0:
1761; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1762; X86-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1763; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1764; X86-NEXT:    retl # encoding: [0xc3]
1765;
1766; X64-LABEL: test_mask_sub_epi16_rrk_256:
1767; X64:       # %bb.0:
1768; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1769; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1770; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1771; X64-NEXT:    retq # encoding: [0xc3]
1772  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1773  ret <16 x i16> %res
1774}
1775
1776define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1777; X86-LABEL: test_mask_sub_epi16_rrkz_256:
1778; X86:       # %bb.0:
1779; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1780; X86-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1781; X86-NEXT:    retl # encoding: [0xc3]
1782;
1783; X64-LABEL: test_mask_sub_epi16_rrkz_256:
1784; X64:       # %bb.0:
1785; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1786; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1787; X64-NEXT:    retq # encoding: [0xc3]
1788  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1789  ret <16 x i16> %res
1790}
1791
1792define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
1793; X86-LABEL: test_mask_sub_epi16_rm_256:
1794; X86:       # %bb.0:
1795; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1796; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x00]
1797; X86-NEXT:    retl # encoding: [0xc3]
1798;
1799; X64-LABEL: test_mask_sub_epi16_rm_256:
1800; X64:       # %bb.0:
1801; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07]
1802; X64-NEXT:    retq # encoding: [0xc3]
1803  %b = load <16 x i16>, ptr %ptr_b
1804  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1805  ret <16 x i16> %res
1806}
1807
1808define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
1809; X86-LABEL: test_mask_sub_epi16_rmk_256:
1810; X86:       # %bb.0:
1811; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1812; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1813; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08]
1814; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1815; X86-NEXT:    retl # encoding: [0xc3]
1816;
1817; X64-LABEL: test_mask_sub_epi16_rmk_256:
1818; X64:       # %bb.0:
1819; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1820; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
1821; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1822; X64-NEXT:    retq # encoding: [0xc3]
1823  %b = load <16 x i16>, ptr %ptr_b
1824  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1825  ret <16 x i16> %res
1826}
1827
1828define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
1829; X86-LABEL: test_mask_sub_epi16_rmkz_256:
1830; X86:       # %bb.0:
1831; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1832; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1833; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00]
1834; X86-NEXT:    retl # encoding: [0xc3]
1835;
1836; X64-LABEL: test_mask_sub_epi16_rmkz_256:
1837; X64:       # %bb.0:
1838; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1839; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
1840; X64-NEXT:    retq # encoding: [0xc3]
1841  %b = load <16 x i16>, ptr %ptr_b
1842  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1843  ret <16 x i16> %res
1844}
1845
1846declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1847
1848define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1849; CHECK-LABEL: test_mask_add_epi16_rr_512:
1850; CHECK:       # %bb.0:
1851; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
1852; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1853  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1854  ret <32 x i16> %res
1855}
1856
1857define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1858; X86-LABEL: test_mask_add_epi16_rrk_512:
1859; X86:       # %bb.0:
1860; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1861; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1862; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1863; X86-NEXT:    retl # encoding: [0xc3]
1864;
1865; X64-LABEL: test_mask_add_epi16_rrk_512:
1866; X64:       # %bb.0:
1867; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1868; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1869; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1870; X64-NEXT:    retq # encoding: [0xc3]
1871  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1872  ret <32 x i16> %res
1873}
1874
1875define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1876; X86-LABEL: test_mask_add_epi16_rrkz_512:
1877; X86:       # %bb.0:
1878; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1879; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1880; X86-NEXT:    retl # encoding: [0xc3]
1881;
1882; X64-LABEL: test_mask_add_epi16_rrkz_512:
1883; X64:       # %bb.0:
1884; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1885; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1886; X64-NEXT:    retq # encoding: [0xc3]
1887  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1888  ret <32 x i16> %res
1889}
1890
1891define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
1892; X86-LABEL: test_mask_add_epi16_rm_512:
1893; X86:       # %bb.0:
1894; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1895; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00]
1896; X86-NEXT:    retl # encoding: [0xc3]
1897;
1898; X64-LABEL: test_mask_add_epi16_rm_512:
1899; X64:       # %bb.0:
1900; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
1901; X64-NEXT:    retq # encoding: [0xc3]
1902  %b = load <32 x i16>, ptr %ptr_b
1903  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1904  ret <32 x i16> %res
1905}
1906
1907define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
1908; X86-LABEL: test_mask_add_epi16_rmk_512:
1909; X86:       # %bb.0:
1910; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1911; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1912; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08]
1913; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1914; X86-NEXT:    retl # encoding: [0xc3]
1915;
1916; X64-LABEL: test_mask_add_epi16_rmk_512:
1917; X64:       # %bb.0:
1918; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1919; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
1920; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1921; X64-NEXT:    retq # encoding: [0xc3]
1922  %b = load <32 x i16>, ptr %ptr_b
1923  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1924  ret <32 x i16> %res
1925}
1926
1927define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
1928; X86-LABEL: test_mask_add_epi16_rmkz_512:
1929; X86:       # %bb.0:
1930; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1931; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1932; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00]
1933; X86-NEXT:    retl # encoding: [0xc3]
1934;
1935; X64-LABEL: test_mask_add_epi16_rmkz_512:
1936; X64:       # %bb.0:
1937; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1938; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
1939; X64-NEXT:    retq # encoding: [0xc3]
1940  %b = load <32 x i16>, ptr %ptr_b
1941  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1942  ret <32 x i16> %res
1943}
1944
1945declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1946
1947define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1948; CHECK-LABEL: test_mask_sub_epi16_rr_512:
1949; CHECK:       # %bb.0:
1950; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
1951; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1952  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1953  ret <32 x i16> %res
1954}
1955
1956define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1957; X86-LABEL: test_mask_sub_epi16_rrk_512:
1958; X86:       # %bb.0:
1959; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1960; X86-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1961; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1962; X86-NEXT:    retl # encoding: [0xc3]
1963;
1964; X64-LABEL: test_mask_sub_epi16_rrk_512:
1965; X64:       # %bb.0:
1966; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1967; X64-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1968; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1969; X64-NEXT:    retq # encoding: [0xc3]
1970  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1971  ret <32 x i16> %res
1972}
1973
1974define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1975; X86-LABEL: test_mask_sub_epi16_rrkz_512:
1976; X86:       # %bb.0:
1977; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1978; X86-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1979; X86-NEXT:    retl # encoding: [0xc3]
1980;
1981; X64-LABEL: test_mask_sub_epi16_rrkz_512:
1982; X64:       # %bb.0:
1983; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1984; X64-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1985; X64-NEXT:    retq # encoding: [0xc3]
1986  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1987  ret <32 x i16> %res
1988}
1989
1990define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
1991; X86-LABEL: test_mask_sub_epi16_rm_512:
1992; X86:       # %bb.0:
1993; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1994; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00]
1995; X86-NEXT:    retl # encoding: [0xc3]
1996;
1997; X64-LABEL: test_mask_sub_epi16_rm_512:
1998; X64:       # %bb.0:
1999; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
2000; X64-NEXT:    retq # encoding: [0xc3]
2001  %b = load <32 x i16>, ptr %ptr_b
2002  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2003  ret <32 x i16> %res
2004}
2005
2006define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
2007; X86-LABEL: test_mask_sub_epi16_rmk_512:
2008; X86:       # %bb.0:
2009; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2010; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2011; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08]
2012; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2013; X86-NEXT:    retl # encoding: [0xc3]
2014;
2015; X64-LABEL: test_mask_sub_epi16_rmk_512:
2016; X64:       # %bb.0:
2017; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2018; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
2019; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2020; X64-NEXT:    retq # encoding: [0xc3]
2021  %b = load <32 x i16>, ptr %ptr_b
2022  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2023  ret <32 x i16> %res
2024}
2025
2026define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
2027; X86-LABEL: test_mask_sub_epi16_rmkz_512:
2028; X86:       # %bb.0:
2029; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2030; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2031; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00]
2032; X86-NEXT:    retl # encoding: [0xc3]
2033;
2034; X64-LABEL: test_mask_sub_epi16_rmkz_512:
2035; X64:       # %bb.0:
2036; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2037; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
2038; X64-NEXT:    retq # encoding: [0xc3]
2039  %b = load <32 x i16>, ptr %ptr_b
2040  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2041  ret <32 x i16> %res
2042}
2043
2044declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2045
2046define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
2047; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
2048; CHECK:       # %bb.0:
2049; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
2050; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2051  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2052  ret <32 x i16> %res
2053}
2054
2055define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
2056; X86-LABEL: test_mask_mullo_epi16_rrk_512:
2057; X86:       # %bb.0:
2058; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2059; X86-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
2060; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2061; X86-NEXT:    retl # encoding: [0xc3]
2062;
2063; X64-LABEL: test_mask_mullo_epi16_rrk_512:
2064; X64:       # %bb.0:
2065; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2066; X64-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
2067; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2068; X64-NEXT:    retq # encoding: [0xc3]
2069  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2070  ret <32 x i16> %res
2071}
2072
2073define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
2074; X86-LABEL: test_mask_mullo_epi16_rrkz_512:
2075; X86:       # %bb.0:
2076; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2077; X86-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
2078; X86-NEXT:    retl # encoding: [0xc3]
2079;
2080; X64-LABEL: test_mask_mullo_epi16_rrkz_512:
2081; X64:       # %bb.0:
2082; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2083; X64-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
2084; X64-NEXT:    retq # encoding: [0xc3]
2085  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2086  ret <32 x i16> %res
2087}
2088
2089define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
2090; X86-LABEL: test_mask_mullo_epi16_rm_512:
2091; X86:       # %bb.0:
2092; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2093; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00]
2094; X86-NEXT:    retl # encoding: [0xc3]
2095;
2096; X64-LABEL: test_mask_mullo_epi16_rm_512:
2097; X64:       # %bb.0:
2098; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
2099; X64-NEXT:    retq # encoding: [0xc3]
2100  %b = load <32 x i16>, ptr %ptr_b
2101  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2102  ret <32 x i16> %res
2103}
2104
2105define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
2106; X86-LABEL: test_mask_mullo_epi16_rmk_512:
2107; X86:       # %bb.0:
2108; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2109; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2110; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08]
2111; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2112; X86-NEXT:    retl # encoding: [0xc3]
2113;
2114; X64-LABEL: test_mask_mullo_epi16_rmk_512:
2115; X64:       # %bb.0:
2116; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2117; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
2118; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2119; X64-NEXT:    retq # encoding: [0xc3]
2120  %b = load <32 x i16>, ptr %ptr_b
2121  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2122  ret <32 x i16> %res
2123}
2124
2125define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
2126; X86-LABEL: test_mask_mullo_epi16_rmkz_512:
2127; X86:       # %bb.0:
2128; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2129; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2130; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00]
2131; X86-NEXT:    retl # encoding: [0xc3]
2132;
2133; X64-LABEL: test_mask_mullo_epi16_rmkz_512:
2134; X64:       # %bb.0:
2135; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2136; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
2137; X64-NEXT:    retq # encoding: [0xc3]
2138  %b = load <32 x i16>, ptr %ptr_b
2139  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2140  ret <32 x i16> %res
2141}
2142
2143declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2144
2145define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2146; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
2147; CHECK:       # %bb.0:
2148; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
2149; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2150  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2151  ret <8 x i16> %res
2152}
2153
2154define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2155; X86-LABEL: test_mask_mullo_epi16_rrk_128:
2156; X86:       # %bb.0:
2157; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2158; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2159; X86-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2160; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2161; X86-NEXT:    retl # encoding: [0xc3]
2162;
2163; X64-LABEL: test_mask_mullo_epi16_rrk_128:
2164; X64:       # %bb.0:
2165; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2166; X64-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2167; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2168; X64-NEXT:    retq # encoding: [0xc3]
2169  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2170  ret <8 x i16> %res
2171}
2172
2173define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2174; X86-LABEL: test_mask_mullo_epi16_rrkz_128:
2175; X86:       # %bb.0:
2176; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2177; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2178; X86-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2179; X86-NEXT:    retl # encoding: [0xc3]
2180;
2181; X64-LABEL: test_mask_mullo_epi16_rrkz_128:
2182; X64:       # %bb.0:
2183; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2184; X64-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2185; X64-NEXT:    retq # encoding: [0xc3]
2186  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2187  ret <8 x i16> %res
2188}
2189
2190define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
2191; X86-LABEL: test_mask_mullo_epi16_rm_128:
2192; X86:       # %bb.0:
2193; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2194; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00]
2195; X86-NEXT:    retl # encoding: [0xc3]
2196;
2197; X64-LABEL: test_mask_mullo_epi16_rm_128:
2198; X64:       # %bb.0:
2199; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07]
2200; X64-NEXT:    retq # encoding: [0xc3]
2201  %b = load <8 x i16>, ptr %ptr_b
2202  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2203  ret <8 x i16> %res
2204}
2205
2206define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
2207; X86-LABEL: test_mask_mullo_epi16_rmk_128:
2208; X86:       # %bb.0:
2209; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2210; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2211; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2212; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08]
2213; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2214; X86-NEXT:    retl # encoding: [0xc3]
2215;
2216; X64-LABEL: test_mask_mullo_epi16_rmk_128:
2217; X64:       # %bb.0:
2218; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2219; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
2220; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2221; X64-NEXT:    retq # encoding: [0xc3]
2222  %b = load <8 x i16>, ptr %ptr_b
2223  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2224  ret <8 x i16> %res
2225}
2226
2227define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
2228; X86-LABEL: test_mask_mullo_epi16_rmkz_128:
2229; X86:       # %bb.0:
2230; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2231; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2232; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2233; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00]
2234; X86-NEXT:    retl # encoding: [0xc3]
2235;
2236; X64-LABEL: test_mask_mullo_epi16_rmkz_128:
2237; X64:       # %bb.0:
2238; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2239; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
2240; X64-NEXT:    retq # encoding: [0xc3]
2241  %b = load <8 x i16>, ptr %ptr_b
2242  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2243  ret <8 x i16> %res
2244}
2245
2246declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2247
2248define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2249; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
2250; CHECK:       # %bb.0:
2251; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1]
2252; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2253  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2254  ret <16 x i16> %res
2255}
2256
2257define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2258; X86-LABEL: test_mask_mullo_epi16_rrk_256:
2259; X86:       # %bb.0:
2260; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2261; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2262; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2263; X86-NEXT:    retl # encoding: [0xc3]
2264;
2265; X64-LABEL: test_mask_mullo_epi16_rrk_256:
2266; X64:       # %bb.0:
2267; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2268; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2269; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2270; X64-NEXT:    retq # encoding: [0xc3]
2271  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2272  ret <16 x i16> %res
2273}
2274
2275define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2276; X86-LABEL: test_mask_mullo_epi16_rrkz_256:
2277; X86:       # %bb.0:
2278; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2279; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2280; X86-NEXT:    retl # encoding: [0xc3]
2281;
2282; X64-LABEL: test_mask_mullo_epi16_rrkz_256:
2283; X64:       # %bb.0:
2284; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2285; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2286; X64-NEXT:    retq # encoding: [0xc3]
2287  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2288  ret <16 x i16> %res
2289}
2290
2291define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
2292; X86-LABEL: test_mask_mullo_epi16_rm_256:
2293; X86:       # %bb.0:
2294; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2295; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00]
2296; X86-NEXT:    retl # encoding: [0xc3]
2297;
2298; X64-LABEL: test_mask_mullo_epi16_rm_256:
2299; X64:       # %bb.0:
2300; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07]
2301; X64-NEXT:    retq # encoding: [0xc3]
2302  %b = load <16 x i16>, ptr %ptr_b
2303  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2304  ret <16 x i16> %res
2305}
2306
2307define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
2308; X86-LABEL: test_mask_mullo_epi16_rmk_256:
2309; X86:       # %bb.0:
2310; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2311; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2312; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08]
2313; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2314; X86-NEXT:    retl # encoding: [0xc3]
2315;
2316; X64-LABEL: test_mask_mullo_epi16_rmk_256:
2317; X64:       # %bb.0:
2318; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2319; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
2320; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2321; X64-NEXT:    retq # encoding: [0xc3]
2322  %b = load <16 x i16>, ptr %ptr_b
2323  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2324  ret <16 x i16> %res
2325}
2326
2327define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
2328; X86-LABEL: test_mask_mullo_epi16_rmkz_256:
2329; X86:       # %bb.0:
2330; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2331; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2332; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00]
2333; X86-NEXT:    retl # encoding: [0xc3]
2334;
2335; X64-LABEL: test_mask_mullo_epi16_rmkz_256:
2336; X64:       # %bb.0:
2337; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2338; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
2339; X64-NEXT:    retq # encoding: [0xc3]
2340  %b = load <16 x i16>, ptr %ptr_b
2341  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2342  ret <16 x i16> %res
2343}
2344
2345declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2346
2347declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2348
2349define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2350; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
2351; X86:       # %bb.0:
2352; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2353; X86-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
2354; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2355; X86-NEXT:    retl # encoding: [0xc3]
2356;
2357; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
2358; X64:       # %bb.0:
2359; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2360; X64-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
2361; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2362; X64-NEXT:    retq # encoding: [0xc3]
2363  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2364  ret <16 x i8> %res
2365}
2366
2367define <16 x i8>@test_int_x86_avx512_maskz_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2368; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128:
2369; X86:       # %bb.0:
2370; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2371; X86-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
2372; X86-NEXT:    retl # encoding: [0xc3]
2373;
2374; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128:
2375; X64:       # %bb.0:
2376; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2377; X64-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
2378; X64-NEXT:    retq # encoding: [0xc3]
2379  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2380  ret <16 x i8> %res
2381}
2382
2383declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2384
2385define <32 x i8>@test_int_x86_avx512_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2386; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_256:
2387; CHECK:       # %bb.0:
2388; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
2389; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2390  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2391  ret <32 x i8> %res
2392}
2393
2394define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2395; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
2396; X86:       # %bb.0:
2397; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2398; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
2399; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2400; X86-NEXT:    retl # encoding: [0xc3]
2401;
2402; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
2403; X64:       # %bb.0:
2404; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2405; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
2406; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2407; X64-NEXT:    retq # encoding: [0xc3]
2408  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2409  ret <32 x i8> %res
2410}
2411
2412declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2413
2414define <8 x i16>@test_int_x86_avx512_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2415; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_128:
2416; CHECK:       # %bb.0:
2417; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
2418; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2419  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2420  ret <8 x i16> %res
2421}
2422
2423define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2424; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
2425; X86:       # %bb.0:
2426; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2427; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2428; X86-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
2429; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2430; X86-NEXT:    retl # encoding: [0xc3]
2431;
2432; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
2433; X64:       # %bb.0:
2434; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2435; X64-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
2436; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2437; X64-NEXT:    retq # encoding: [0xc3]
2438  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2439  ret <8 x i16> %res
2440}
2441
2442declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2443
2444define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2445; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
2446; X86:       # %bb.0:
2447; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2448; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
2449; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2450; X86-NEXT:    retl # encoding: [0xc3]
2451;
2452; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
2453; X64:       # %bb.0:
2454; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2455; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
2456; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2457; X64-NEXT:    retq # encoding: [0xc3]
2458  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2459  ret <16 x i16> %res
2460}
2461
2462define <16 x i16>@test_int_x86_avx512_maskz_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2463; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256:
2464; X86:       # %bb.0:
2465; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2466; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
2467; X86-NEXT:    retl # encoding: [0xc3]
2468;
2469; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256:
2470; X64:       # %bb.0:
2471; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2472; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
2473; X64-NEXT:    retq # encoding: [0xc3]
2474  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2475  ret <16 x i16> %res
2476}
2477
2478declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2479
2480define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2481; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
2482; X86:       # %bb.0:
2483; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2484; X86-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
2485; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2486; X86-NEXT:    retl # encoding: [0xc3]
2487;
2488; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
2489; X64:       # %bb.0:
2490; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2491; X64-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
2492; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2493; X64-NEXT:    retq # encoding: [0xc3]
2494  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2495  ret <16 x i8> %res
2496}
2497
2498define <16 x i8>@test_int_x86_avx512_maskz_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2499; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128:
2500; X86:       # %bb.0:
2501; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2502; X86-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
2503; X86-NEXT:    retl # encoding: [0xc3]
2504;
2505; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128:
2506; X64:       # %bb.0:
2507; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2508; X64-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
2509; X64-NEXT:    retq # encoding: [0xc3]
2510  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2511  ret <16 x i8> %res
2512}
2513
2514declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2515
2516define <32 x i8>@test_int_x86_avx512_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2517; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_256:
2518; CHECK:       # %bb.0:
2519; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
2520; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2521  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2522  ret <32 x i8> %res
2523}
2524
2525define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2526; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
2527; X86:       # %bb.0:
2528; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2529; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
2530; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2531; X86-NEXT:    retl # encoding: [0xc3]
2532;
2533; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
2534; X64:       # %bb.0:
2535; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2536; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
2537; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2538; X64-NEXT:    retq # encoding: [0xc3]
2539  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2540  ret <32 x i8> %res
2541}
2542
2543declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2544
2545define <8 x i16>@test_int_x86_avx512_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2546; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_128:
2547; CHECK:       # %bb.0:
2548; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
2549; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2550  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2551  ret <8 x i16> %res
2552}
2553
2554define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2555; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
2556; X86:       # %bb.0:
2557; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2558; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2559; X86-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
2560; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2561; X86-NEXT:    retl # encoding: [0xc3]
2562;
2563; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
2564; X64:       # %bb.0:
2565; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2566; X64-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
2567; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2568; X64-NEXT:    retq # encoding: [0xc3]
2569  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2570  ret <8 x i16> %res
2571}
2572
2573declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2574
2575define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2576; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
2577; X86:       # %bb.0:
2578; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2579; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
2580; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2581; X86-NEXT:    retl # encoding: [0xc3]
2582;
2583; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
2584; X64:       # %bb.0:
2585; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2586; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
2587; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2588; X64-NEXT:    retq # encoding: [0xc3]
2589  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2590  ret <16 x i16> %res
2591}
2592
2593define <16 x i16>@test_int_x86_avx512_maskz_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2594; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256:
2595; X86:       # %bb.0:
2596; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2597; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
2598; X86-NEXT:    retl # encoding: [0xc3]
2599;
2600; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256:
2601; X64:       # %bb.0:
2602; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2603; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
2604; X64-NEXT:    retq # encoding: [0xc3]
2605  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2606  ret <16 x i16> %res
2607}
2608
2609declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2610
2611define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2612; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128:
2613; X86:       # %bb.0:
2614; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2615; X86-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
2616; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2617; X86-NEXT:    retl # encoding: [0xc3]
2618;
2619; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128:
2620; X64:       # %bb.0:
2621; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2622; X64-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
2623; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2624; X64-NEXT:    retq # encoding: [0xc3]
2625  %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2626  ret <16 x i8> %res
2627}
2628
2629define <16 x i8>@test_int_x86_avx512_maskz_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2630; X86-LABEL: test_int_x86_avx512_maskz_pmins_b_128:
2631; X86:       # %bb.0:
2632; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2633; X86-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
2634; X86-NEXT:    retl # encoding: [0xc3]
2635;
2636; X64-LABEL: test_int_x86_avx512_maskz_pmins_b_128:
2637; X64:       # %bb.0:
2638; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2639; X64-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
2640; X64-NEXT:    retq # encoding: [0xc3]
2641  %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2642  ret <16 x i8> %res
2643}
2644
2645declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2646
2647define <32 x i8>@test_int_x86_avx512_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2648; CHECK-LABEL: test_int_x86_avx512_pmins_b_256:
2649; CHECK:       # %bb.0:
2650; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
2651; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2652  %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2653  ret <32 x i8> %res
2654}
2655
2656define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2657; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256:
2658; X86:       # %bb.0:
2659; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2660; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
2661; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2662; X86-NEXT:    retl # encoding: [0xc3]
2663;
2664; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256:
2665; X64:       # %bb.0:
2666; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2667; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
2668; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2669; X64-NEXT:    retq # encoding: [0xc3]
2670  %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2671  ret <32 x i8> %res
2672}
2673
2674declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2675
2676define <8 x i16>@test_int_x86_avx512_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2677; CHECK-LABEL: test_int_x86_avx512_pmins_w_128:
2678; CHECK:       # %bb.0:
2679; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
2680; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2681  %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2682  ret <8 x i16> %res
2683}
2684
2685define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2686; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128:
2687; X86:       # %bb.0:
2688; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2689; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2690; X86-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
2691; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2692; X86-NEXT:    retl # encoding: [0xc3]
2693;
2694; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128:
2695; X64:       # %bb.0:
2696; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2697; X64-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
2698; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2699; X64-NEXT:    retq # encoding: [0xc3]
2700  %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2701  ret <8 x i16> %res
2702}
2703
2704declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2705
2706define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2707; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256:
2708; X86:       # %bb.0:
2709; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2710; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
2711; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2712; X86-NEXT:    retl # encoding: [0xc3]
2713;
2714; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256:
2715; X64:       # %bb.0:
2716; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2717; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
2718; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2719; X64-NEXT:    retq # encoding: [0xc3]
2720  %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2721  ret <16 x i16> %res
2722}
2723
2724define <16 x i16>@test_int_x86_avx512_maskz_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2725; X86-LABEL: test_int_x86_avx512_maskz_pmins_w_256:
2726; X86:       # %bb.0:
2727; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2728; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
2729; X86-NEXT:    retl # encoding: [0xc3]
2730;
2731; X64-LABEL: test_int_x86_avx512_maskz_pmins_w_256:
2732; X64:       # %bb.0:
2733; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2734; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
2735; X64-NEXT:    retq # encoding: [0xc3]
2736  %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2737  ret <16 x i16> %res
2738}
2739
2740declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2741
2742define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2743; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128:
2744; X86:       # %bb.0:
2745; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2746; X86-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
2747; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2748; X86-NEXT:    retl # encoding: [0xc3]
2749;
2750; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128:
2751; X64:       # %bb.0:
2752; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2753; X64-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
2754; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2755; X64-NEXT:    retq # encoding: [0xc3]
2756  %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2757  ret <16 x i8> %res
2758}
2759
2760define <16 x i8>@test_int_x86_avx512_maskz_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2761; X86-LABEL: test_int_x86_avx512_maskz_pminu_b_128:
2762; X86:       # %bb.0:
2763; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2764; X86-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
2765; X86-NEXT:    retl # encoding: [0xc3]
2766;
2767; X64-LABEL: test_int_x86_avx512_maskz_pminu_b_128:
2768; X64:       # %bb.0:
2769; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2770; X64-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
2771; X64-NEXT:    retq # encoding: [0xc3]
2772  %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2773  ret <16 x i8> %res
2774}
2775
2776declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2777
2778define <32 x i8>@test_int_x86_avx512_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2779; CHECK-LABEL: test_int_x86_avx512_pminu_b_256:
2780; CHECK:       # %bb.0:
2781; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
2782; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2783  %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2784  ret <32 x i8> %res
2785}
2786
2787define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2788; X86-LABEL: test_int_x86_avx512_mask_pminu_b_256:
2789; X86:       # %bb.0:
2790; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2791; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
2792; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2793; X86-NEXT:    retl # encoding: [0xc3]
2794;
2795; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256:
2796; X64:       # %bb.0:
2797; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2798; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
2799; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2800; X64-NEXT:    retq # encoding: [0xc3]
2801  %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2802  ret <32 x i8> %res
2803}
2804
2805declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2806
2807define <8 x i16>@test_int_x86_avx512_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2808; CHECK-LABEL: test_int_x86_avx512_pminu_w_128:
2809; CHECK:       # %bb.0:
2810; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
2811; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2812  %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2813  ret <8 x i16> %res
2814}
2815
2816define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2817; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128:
2818; X86:       # %bb.0:
2819; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2820; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2821; X86-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
2822; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2823; X86-NEXT:    retl # encoding: [0xc3]
2824;
2825; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128:
2826; X64:       # %bb.0:
2827; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2828; X64-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
2829; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2830; X64-NEXT:    retq # encoding: [0xc3]
2831  %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2832  ret <8 x i16> %res
2833}
2834
2835declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2836
2837define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2838; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256:
2839; X86:       # %bb.0:
2840; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2841; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
2842; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2843; X86-NEXT:    retl # encoding: [0xc3]
2844;
2845; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256:
2846; X64:       # %bb.0:
2847; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2848; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
2849; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2850; X64-NEXT:    retq # encoding: [0xc3]
2851  %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2852  ret <16 x i16> %res
2853}
2854
2855define <16 x i16>@test_int_x86_avx512_maskz_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2856; X86-LABEL: test_int_x86_avx512_maskz_pminu_w_256:
2857; X86:       # %bb.0:
2858; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2859; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
2860; X86-NEXT:    retl # encoding: [0xc3]
2861;
2862; X64-LABEL: test_int_x86_avx512_maskz_pminu_w_256:
2863; X64:       # %bb.0:
2864; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2865; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
2866; X64-NEXT:    retq # encoding: [0xc3]
2867  %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2868  ret <16 x i16> %res
2869}
2870
2871declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2872
2873define <8 x i16>@test_int_x86_avx512_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2874; CHECK-LABEL: test_int_x86_avx512_psrl_w_128:
2875; CHECK:       # %bb.0:
2876; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
2877; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2878  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2879  ret <8 x i16> %res
2880}
2881
2882define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2883; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128:
2884; X86:       # %bb.0:
2885; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2886; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2887; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
2888; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2889; X86-NEXT:    retl # encoding: [0xc3]
2890;
2891; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128:
2892; X64:       # %bb.0:
2893; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2894; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
2895; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2896; X64-NEXT:    retq # encoding: [0xc3]
2897  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2898  ret <8 x i16> %res
2899}
2900
2901
2902define <8 x i16>@test_int_x86_avx512_maskz_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2903; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_128:
2904; X86:       # %bb.0:
2905; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2906; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2907; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
2908; X86-NEXT:    retl # encoding: [0xc3]
2909;
2910; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_128:
2911; X64:       # %bb.0:
2912; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2913; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
2914; X64-NEXT:    retq # encoding: [0xc3]
2915  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
2916  ret <8 x i16> %res
2917}
2918
2919declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
2920
2921define <16 x i16>@test_int_x86_avx512_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
2922; CHECK-LABEL: test_int_x86_avx512_psrl_w_256:
2923; CHECK:       # %bb.0:
2924; CHECK-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
2925; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2926  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
2927  ret <16 x i16> %res
2928}
2929
2930define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2931; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256:
2932; X86:       # %bb.0:
2933; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2934; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
2935; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2936; X86-NEXT:    retl # encoding: [0xc3]
2937;
2938; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256:
2939; X64:       # %bb.0:
2940; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2941; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
2942; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2943; X64-NEXT:    retq # encoding: [0xc3]
2944  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
2945  ret <16 x i16> %res
2946}
2947
2948define <16 x i16>@test_int_x86_avx512_maskz_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
2949; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_256:
2950; X86:       # %bb.0:
2951; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2952; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
2953; X86-NEXT:    retl # encoding: [0xc3]
2954;
2955; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_256:
2956; X64:       # %bb.0:
2957; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2958; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
2959; X64-NEXT:    retq # encoding: [0xc3]
2960  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
2961  ret <16 x i16> %res
2962}
2963
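; Arithmetic right shift by a count held in the low 64 bits of the second xmm operand (vpsraw).
; As with the vpsrlw tests above, each width is exercised unmasked (mask -1), merge-masked, and zero-masked.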
2964declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2965
2966define <8 x i16>@test_int_x86_avx512_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2967; CHECK-LABEL: test_int_x86_avx512_psra_w_128:
2968; CHECK:       # %bb.0:
2969; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
2970; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2971  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2972  ret <8 x i16> %res
2973}
2974
2975define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2976; X86-LABEL: test_int_x86_avx512_mask_psra_w_128:
2977; X86:       # %bb.0:
2978; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2979; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2980; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
2981; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2982; X86-NEXT:    retl # encoding: [0xc3]
2983;
2984; X64-LABEL: test_int_x86_avx512_mask_psra_w_128:
2985; X64:       # %bb.0:
2986; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2987; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
2988; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2989; X64-NEXT:    retq # encoding: [0xc3]
2990  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2991  ret <8 x i16> %res
2992}
2993
2994define <8 x i16>@test_int_x86_avx512_maskz_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2995; X86-LABEL: test_int_x86_avx512_maskz_psra_w_128:
2996; X86:       # %bb.0:
2997; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2998; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2999; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
3000; X86-NEXT:    retl # encoding: [0xc3]
3001;
3002; X64-LABEL: test_int_x86_avx512_maskz_psra_w_128:
3003; X64:       # %bb.0:
3004; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3005; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
3006; X64-NEXT:    retq # encoding: [0xc3]
3007  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
3008  ret <8 x i16> %res
3009}
3010
3011declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
3012
3013define <16 x i16>@test_int_x86_avx512_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
3014; CHECK-LABEL: test_int_x86_avx512_psra_w_256:
3015; CHECK:       # %bb.0:
3016; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
3017; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3018  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
3019  ret <16 x i16> %res
3020}
3021
3022define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3023; X86-LABEL: test_int_x86_avx512_mask_psra_w_256:
3024; X86:       # %bb.0:
3025; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3026; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
3027; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3028; X86-NEXT:    retl # encoding: [0xc3]
3029;
3030; X64-LABEL: test_int_x86_avx512_mask_psra_w_256:
3031; X64:       # %bb.0:
3032; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3033; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
3034; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3035; X64-NEXT:    retq # encoding: [0xc3]
3036  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
3037  ret <16 x i16> %res
3038}
3039
3040define <16 x i16>@test_int_x86_avx512_maskz_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
3041; X86-LABEL: test_int_x86_avx512_maskz_psra_w_256:
3042; X86:       # %bb.0:
3043; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3044; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
3045; X86-NEXT:    retl # encoding: [0xc3]
3046;
3047; X64-LABEL: test_int_x86_avx512_maskz_psra_w_256:
3048; X64:       # %bb.0:
3049; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3050; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
3051; X64-NEXT:    retq # encoding: [0xc3]
3052  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
3053  ret <16 x i16> %res
3054}
3055
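; Logical left shift by an xmm-held count (vpsllw), with the same unmasked / merge-masked / zero-masked coverage.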
3056declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3057
3058define <8 x i16>@test_int_x86_avx512_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
3059; CHECK-LABEL: test_int_x86_avx512_psll_w_128:
3060; CHECK:       # %bb.0:
3061; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
3062; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3063  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3064  ret <8 x i16> %res
3065}
3066
3067define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3068; X86-LABEL: test_int_x86_avx512_mask_psll_w_128:
3069; X86:       # %bb.0:
3070; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3071; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3072; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
3073; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3074; X86-NEXT:    retl # encoding: [0xc3]
3075;
3076; X64-LABEL: test_int_x86_avx512_mask_psll_w_128:
3077; X64:       # %bb.0:
3078; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3079; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
3080; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3081; X64-NEXT:    retq # encoding: [0xc3]
3082  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3083  ret <8 x i16> %res
3084}
3085
3086define <8 x i16>@test_int_x86_avx512_maskz_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
3087; X86-LABEL: test_int_x86_avx512_maskz_psll_w_128:
3088; X86:       # %bb.0:
3089; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3090; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3091; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
3092; X86-NEXT:    retl # encoding: [0xc3]
3093;
3094; X64-LABEL: test_int_x86_avx512_maskz_psll_w_128:
3095; X64:       # %bb.0:
3096; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3097; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
3098; X64-NEXT:    retq # encoding: [0xc3]
3099  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
3100  ret <8 x i16> %res
3101}
3102
3103declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
3104
3105define <16 x i16>@test_int_x86_avx512_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
3106; CHECK-LABEL: test_int_x86_avx512_psll_w_256:
3107; CHECK:       # %bb.0:
3108; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
3109; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3110  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
3111  ret <16 x i16> %res
3112}
3113
3114define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3115; X86-LABEL: test_int_x86_avx512_mask_psll_w_256:
3116; X86:       # %bb.0:
3117; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3118; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
3119; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3120; X86-NEXT:    retl # encoding: [0xc3]
3121;
3122; X64-LABEL: test_int_x86_avx512_mask_psll_w_256:
3123; X64:       # %bb.0:
3124; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3125; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
3126; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3127; X64-NEXT:    retq # encoding: [0xc3]
3128  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
3129  ret <16 x i16> %res
3130}
3131
3132define <16 x i16>@test_int_x86_avx512_maskz_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
3133; X86-LABEL: test_int_x86_avx512_maskz_psll_w_256:
3134; X86:       # %bb.0:
3135; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3136; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
3137; X86-NEXT:    retl # encoding: [0xc3]
3138;
3139; X64-LABEL: test_int_x86_avx512_maskz_psll_w_256:
3140; X64:       # %bb.0:
3141; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3142; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
3143; X64-NEXT:    retq # encoding: [0xc3]
3144  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
3145  ret <16 x i16> %res
3146}
3147
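; Shift-by-immediate (.wi) intrinsics. Each test below makes three calls with immediates 3, 4 and 5
; and returns them as an aggregate, so the merge-masked, unmasked and zero-masked encodings are all
; checked within a single function.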
3148declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3149
3150define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3151; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
3152; X86:       # %bb.0:
3153; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3154; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3155; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3156; X86-NEXT:    vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03]
3157; X86-NEXT:    vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04]
3158; X86-NEXT:    vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05]
3159; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3160; X86-NEXT:    retl # encoding: [0xc3]
3161;
3162; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
3163; X64:       # %bb.0:
3164; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3165; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3166; X64-NEXT:    vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03]
3167; X64-NEXT:    vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04]
3168; X64-NEXT:    vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05]
3169; X64-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3170; X64-NEXT:    retq # encoding: [0xc3]
3171  %res0 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3172  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1)
3173  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3)
3174  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
3175  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
3176  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
3177  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
3178}
3179
3180declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3181
3182define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3183; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
3184; X86:       # %bb.0:
3185; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3186; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3187; X86-NEXT:    vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03]
3188; X86-NEXT:    vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04]
3189; X86-NEXT:    vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05]
3190; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3191; X86-NEXT:    retl # encoding: [0xc3]
3192;
3193; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
3194; X64:       # %bb.0:
3195; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3196; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3197; X64-NEXT:    vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03]
3198; X64-NEXT:    vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04]
3199; X64-NEXT:    vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05]
3200; X64-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3201; X64-NEXT:    retq # encoding: [0xc3]
3202  %res0 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3203  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1)
3204  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3)
3205  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
3206  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
3207  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
3208  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
3209}
3210
3211declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3212
3213define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3214; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128:
3215; X86:       # %bb.0:
3216; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3217; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3218; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3219; X86-NEXT:    vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03]
3220; X86-NEXT:    vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04]
3221; X86-NEXT:    vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05]
3222; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3223; X86-NEXT:    retl # encoding: [0xc3]
3224;
3225; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128:
3226; X64:       # %bb.0:
3227; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3228; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3229; X64-NEXT:    vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03]
3230; X64-NEXT:    vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04]
3231; X64-NEXT:    vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05]
3232; X64-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3233; X64-NEXT:    retq # encoding: [0xc3]
3234  %res0 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3235  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
3236  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
3237  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
3238  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
3239  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
3240  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
3241}
3242
3243declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3244
3245define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3246; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256:
3247; X86:       # %bb.0:
3248; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3249; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3250; X86-NEXT:    vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03]
3251; X86-NEXT:    vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04]
3252; X86-NEXT:    vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05]
3253; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3254; X86-NEXT:    retl # encoding: [0xc3]
3255;
3256; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256:
3257; X64:       # %bb.0:
3258; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3259; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3260; X64-NEXT:    vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03]
3261; X64-NEXT:    vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04]
3262; X64-NEXT:    vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05]
3263; X64-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3264; X64-NEXT:    retq # encoding: [0xc3]
3265  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3266  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
3267  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
3268  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
3269  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
3270  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
3271  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
3272}
3273
3274declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3275
3276define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3277; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128:
3278; X86:       # %bb.0:
3279; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3280; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3281; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3282; X86-NEXT:    vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03]
3283; X86-NEXT:    vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04]
3284; X86-NEXT:    vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05]
3285; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3286; X86-NEXT:    retl # encoding: [0xc3]
3287;
3288; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128:
3289; X64:       # %bb.0:
3290; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
3291; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3292; X64-NEXT:    vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03]
3293; X64-NEXT:    vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04]
3294; X64-NEXT:    vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05]
3295; X64-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
3296; X64-NEXT:    retq # encoding: [0xc3]
3297  %res0 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3298  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
3299  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
3300  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
3301  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
3302  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
3303  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
3304}
3305
3306declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3307
3308define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3309; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256:
3310; X86:       # %bb.0:
3311; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3312; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3313; X86-NEXT:    vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03]
3314; X86-NEXT:    vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04]
3315; X86-NEXT:    vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05]
3316; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3317; X86-NEXT:    retl # encoding: [0xc3]
3318;
3319; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256:
3320; X64:       # %bb.0:
3321; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
3322; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3323; X64-NEXT:    vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03]
3324; X64-NEXT:    vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04]
3325; X64-NEXT:    vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05]
3326; X64-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
3327; X64-NEXT:    retq # encoding: [0xc3]
3328  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3329  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
3330  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
3331  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
3332  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
3333  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
3334  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
3335}
3336
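; Byte shuffle (vpshufb) tests for 128-bit and 256-bit vectors, unmasked and merge-masked.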
3337declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3338
3339define <16 x i8>@test_int_x86_avx512_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
3340; CHECK-LABEL: test_int_x86_avx512_pshuf_b_128:
3341; CHECK:       # %bb.0:
3342; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
3343; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3344  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
3345  ret <16 x i8> %res
3346}
3347
3348define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
3349; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
3350; X86:       # %bb.0:
3351; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3352; X86-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
3353; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3354; X86-NEXT:    retl # encoding: [0xc3]
3355;
3356; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
3357; X64:       # %bb.0:
3358; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3359; X64-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
3360; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3361; X64-NEXT:    retq # encoding: [0xc3]
3362  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
3363  ret <16 x i8> %res
3364}
3365
3366declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3367
3368define <32 x i8>@test_int_x86_avx512_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
3369; CHECK-LABEL: test_int_x86_avx512_pshuf_b_256:
3370; CHECK:       # %bb.0:
3371; CHECK-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
3372; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3373  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3374  ret <32 x i8> %res
3375}
3376
3377define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3378; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
3379; X86:       # %bb.0:
3380; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3381; X86-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
3382; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3383; X86-NEXT:    retl # encoding: [0xc3]
3384;
3385; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
3386; X64:       # %bb.0:
3387; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3388; X64-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
3389; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3390; X64-NEXT:    retq # encoding: [0xc3]
3391  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3392  ret <32 x i8> %res
3393}
3394
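; Element extension tests (vpmovzxbw, vpmovsxbw, vpmovsxdq). For the zero-extending forms the
; checks also match the element-mapping comments emitted alongside the instruction.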
3395declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)
3396
3397define <8 x i16>@test_int_x86_avx512_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
3398; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_128:
3399; CHECK:       # %bb.0:
3400; CHECK-NEXT:    vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
3401; CHECK-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3402; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3403  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
3404  ret <8 x i16> %res
3405}
3406
3407define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
3408; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
3409; X86:       # %bb.0:
3410; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3411; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3412; X86-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
3413; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3414; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3415; X86-NEXT:    retl # encoding: [0xc3]
3416;
3417; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
3418; X64:       # %bb.0:
3419; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3420; X64-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
3421; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3422; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3423; X64-NEXT:    retq # encoding: [0xc3]
3424  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
3425  ret <8 x i16> %res
3426}
3427
3428define <8 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_128(<16 x i8> %x0, i8 %x2) {
3429; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
3430; X86:       # %bb.0:
3431; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3432; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3433; X86-NEXT:    vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
3434; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3435; X86-NEXT:    retl # encoding: [0xc3]
3436;
3437; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
3438; X64:       # %bb.0:
3439; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3440; X64-NEXT:    vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
3441; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3442; X64-NEXT:    retq # encoding: [0xc3]
3443  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
3444  ret <8 x i16> %res
3445}
3446
3447declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)
3448
3449define <16 x i16>@test_int_x86_avx512_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
3450; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_256:
3451; CHECK:       # %bb.0:
3452; CHECK-NEXT:    vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
3453; CHECK-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3454; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3455  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
3456  ret <16 x i16> %res
3457}
3458
3459define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
3460; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
3461; X86:       # %bb.0:
3462; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3463; X86-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
3464; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3465; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3466; X86-NEXT:    retl # encoding: [0xc3]
3467;
3468; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
3469; X64:       # %bb.0:
3470; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3471; X64-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
3472; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3473; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3474; X64-NEXT:    retq # encoding: [0xc3]
3475  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
3476  ret <16 x i16> %res
3477}
3478
3479define <16 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_256(<16 x i8> %x0, i16 %x2) {
3480; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
3481; X86:       # %bb.0:
3482; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3483; X86-NEXT:    vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
3484; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3485; X86-NEXT:    retl # encoding: [0xc3]
3486;
3487; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
3488; X64:       # %bb.0:
3489; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3490; X64-NEXT:    vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
3491; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3492; X64-NEXT:    retq # encoding: [0xc3]
3493  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
3494  ret <16 x i16> %res
3495}
3496
3497declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)
3498
3499define <8 x i16>@test_int_x86_avx512_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
3500; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_128:
3501; CHECK:       # %bb.0:
3502; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
3503; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3504  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
3505  ret <8 x i16> %res
3506}
3507
3508define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
3509; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
3510; X86:       # %bb.0:
3511; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3512; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3513; X86-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
3514; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3515; X86-NEXT:    retl # encoding: [0xc3]
3516;
3517; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
3518; X64:       # %bb.0:
3519; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3520; X64-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
3521; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3522; X64-NEXT:    retq # encoding: [0xc3]
3523  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
3524  ret <8 x i16> %res
3525}
3526
3527define <8 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_128(<16 x i8> %x0, i8 %x2) {
3528; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
3529; X86:       # %bb.0:
3530; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3531; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3532; X86-NEXT:    vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
3533; X86-NEXT:    retl # encoding: [0xc3]
3534;
3535; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
3536; X64:       # %bb.0:
3537; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3538; X64-NEXT:    vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
3539; X64-NEXT:    retq # encoding: [0xc3]
3540  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
3541  ret <8 x i16> %res
3542}
3543
3544declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)
3545
3546define <16 x i16>@test_int_x86_avx512_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
3547; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_256:
3548; CHECK:       # %bb.0:
3549; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xc0]
3550; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3551  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
3552  ret <16 x i16> %res
3553}
3554
3555define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
3556; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
3557; X86:       # %bb.0:
3558; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3559; X86-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
3560; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3561; X86-NEXT:    retl # encoding: [0xc3]
3562;
3563; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
3564; X64:       # %bb.0:
3565; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3566; X64-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
3567; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3568; X64-NEXT:    retq # encoding: [0xc3]
3569  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
3570  ret <16 x i16> %res
3571}
3572
3573define <16 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_256(<16 x i8> %x0, i16 %x2) {
3574; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
3575; X86:       # %bb.0:
3576; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3577; X86-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
3578; X86-NEXT:    retl # encoding: [0xc3]
3579;
3580; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
3581; X64:       # %bb.0:
3582; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3583; X64-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
3584; X64-NEXT:    retq # encoding: [0xc3]
3585  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
3586  ret <16 x i16> %res
3587}
3588
3589declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)
3590
3591define <2 x i64>@test_int_x86_avx512_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1) {
3592; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_128:
3593; CHECK:       # %bb.0:
3594; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
3595; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3596  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
3597  ret <2 x i64> %res
3598}
3599
3600define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
3601; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
3602; X86:       # %bb.0:
3603; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3604; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3605; X86-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
3606; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3607; X86-NEXT:    retl # encoding: [0xc3]
3608;
3609; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
3610; X64:       # %bb.0:
3611; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3612; X64-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
3613; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3614; X64-NEXT:    retq # encoding: [0xc3]
3615  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
3616  ret <2 x i64> %res
3617}
3618
3619define <2 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_128(<4 x i32> %x0, i8 %x2) {
3620; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
3621; X86:       # %bb.0:
3622; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3623; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3624; X86-NEXT:    vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
3625; X86-NEXT:    retl # encoding: [0xc3]
3626;
3627; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
3628; X64:       # %bb.0:
3629; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3630; X64-NEXT:    vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
3631; X64-NEXT:    retq # encoding: [0xc3]
3632  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
3633  ret <2 x i64> %res
3634}
3635
3636declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)
3637
3638define <4 x i64>@test_int_x86_avx512_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1) {
3639; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_256:
3640; CHECK:       # %bb.0:
3641; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xc0]
3642; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3643  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
3644  ret <4 x i64> %res
3645}
3646
3647define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
3648; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
3649; X86:       # %bb.0:
3650; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3651; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3652; X86-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
3653; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3654; X86-NEXT:    retl # encoding: [0xc3]
3655;
3656; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
3657; X64:       # %bb.0:
3658; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3659; X64-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
3660; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3661; X64-NEXT:    retq # encoding: [0xc3]
3662  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
3663  ret <4 x i64> %res
3664}
3665
3666define <4 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_256(<4 x i32> %x0, i8 %x2) {
3667; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
3668; X86:       # %bb.0:
3669; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3670; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3671; X86-NEXT:    vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
3672; X86-NEXT:    retl # encoding: [0xc3]
3673;
3674; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
3675; X64:       # %bb.0:
3676; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3677; X64-NEXT:    vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
3678; X64-NEXT:    retq # encoding: [0xc3]
3679  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
3680  ret <4 x i64> %res
3681}
3682
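; Mask-to-vector tests: vpmovm2b/vpmovm2w materialize a k-register as a vector whose elements are
; all-ones where the corresponding mask bit is set and zero elsewhere.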
3683declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)
3684
3685define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
3686; X86-LABEL: test_int_x86_avx512_cvtmask2b_128:
3687; X86:       # %bb.0:
3688; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
3689; X86-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
3690; X86-NEXT:    retl # encoding: [0xc3]
3691;
3692; X64-LABEL: test_int_x86_avx512_cvtmask2b_128:
3693; X64:       # %bb.0:
3694; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3695; X64-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
3696; X64-NEXT:    retq # encoding: [0xc3]
3697  %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
3698  ret <16 x i8> %res
3699}
3700
3701declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)
3702
3703define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
3704; X86-LABEL: test_int_x86_avx512_cvtmask2b_256:
3705; X86:       # %bb.0:
3706; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
3707; X86-NEXT:    vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
3708; X86-NEXT:    retl # encoding: [0xc3]
3709;
3710; X64-LABEL: test_int_x86_avx512_cvtmask2b_256:
3711; X64:       # %bb.0:
3712; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3713; X64-NEXT:    vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
3714; X64-NEXT:    retq # encoding: [0xc3]
3715  %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
3716  ret <32 x i8> %res
3717}
3718
3719declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)
3720
3721define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
3722; X86-LABEL: test_int_x86_avx512_cvtmask2w_128:
3723; X86:       # %bb.0:
3724; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3725; X86-NEXT:    kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0]
3726; X86-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
3727; X86-NEXT:    retl # encoding: [0xc3]
3728;
3729; X64-LABEL: test_int_x86_avx512_cvtmask2w_128:
3730; X64:       # %bb.0:
3731; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3732; X64-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
3733; X64-NEXT:    retq # encoding: [0xc3]
3734  %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
3735  ret <8 x i16> %res
3736}
3737
3738declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)
3739
3740define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
3741; X86-LABEL: test_int_x86_avx512_cvtmask2w_256:
3742; X86:       # %bb.0:
3743; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
3744; X86-NEXT:    vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
3745; X86-NEXT:    retl # encoding: [0xc3]
3746;
3747; X64-LABEL: test_int_x86_avx512_cvtmask2w_256:
3748; X64:       # %bb.0:
3749; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3750; X64-NEXT:    vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
3751; X64-NEXT:    retq # encoding: [0xc3]
3752  %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
3753  ret <16 x i16> %res
3754}
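
; Saturating pack tests (vpackssdw, dword to word). The following functions cover register-register,
; register-memory and broadcast-from-memory operands, each unmasked, merge-masked and zero-masked.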
3755define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3756; CHECK-LABEL: test_mask_packs_epi32_rr_128:
3757; CHECK:       # %bb.0:
3758; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
3759; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3760  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3761  ret <8 x i16> %res
3762}
3763
3764define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
3765; X86-LABEL: test_mask_packs_epi32_rrk_128:
3766; X86:       # %bb.0:
3767; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3768; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3769; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
3770; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3771; X86-NEXT:    retl # encoding: [0xc3]
3772;
3773; X64-LABEL: test_mask_packs_epi32_rrk_128:
3774; X64:       # %bb.0:
3775; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3776; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
3777; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3778; X64-NEXT:    retq # encoding: [0xc3]
3779  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3780  ret <8 x i16> %res
3781}
3782
3783define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3784; X86-LABEL: test_mask_packs_epi32_rrkz_128:
3785; X86:       # %bb.0:
3786; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3787; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3788; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
3789; X86-NEXT:    retl # encoding: [0xc3]
3790;
3791; X64-LABEL: test_mask_packs_epi32_rrkz_128:
3792; X64:       # %bb.0:
3793; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3794; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
3795; X64-NEXT:    retq # encoding: [0xc3]
3796  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3797  ret <8 x i16> %res
3798}
3799
3800define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
3801; X86-LABEL: test_mask_packs_epi32_rm_128:
3802; X86:       # %bb.0:
3803; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3804; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
3805; X86-NEXT:    retl # encoding: [0xc3]
3806;
3807; X64-LABEL: test_mask_packs_epi32_rm_128:
3808; X64:       # %bb.0:
3809; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
3810; X64-NEXT:    retq # encoding: [0xc3]
3811  %b = load <4 x i32>, ptr %ptr_b
3812  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3813  ret <8 x i16> %res
3814}
3815
3816define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
3817; X86-LABEL: test_mask_packs_epi32_rmk_128:
3818; X86:       # %bb.0:
3819; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3820; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3821; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3822; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
3823; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3824; X86-NEXT:    retl # encoding: [0xc3]
3825;
3826; X64-LABEL: test_mask_packs_epi32_rmk_128:
3827; X64:       # %bb.0:
3828; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3829; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
3830; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3831; X64-NEXT:    retq # encoding: [0xc3]
3832  %b = load <4 x i32>, ptr %ptr_b
3833  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3834  ret <8 x i16> %res
3835}
3836
3837define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
3838; X86-LABEL: test_mask_packs_epi32_rmkz_128:
3839; X86:       # %bb.0:
3840; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3841; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3842; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3843; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
3844; X86-NEXT:    retl # encoding: [0xc3]
3845;
3846; X64-LABEL: test_mask_packs_epi32_rmkz_128:
3847; X64:       # %bb.0:
3848; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3849; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
3850; X64-NEXT:    retq # encoding: [0xc3]
3851  %b = load <4 x i32>, ptr %ptr_b
3852  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3853  ret <8 x i16> %res
3854}
3855
3856define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
3857; X86-LABEL: test_mask_packs_epi32_rmb_128:
3858; X86:       # %bb.0:
3859; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3860; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
3861; X86-NEXT:    retl # encoding: [0xc3]
3862;
3863; X64-LABEL: test_mask_packs_epi32_rmb_128:
3864; X64:       # %bb.0:
3865; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
3866; X64-NEXT:    retq # encoding: [0xc3]
3867  %q = load i32, ptr %ptr_b
3868  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3869  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3870  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3871  ret <8 x i16> %res
3872}
3873
3874define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
3875; X86-LABEL: test_mask_packs_epi32_rmbk_128:
3876; X86:       # %bb.0:
3877; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3878; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3879; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3880; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
3881; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3882; X86-NEXT:    retl # encoding: [0xc3]
3883;
3884; X64-LABEL: test_mask_packs_epi32_rmbk_128:
3885; X64:       # %bb.0:
3886; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3887; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
3888; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3889; X64-NEXT:    retq # encoding: [0xc3]
3890  %q = load i32, ptr %ptr_b
3891  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3892  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3893  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3894  ret <8 x i16> %res
3895}
3896
3897define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
3898; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
3899; X86:       # %bb.0:
3900; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3901; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3902; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3903; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
3904; X86-NEXT:    retl # encoding: [0xc3]
3905;
3906; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
3907; X64:       # %bb.0:
3908; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3909; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
3910; X64-NEXT:    retq # encoding: [0xc3]
3911  %q = load i32, ptr %ptr_b
3912  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3913  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3914  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3915  ret <8 x i16> %res
3916}
3917
3918declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
3919
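; Naming convention for the pack tests in this file (inferred from the bodies
; below): "rr" uses register operands only, "rm" loads the second operand from
; memory, and "rmb" broadcasts a single i32 from memory ({1to4}/{1to8}); an
; added "k" merges into a pass-through vector under %mask, while "kz" requests
; zero-masking. The 128/256 suffix selects the xmm or ymm form.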
3920define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3921; CHECK-LABEL: test_mask_packs_epi32_rr_256:
3922; CHECK:       # %bb.0:
3923; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
3924; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3925  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
3926  ret <16 x i16> %res
3927}
3928
3929define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
3930; X86-LABEL: test_mask_packs_epi32_rrk_256:
3931; X86:       # %bb.0:
3932; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3933; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
3934; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3935; X86-NEXT:    retl # encoding: [0xc3]
3936;
3937; X64-LABEL: test_mask_packs_epi32_rrk_256:
3938; X64:       # %bb.0:
3939; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3940; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
3941; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3942; X64-NEXT:    retq # encoding: [0xc3]
3943  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
3944  ret <16 x i16> %res
3945}
3946
3947define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
3948; X86-LABEL: test_mask_packs_epi32_rrkz_256:
3949; X86:       # %bb.0:
3950; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3951; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
3952; X86-NEXT:    retl # encoding: [0xc3]
3953;
3954; X64-LABEL: test_mask_packs_epi32_rrkz_256:
3955; X64:       # %bb.0:
3956; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3957; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
3958; X64-NEXT:    retq # encoding: [0xc3]
3959  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
3960  ret <16 x i16> %res
3961}
3962
3963define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) {
3964; X86-LABEL: test_mask_packs_epi32_rm_256:
3965; X86:       # %bb.0:
3966; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3967; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
3968; X86-NEXT:    retl # encoding: [0xc3]
3969;
3970; X64-LABEL: test_mask_packs_epi32_rm_256:
3971; X64:       # %bb.0:
3972; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
3973; X64-NEXT:    retq # encoding: [0xc3]
3974  %b = load <8 x i32>, ptr %ptr_b
3975  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
3976  ret <16 x i16> %res
3977}
3978
3979define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
3980; X86-LABEL: test_mask_packs_epi32_rmk_256:
3981; X86:       # %bb.0:
3982; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3983; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3984; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
3985; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3986; X86-NEXT:    retl # encoding: [0xc3]
3987;
3988; X64-LABEL: test_mask_packs_epi32_rmk_256:
3989; X64:       # %bb.0:
3990; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3991; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
3992; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3993; X64-NEXT:    retq # encoding: [0xc3]
3994  %b = load <8 x i32>, ptr %ptr_b
3995  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
3996  ret <16 x i16> %res
3997}
3998
3999define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4000; X86-LABEL: test_mask_packs_epi32_rmkz_256:
4001; X86:       # %bb.0:
4002; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4003; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4004; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
4005; X86-NEXT:    retl # encoding: [0xc3]
4006;
4007; X64-LABEL: test_mask_packs_epi32_rmkz_256:
4008; X64:       # %bb.0:
4009; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4010; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
4011; X64-NEXT:    retq # encoding: [0xc3]
4012  %b = load <8 x i32>, ptr %ptr_b
4013  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4014  ret <16 x i16> %res
4015}
4016
4017define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) {
4018; X86-LABEL: test_mask_packs_epi32_rmb_256:
4019; X86:       # %bb.0:
4020; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4021; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
4022; X86-NEXT:    retl # encoding: [0xc3]
4023;
4024; X64-LABEL: test_mask_packs_epi32_rmb_256:
4025; X64:       # %bb.0:
4026; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
4027; X64-NEXT:    retq # encoding: [0xc3]
4028  %q = load i32, ptr %ptr_b
4029  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4030  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4031  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4032  ret <16 x i16> %res
4033}
4034
4035define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
4036; X86-LABEL: test_mask_packs_epi32_rmbk_256:
4037; X86:       # %bb.0:
4038; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4039; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4040; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
4041; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4042; X86-NEXT:    retl # encoding: [0xc3]
4043;
4044; X64-LABEL: test_mask_packs_epi32_rmbk_256:
4045; X64:       # %bb.0:
4046; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4047; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
4048; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4049; X64-NEXT:    retq # encoding: [0xc3]
4050  %q = load i32, ptr %ptr_b
4051  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4052  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4053  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4054  ret <16 x i16> %res
4055}
4056
4057define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4058; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
4059; X86:       # %bb.0:
4060; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4061; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4062; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
4063; X86-NEXT:    retl # encoding: [0xc3]
4064;
4065; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
4066; X64:       # %bb.0:
4067; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4068; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
4069; X64-NEXT:    retq # encoding: [0xc3]
4070  %q = load i32, ptr %ptr_b
4071  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4072  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4073  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4074  ret <16 x i16> %res
4075}
4076
4077declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
4078
4079define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
4080; CHECK-LABEL: test_mask_packs_epi16_rr_128:
4081; CHECK:       # %bb.0:
4082; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
4083; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4084  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4085  ret <16 x i8> %res
4086}
4087
4088define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
4089; X86-LABEL: test_mask_packs_epi16_rrk_128:
4090; X86:       # %bb.0:
4091; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4092; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
4093; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4094; X86-NEXT:    retl # encoding: [0xc3]
4095;
4096; X64-LABEL: test_mask_packs_epi16_rrk_128:
4097; X64:       # %bb.0:
4098; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4099; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
4100; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4101; X64-NEXT:    retq # encoding: [0xc3]
4102  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4103  ret <16 x i8> %res
4104}
4105
4106define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
4107; X86-LABEL: test_mask_packs_epi16_rrkz_128:
4108; X86:       # %bb.0:
4109; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4110; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
4111; X86-NEXT:    retl # encoding: [0xc3]
4112;
4113; X64-LABEL: test_mask_packs_epi16_rrkz_128:
4114; X64:       # %bb.0:
4115; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4116; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
4117; X64-NEXT:    retq # encoding: [0xc3]
4118  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4119  ret <16 x i8> %res
4120}
4121
4122define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
4123; X86-LABEL: test_mask_packs_epi16_rm_128:
4124; X86:       # %bb.0:
4125; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4126; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
4127; X86-NEXT:    retl # encoding: [0xc3]
4128;
4129; X64-LABEL: test_mask_packs_epi16_rm_128:
4130; X64:       # %bb.0:
4131; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
4132; X64-NEXT:    retq # encoding: [0xc3]
4133  %b = load <8 x i16>, ptr %ptr_b
4134  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4135  ret <16 x i8> %res
4136}
4137
4138define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
4139; X86-LABEL: test_mask_packs_epi16_rmk_128:
4140; X86:       # %bb.0:
4141; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4142; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4143; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
4144; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4145; X86-NEXT:    retl # encoding: [0xc3]
4146;
4147; X64-LABEL: test_mask_packs_epi16_rmk_128:
4148; X64:       # %bb.0:
4149; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4150; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
4151; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4152; X64-NEXT:    retq # encoding: [0xc3]
4153  %b = load <8 x i16>, ptr %ptr_b
4154  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4155  ret <16 x i8> %res
4156}
4157
4158define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) {
4159; X86-LABEL: test_mask_packs_epi16_rmkz_128:
4160; X86:       # %bb.0:
4161; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4162; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4163; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
4164; X86-NEXT:    retl # encoding: [0xc3]
4165;
4166; X64-LABEL: test_mask_packs_epi16_rmkz_128:
4167; X64:       # %bb.0:
4168; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4169; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
4170; X64-NEXT:    retq # encoding: [0xc3]
4171  %b = load <8 x i16>, ptr %ptr_b
4172  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4173  ret <16 x i8> %res
4174}
4175
4176declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
4177
4178define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
4179; CHECK-LABEL: test_mask_packs_epi16_rr_256:
4180; CHECK:       # %bb.0:
4181; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
4182; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4183  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4184  ret <32 x i8> %res
4185}
4186
4187define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
4188; X86-LABEL: test_mask_packs_epi16_rrk_256:
4189; X86:       # %bb.0:
4190; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4191; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
4192; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4193; X86-NEXT:    retl # encoding: [0xc3]
4194;
4195; X64-LABEL: test_mask_packs_epi16_rrk_256:
4196; X64:       # %bb.0:
4197; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4198; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
4199; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4200; X64-NEXT:    retq # encoding: [0xc3]
4201  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4202  ret <32 x i8> %res
4203}
4204
4205define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
4206; X86-LABEL: test_mask_packs_epi16_rrkz_256:
4207; X86:       # %bb.0:
4208; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4209; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
4210; X86-NEXT:    retl # encoding: [0xc3]
4211;
4212; X64-LABEL: test_mask_packs_epi16_rrkz_256:
4213; X64:       # %bb.0:
4214; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4215; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
4216; X64-NEXT:    retq # encoding: [0xc3]
4217  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4218  ret <32 x i8> %res
4219}
4220
4221define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
4222; X86-LABEL: test_mask_packs_epi16_rm_256:
4223; X86:       # %bb.0:
4224; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4225; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
4226; X86-NEXT:    retl # encoding: [0xc3]
4227;
4228; X64-LABEL: test_mask_packs_epi16_rm_256:
4229; X64:       # %bb.0:
4230; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
4231; X64-NEXT:    retq # encoding: [0xc3]
4232  %b = load <16 x i16>, ptr %ptr_b
4233  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4234  ret <32 x i8> %res
4235}
4236
4237define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
4238; X86-LABEL: test_mask_packs_epi16_rmk_256:
4239; X86:       # %bb.0:
4240; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4241; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4242; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
4243; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4244; X86-NEXT:    retl # encoding: [0xc3]
4245;
4246; X64-LABEL: test_mask_packs_epi16_rmk_256:
4247; X64:       # %bb.0:
4248; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4249; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
4250; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4251; X64-NEXT:    retq # encoding: [0xc3]
4252  %b = load <16 x i16>, ptr %ptr_b
4253  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4254  ret <32 x i8> %res
4255}
4256
4257define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) {
4258; X86-LABEL: test_mask_packs_epi16_rmkz_256:
4259; X86:       # %bb.0:
4260; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4261; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4262; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
4263; X86-NEXT:    retl # encoding: [0xc3]
4264;
4265; X64-LABEL: test_mask_packs_epi16_rmkz_256:
4266; X64:       # %bb.0:
4267; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4268; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
4269; X64-NEXT:    retq # encoding: [0xc3]
4270  %b = load <16 x i16>, ptr %ptr_b
4271  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4272  ret <32 x i8> %res
4273}
4274
4275declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
4276
4277
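; The packus tests below mirror the packs matrix above, exercising the
; unsigned-saturating forms (vpackusdw/vpackuswb) through the
; llvm.x86.avx512.mask.packusdw.* and llvm.x86.avx512.mask.packuswb.*
; intrinsics with the same rr/rm/rmb and k/kz variants.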
4278define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4279; CHECK-LABEL: test_mask_packus_epi32_rr_128:
4280; CHECK:       # %bb.0:
4281; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
4282; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4283  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4284  ret <8 x i16> %res
4285}
4286
4287define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
4288; X86-LABEL: test_mask_packus_epi32_rrk_128:
4289; X86:       # %bb.0:
4290; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4291; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4292; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
4293; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4294; X86-NEXT:    retl # encoding: [0xc3]
4295;
4296; X64-LABEL: test_mask_packus_epi32_rrk_128:
4297; X64:       # %bb.0:
4298; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4299; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
4300; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4301; X64-NEXT:    retq # encoding: [0xc3]
4302  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4303  ret <8 x i16> %res
4304}
4305
4306define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4307; X86-LABEL: test_mask_packus_epi32_rrkz_128:
4308; X86:       # %bb.0:
4309; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4310; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4311; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
4312; X86-NEXT:    retl # encoding: [0xc3]
4313;
4314; X64-LABEL: test_mask_packus_epi32_rrkz_128:
4315; X64:       # %bb.0:
4316; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4317; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
4318; X64-NEXT:    retq # encoding: [0xc3]
4319  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4320  ret <8 x i16> %res
4321}
4322
4323define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
4324; X86-LABEL: test_mask_packus_epi32_rm_128:
4325; X86:       # %bb.0:
4326; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4327; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
4328; X86-NEXT:    retl # encoding: [0xc3]
4329;
4330; X64-LABEL: test_mask_packus_epi32_rm_128:
4331; X64:       # %bb.0:
4332; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
4333; X64-NEXT:    retq # encoding: [0xc3]
4334  %b = load <4 x i32>, ptr %ptr_b
4335  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4336  ret <8 x i16> %res
4337}
4338
4339define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
4340; X86-LABEL: test_mask_packus_epi32_rmk_128:
4341; X86:       # %bb.0:
4342; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4343; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4344; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4345; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
4346; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4347; X86-NEXT:    retl # encoding: [0xc3]
4348;
4349; X64-LABEL: test_mask_packus_epi32_rmk_128:
4350; X64:       # %bb.0:
4351; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4352; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
4353; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4354; X64-NEXT:    retq # encoding: [0xc3]
4355  %b = load <4 x i32>, ptr %ptr_b
4356  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4357  ret <8 x i16> %res
4358}
4359
4360define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
4361; X86-LABEL: test_mask_packus_epi32_rmkz_128:
4362; X86:       # %bb.0:
4363; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4364; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4365; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4366; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
4367; X86-NEXT:    retl # encoding: [0xc3]
4368;
4369; X64-LABEL: test_mask_packus_epi32_rmkz_128:
4370; X64:       # %bb.0:
4371; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4372; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
4373; X64-NEXT:    retq # encoding: [0xc3]
4374  %b = load <4 x i32>, ptr %ptr_b
4375  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4376  ret <8 x i16> %res
4377}
4378
4379define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
4380; X86-LABEL: test_mask_packus_epi32_rmb_128:
4381; X86:       # %bb.0:
4382; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4383; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
4384; X86-NEXT:    retl # encoding: [0xc3]
4385;
4386; X64-LABEL: test_mask_packus_epi32_rmb_128:
4387; X64:       # %bb.0:
4388; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
4389; X64-NEXT:    retq # encoding: [0xc3]
4390  %q = load i32, ptr %ptr_b
4391  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4392  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4393  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4394  ret <8 x i16> %res
4395}
4396
4397define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
4398; X86-LABEL: test_mask_packus_epi32_rmbk_128:
4399; X86:       # %bb.0:
4400; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4401; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4402; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4403; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
4404; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4405; X86-NEXT:    retl # encoding: [0xc3]
4406;
4407; X64-LABEL: test_mask_packus_epi32_rmbk_128:
4408; X64:       # %bb.0:
4409; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4410; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
4411; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4412; X64-NEXT:    retq # encoding: [0xc3]
4413  %q = load i32, ptr %ptr_b
4414  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4415  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4416  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4417  ret <8 x i16> %res
4418}
4419
4420define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
4421; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
4422; X86:       # %bb.0:
4423; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4424; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4425; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4426; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
4427; X86-NEXT:    retl # encoding: [0xc3]
4428;
4429; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
4430; X64:       # %bb.0:
4431; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4432; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
4433; X64-NEXT:    retq # encoding: [0xc3]
4434  %q = load i32, ptr %ptr_b
4435  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4436  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4437  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4438  ret <8 x i16> %res
4439}
4440
4441declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
4442
4443define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4444; CHECK-LABEL: test_mask_packus_epi32_rr_256:
4445; CHECK:       # %bb.0:
4446; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
4447; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4448  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4449  ret <16 x i16> %res
4450}
4451
4452define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
4453; X86-LABEL: test_mask_packus_epi32_rrk_256:
4454; X86:       # %bb.0:
4455; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4456; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
4457; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4458; X86-NEXT:    retl # encoding: [0xc3]
4459;
4460; X64-LABEL: test_mask_packus_epi32_rrk_256:
4461; X64:       # %bb.0:
4462; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4463; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
4464; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4465; X64-NEXT:    retq # encoding: [0xc3]
4466  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4467  ret <16 x i16> %res
4468}
4469
4470define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
4471; X86-LABEL: test_mask_packus_epi32_rrkz_256:
4472; X86:       # %bb.0:
4473; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4474; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
4475; X86-NEXT:    retl # encoding: [0xc3]
4476;
4477; X64-LABEL: test_mask_packus_epi32_rrkz_256:
4478; X64:       # %bb.0:
4479; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4480; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
4481; X64-NEXT:    retq # encoding: [0xc3]
4482  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4483  ret <16 x i16> %res
4484}
4485
4486define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) {
4487; X86-LABEL: test_mask_packus_epi32_rm_256:
4488; X86:       # %bb.0:
4489; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4490; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
4491; X86-NEXT:    retl # encoding: [0xc3]
4492;
4493; X64-LABEL: test_mask_packus_epi32_rm_256:
4494; X64:       # %bb.0:
4495; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
4496; X64-NEXT:    retq # encoding: [0xc3]
4497  %b = load <8 x i32>, ptr %ptr_b
4498  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4499  ret <16 x i16> %res
4500}
4501
4502define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
4503; X86-LABEL: test_mask_packus_epi32_rmk_256:
4504; X86:       # %bb.0:
4505; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4506; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4507; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
4508; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4509; X86-NEXT:    retl # encoding: [0xc3]
4510;
4511; X64-LABEL: test_mask_packus_epi32_rmk_256:
4512; X64:       # %bb.0:
4513; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4514; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
4515; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4516; X64-NEXT:    retq # encoding: [0xc3]
4517  %b = load <8 x i32>, ptr %ptr_b
4518  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4519  ret <16 x i16> %res
4520}
4521
4522define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4523; X86-LABEL: test_mask_packus_epi32_rmkz_256:
4524; X86:       # %bb.0:
4525; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4526; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4527; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
4528; X86-NEXT:    retl # encoding: [0xc3]
4529;
4530; X64-LABEL: test_mask_packus_epi32_rmkz_256:
4531; X64:       # %bb.0:
4532; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4533; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
4534; X64-NEXT:    retq # encoding: [0xc3]
4535  %b = load <8 x i32>, ptr %ptr_b
4536  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4537  ret <16 x i16> %res
4538}
4539
4540define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) {
4541; X86-LABEL: test_mask_packus_epi32_rmb_256:
4542; X86:       # %bb.0:
4543; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4544; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
4545; X86-NEXT:    retl # encoding: [0xc3]
4546;
4547; X64-LABEL: test_mask_packus_epi32_rmb_256:
4548; X64:       # %bb.0:
4549; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
4550; X64-NEXT:    retq # encoding: [0xc3]
4551  %q = load i32, ptr %ptr_b
4552  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4553  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4554  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4555  ret <16 x i16> %res
4556}
4557
4558define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
4559; X86-LABEL: test_mask_packus_epi32_rmbk_256:
4560; X86:       # %bb.0:
4561; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4562; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4563; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
4564; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4565; X86-NEXT:    retl # encoding: [0xc3]
4566;
4567; X64-LABEL: test_mask_packus_epi32_rmbk_256:
4568; X64:       # %bb.0:
4569; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4570; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
4571; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4572; X64-NEXT:    retq # encoding: [0xc3]
4573  %q = load i32, ptr %ptr_b
4574  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4575  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4576  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4577  ret <16 x i16> %res
4578}
4579
4580define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
4581; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
4582; X86:       # %bb.0:
4583; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4584; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4585; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
4586; X86-NEXT:    retl # encoding: [0xc3]
4587;
4588; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
4589; X64:       # %bb.0:
4590; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4591; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
4592; X64-NEXT:    retq # encoding: [0xc3]
4593  %q = load i32, ptr %ptr_b
4594  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4595  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4596  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4597  ret <16 x i16> %res
4598}
4599
4600declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
4601
4602define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
4603; CHECK-LABEL: test_mask_packus_epi16_rr_128:
4604; CHECK:       # %bb.0:
4605; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
4606; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4607  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4608  ret <16 x i8> %res
4609}
4610
4611define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
4612; X86-LABEL: test_mask_packus_epi16_rrk_128:
4613; X86:       # %bb.0:
4614; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4615; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
4616; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4617; X86-NEXT:    retl # encoding: [0xc3]
4618;
4619; X64-LABEL: test_mask_packus_epi16_rrk_128:
4620; X64:       # %bb.0:
4621; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4622; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
4623; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4624; X64-NEXT:    retq # encoding: [0xc3]
4625  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4626  ret <16 x i8> %res
4627}
4628
4629define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
4630; X86-LABEL: test_mask_packus_epi16_rrkz_128:
4631; X86:       # %bb.0:
4632; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4633; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
4634; X86-NEXT:    retl # encoding: [0xc3]
4635;
4636; X64-LABEL: test_mask_packus_epi16_rrkz_128:
4637; X64:       # %bb.0:
4638; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4639; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
4640; X64-NEXT:    retq # encoding: [0xc3]
4641  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4642  ret <16 x i8> %res
4643}
4644
4645define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
4646; X86-LABEL: test_mask_packus_epi16_rm_128:
4647; X86:       # %bb.0:
4648; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4649; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
4650; X86-NEXT:    retl # encoding: [0xc3]
4651;
4652; X64-LABEL: test_mask_packus_epi16_rm_128:
4653; X64:       # %bb.0:
4654; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
4655; X64-NEXT:    retq # encoding: [0xc3]
4656  %b = load <8 x i16>, ptr %ptr_b
4657  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4658  ret <16 x i8> %res
4659}
4660
4661define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
4662; X86-LABEL: test_mask_packus_epi16_rmk_128:
4663; X86:       # %bb.0:
4664; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4665; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4666; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
4667; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4668; X86-NEXT:    retl # encoding: [0xc3]
4669;
4670; X64-LABEL: test_mask_packus_epi16_rmk_128:
4671; X64:       # %bb.0:
4672; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4673; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
4674; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4675; X64-NEXT:    retq # encoding: [0xc3]
4676  %b = load <8 x i16>, ptr %ptr_b
4677  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4678  ret <16 x i8> %res
4679}
4680
4681define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) {
4682; X86-LABEL: test_mask_packus_epi16_rmkz_128:
4683; X86:       # %bb.0:
4684; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4685; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4686; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
4687; X86-NEXT:    retl # encoding: [0xc3]
4688;
4689; X64-LABEL: test_mask_packus_epi16_rmkz_128:
4690; X64:       # %bb.0:
4691; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4692; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
4693; X64-NEXT:    retq # encoding: [0xc3]
4694  %b = load <8 x i16>, ptr %ptr_b
4695  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4696  ret <16 x i8> %res
4697}
4698
4699declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
4700
4701define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
4702; CHECK-LABEL: test_mask_packus_epi16_rr_256:
4703; CHECK:       # %bb.0:
4704; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
4705; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4706  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4707  ret <32 x i8> %res
4708}
4709
4710define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
4711; X86-LABEL: test_mask_packus_epi16_rrk_256:
4712; X86:       # %bb.0:
4713; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4714; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
4715; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4716; X86-NEXT:    retl # encoding: [0xc3]
4717;
4718; X64-LABEL: test_mask_packus_epi16_rrk_256:
4719; X64:       # %bb.0:
4720; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4721; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
4722; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4723; X64-NEXT:    retq # encoding: [0xc3]
4724  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4725  ret <32 x i8> %res
4726}
4727
4728define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
4729; X86-LABEL: test_mask_packus_epi16_rrkz_256:
4730; X86:       # %bb.0:
4731; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4732; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
4733; X86-NEXT:    retl # encoding: [0xc3]
4734;
4735; X64-LABEL: test_mask_packus_epi16_rrkz_256:
4736; X64:       # %bb.0:
4737; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4738; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
4739; X64-NEXT:    retq # encoding: [0xc3]
4740  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4741  ret <32 x i8> %res
4742}
4743
4744define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
4745; X86-LABEL: test_mask_packus_epi16_rm_256:
4746; X86:       # %bb.0:
4747; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4748; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
4749; X86-NEXT:    retl # encoding: [0xc3]
4750;
4751; X64-LABEL: test_mask_packus_epi16_rm_256:
4752; X64:       # %bb.0:
4753; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
4754; X64-NEXT:    retq # encoding: [0xc3]
4755  %b = load <16 x i16>, ptr %ptr_b
4756  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4757  ret <32 x i8> %res
4758}
4759
4760define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
4761; X86-LABEL: test_mask_packus_epi16_rmk_256:
4762; X86:       # %bb.0:
4763; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4764; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4765; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
4766; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4767; X86-NEXT:    retl # encoding: [0xc3]
4768;
4769; X64-LABEL: test_mask_packus_epi16_rmk_256:
4770; X64:       # %bb.0:
4771; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4772; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
4773; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4774; X64-NEXT:    retq # encoding: [0xc3]
4775  %b = load <16 x i16>, ptr %ptr_b
4776  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4777  ret <32 x i8> %res
4778}
4779
4780define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) {
4781; X86-LABEL: test_mask_packus_epi16_rmkz_256:
4782; X86:       # %bb.0:
4783; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4784; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4785; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
4786; X86-NEXT:    retl # encoding: [0xc3]
4787;
4788; X64-LABEL: test_mask_packus_epi16_rmkz_256:
4789; X64:       # %bb.0:
4790; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4791; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
4792; X64-NEXT:    retq # encoding: [0xc3]
4793  %b = load <16 x i16>, ptr %ptr_b
4794  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4795  ret <32 x i8> %res
4796}
4797
4798declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
4799
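; test_cmp_b_256 calls llvm.x86.avx512.mask.cmp.b.256 once per comparison
; predicate (imm 0 through 7) with an all-ones mask and collects the i32
; results into an <8 x i32>; test_mask_cmp_b_256 repeats the sequence under
; the %mask argument.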
4800define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
4801; X86-LABEL: test_cmp_b_256:
4802; X86:       # %bb.0:
4803; X86-NEXT:    pushl %ebx # encoding: [0x53]
4804; X86-NEXT:    .cfi_def_cfa_offset 8
4805; X86-NEXT:    pushl %edi # encoding: [0x57]
4806; X86-NEXT:    .cfi_def_cfa_offset 12
4807; X86-NEXT:    pushl %esi # encoding: [0x56]
4808; X86-NEXT:    .cfi_def_cfa_offset 16
4809; X86-NEXT:    .cfi_offset %esi, -16
4810; X86-NEXT:    .cfi_offset %edi, -12
4811; X86-NEXT:    .cfi_offset %ebx, -8
4812; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
4813; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4814; X86-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
4815; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4816; X86-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
4817; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4818; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
4819; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4820; X86-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
4821; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4822; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
4823; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
4824; X86-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4825; X86-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
4826; X86-NEXT:    vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
4827; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
4828; X86-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
4829; X86-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
4830; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
4831; X86-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4832; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4833; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4834; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
4835; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4836; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4837; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4838; X86-NEXT:    popl %esi # encoding: [0x5e]
4839; X86-NEXT:    .cfi_def_cfa_offset 12
4840; X86-NEXT:    popl %edi # encoding: [0x5f]
4841; X86-NEXT:    .cfi_def_cfa_offset 8
4842; X86-NEXT:    popl %ebx # encoding: [0x5b]
4843; X86-NEXT:    .cfi_def_cfa_offset 4
4844; X86-NEXT:    retl # encoding: [0xc3]
4845;
4846; X64-LABEL: test_cmp_b_256:
4847; X64:       # %bb.0:
4848; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
4849; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4850; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
4851; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4852; X64-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
4853; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4854; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
4855; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4856; X64-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
4857; X64-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4858; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
4859; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
4860; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4861; X64-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
4862; X64-NEXT:    vpinsrd $2, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x02]
4863; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
4864; X64-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
4865; X64-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
4866; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4867; X64-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4868; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4869; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4870; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
4871; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4872; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4873; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4874; X64-NEXT:    retq # encoding: [0xc3]
4875  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
4876  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
4877  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
4878  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
4879  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
4880  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
4881  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
4882  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
4883  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
4884  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
4885  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
4886  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
4887  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
4888  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
4889  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
4890  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
4891  ret <8 x i32> %vec7
4892}
4893
4894define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
4895; X86-LABEL: test_mask_cmp_b_256:
4896; X86:       # %bb.0:
4897; X86-NEXT:    pushl %ebp # encoding: [0x55]
4898; X86-NEXT:    .cfi_def_cfa_offset 8
4899; X86-NEXT:    pushl %ebx # encoding: [0x53]
4900; X86-NEXT:    .cfi_def_cfa_offset 12
4901; X86-NEXT:    pushl %edi # encoding: [0x57]
4902; X86-NEXT:    .cfi_def_cfa_offset 16
4903; X86-NEXT:    pushl %esi # encoding: [0x56]
4904; X86-NEXT:    .cfi_def_cfa_offset 20
4905; X86-NEXT:    .cfi_offset %esi, -20
4906; X86-NEXT:    .cfi_offset %edi, -16
4907; X86-NEXT:    .cfi_offset %ebx, -12
4908; X86-NEXT:    .cfi_offset %ebp, -8
4909; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
4910; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4911; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
4912; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4913; X86-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
4914; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4915; X86-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
4916; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4917; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
4918; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4919; X86-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
4920; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
4921; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
4922; X86-NEXT:    kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
4923; X86-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4924; X86-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
4925; X86-NEXT:    vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
4926; X86-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
4927; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
4928; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
4929; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4930; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4931; X86-NEXT:    vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
4932; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4933; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4934; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4935; X86-NEXT:    popl %esi # encoding: [0x5e]
4936; X86-NEXT:    .cfi_def_cfa_offset 16
4937; X86-NEXT:    popl %edi # encoding: [0x5f]
4938; X86-NEXT:    .cfi_def_cfa_offset 12
4939; X86-NEXT:    popl %ebx # encoding: [0x5b]
4940; X86-NEXT:    .cfi_def_cfa_offset 8
4941; X86-NEXT:    popl %ebp # encoding: [0x5d]
4942; X86-NEXT:    .cfi_def_cfa_offset 4
4943; X86-NEXT:    retl # encoding: [0xc3]
4944;
4945; X64-LABEL: test_mask_cmp_b_256:
4946; X64:       # %bb.0:
4947; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4948; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
4949; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4950; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
4951; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4952; X64-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
4953; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4954; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
4955; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4956; X64-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
4957; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
4958; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
4959; X64-NEXT:    kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
4960; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4961; X64-NEXT:    vpinsrd $1, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x01]
4962; X64-NEXT:    vpinsrd $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc1,0x02]
4963; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
4964; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4965; X64-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4966; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4967; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4968; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
4969; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4970; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4971; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4972; X64-NEXT:    retq # encoding: [0xc3]
4973  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
4974  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
4975  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
4976  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
4977  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
4978  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
4979  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
4980  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
4981  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
4982  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
4983  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
4984  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
4985  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
4986  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
4987  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
4988  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
4989  ret <8 x i32> %vec7
4990}
4991
4992declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
4993
4994define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
4995; X86-LABEL: test_ucmp_b_256:
4996; X86:       # %bb.0:
4997; X86-NEXT:    pushl %ebx # encoding: [0x53]
4998; X86-NEXT:    .cfi_def_cfa_offset 8
4999; X86-NEXT:    pushl %edi # encoding: [0x57]
5000; X86-NEXT:    .cfi_def_cfa_offset 12
5001; X86-NEXT:    pushl %esi # encoding: [0x56]
5002; X86-NEXT:    .cfi_def_cfa_offset 16
5003; X86-NEXT:    .cfi_offset %esi, -16
5004; X86-NEXT:    .cfi_offset %edi, -12
5005; X86-NEXT:    .cfi_offset %ebx, -8
5006; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
5007; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5008; X86-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
5009; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5010; X86-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
5011; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5012; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
5013; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5014; X86-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
5015; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5016; X86-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
5017; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
5018; X86-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5019; X86-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
5020; X86-NEXT:    vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
5021; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5022; X86-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
5023; X86-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
5024; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
5025; X86-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5026; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5027; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5028; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
5029; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5030; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5031; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5032; X86-NEXT:    popl %esi # encoding: [0x5e]
5033; X86-NEXT:    .cfi_def_cfa_offset 12
5034; X86-NEXT:    popl %edi # encoding: [0x5f]
5035; X86-NEXT:    .cfi_def_cfa_offset 8
5036; X86-NEXT:    popl %ebx # encoding: [0x5b]
5037; X86-NEXT:    .cfi_def_cfa_offset 4
5038; X86-NEXT:    retl # encoding: [0xc3]
5039;
5040; X64-LABEL: test_ucmp_b_256:
5041; X64:       # %bb.0:
5042; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
5043; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5044; X64-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
5045; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5046; X64-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
5047; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5048; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
5049; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5050; X64-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
5051; X64-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5052; X64-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
5053; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
5054; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5055; X64-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
5056; X64-NEXT:    vpinsrd $2, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x02]
5057; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5058; X64-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
5059; X64-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
5060; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5061; X64-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5062; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5063; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5064; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
5065; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5066; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5067; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5068; X64-NEXT:    retq # encoding: [0xc3]
5069  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
5070  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
5071  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
5072  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
5073  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
5074  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
5075  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
5076  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
5077  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
5078  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
5079  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
5080  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
5081  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
5082  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
5083  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
5084  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
5085  ret <8 x i32> %vec7
5086}
5087
5088define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
5089; X86-LABEL: test_mask_ucmp_b_256:
5090; X86:       # %bb.0:
5091; X86-NEXT:    pushl %ebp # encoding: [0x55]
5092; X86-NEXT:    .cfi_def_cfa_offset 8
5093; X86-NEXT:    pushl %ebx # encoding: [0x53]
5094; X86-NEXT:    .cfi_def_cfa_offset 12
5095; X86-NEXT:    pushl %edi # encoding: [0x57]
5096; X86-NEXT:    .cfi_def_cfa_offset 16
5097; X86-NEXT:    pushl %esi # encoding: [0x56]
5098; X86-NEXT:    .cfi_def_cfa_offset 20
5099; X86-NEXT:    .cfi_offset %esi, -20
5100; X86-NEXT:    .cfi_offset %edi, -16
5101; X86-NEXT:    .cfi_offset %ebx, -12
5102; X86-NEXT:    .cfi_offset %ebp, -8
5103; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
5104; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5105; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
5106; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5107; X86-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
5108; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5109; X86-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
5110; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5111; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
5112; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5113; X86-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
5114; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
5115; X86-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
5116; X86-NEXT:    kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
5117; X86-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5118; X86-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
5119; X86-NEXT:    vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
5120; X86-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
5121; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
5122; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
5123; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5124; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5125; X86-NEXT:    vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
5126; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5127; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5128; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5129; X86-NEXT:    popl %esi # encoding: [0x5e]
5130; X86-NEXT:    .cfi_def_cfa_offset 16
5131; X86-NEXT:    popl %edi # encoding: [0x5f]
5132; X86-NEXT:    .cfi_def_cfa_offset 12
5133; X86-NEXT:    popl %ebx # encoding: [0x5b]
5134; X86-NEXT:    .cfi_def_cfa_offset 8
5135; X86-NEXT:    popl %ebp # encoding: [0x5d]
5136; X86-NEXT:    .cfi_def_cfa_offset 4
5137; X86-NEXT:    retl # encoding: [0xc3]
5138;
5139; X64-LABEL: test_mask_ucmp_b_256:
5140; X64:       # %bb.0:
5141; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5142; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
5143; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5144; X64-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
5145; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5146; X64-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
5147; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5148; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
5149; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5150; X64-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
5151; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
5152; X64-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
5153; X64-NEXT:    kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
5154; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5155; X64-NEXT:    vpinsrd $1, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x01]
5156; X64-NEXT:    vpinsrd $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc1,0x02]
5157; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
5158; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5159; X64-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5160; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5161; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5162; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
5163; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5164; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5165; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5166; X64-NEXT:    retq # encoding: [0xc3]
5167  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
5168  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
5169  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
5170  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
5171  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
5172  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
5173  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
5174  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
5175  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
5176  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
5177  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
5178  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
5179  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
5180  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
5181  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
5182  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
5183  ret <8 x i32> %vec7
5184}
5185
5186declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
5187
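; 256-bit word compares: the legacy llvm.x86.avx512.mask.cmp.w.256 / ucmp.w.256 intrinsics, exercised for all eight predicates, unmasked (mask = -1) and masked.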
5188define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
5189; CHECK-LABEL: test_cmp_w_256:
5190; CHECK:       # %bb.0:
5191; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
5192; CHECK-NEXT:    vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8]
5193; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
5194; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
5195; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05]
5196; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9]
5197; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5198; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5199; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5200; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5201; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5202; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5203; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5204; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5205; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5206; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5207; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5208; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5209; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5210; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5211; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5212; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5213; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5214  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
5215  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5216  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
5217  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5218  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
5219  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5220  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
5221  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5222  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
5223  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5224  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
5225  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5226  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
5227  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5228  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
5229  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5230  ret <8 x i16> %vec7
5231}
5232
5233define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
5234; X86-LABEL: test_mask_cmp_w_256:
5235; X86:       # %bb.0:
5236; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5237; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5238; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5239; X86-NEXT:    vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
5240; X86-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
5241; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5242; X86-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
5243; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
5244; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5245; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5246; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5247; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5248; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5249; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5250; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5251; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5252; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5253; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5254; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5255; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5256; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5257; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5258; X86-NEXT:    retl # encoding: [0xc3]
5259;
5260; X64-LABEL: test_mask_cmp_w_256:
5261; X64:       # %bb.0:
5262; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5263; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5264; X64-NEXT:    vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
5265; X64-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
5266; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5267; X64-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
5268; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
5269; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5270; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5271; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5272; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5273; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5274; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5275; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5276; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5277; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5278; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5279; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5280; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5281; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5282; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5283; X64-NEXT:    retq # encoding: [0xc3]
5284  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
5285  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5286  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
5287  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5288  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
5289  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5290  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
5291  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5292  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
5293  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5294  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
5295  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5296  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
5297  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5298  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
5299  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5300  ret <8 x i16> %vec7
5301}
5302
5303declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
5304
5305define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
5306; CHECK-LABEL: test_ucmp_w_256:
5307; CHECK:       # %bb.0:
5308; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
5309; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
5310; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
5311; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
5312; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05]
5313; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06]
5314; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5315; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5316; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5317; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5318; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5319; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5320; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5321; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5322; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5323; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5324; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5325; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5326; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5327; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5328; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5329; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5330; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5331  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
5332  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5333  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
5334  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5335  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
5336  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5337  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
5338  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5339  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
5340  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5341  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
5342  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5343  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
5344  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5345  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
5346  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5347  ret <8 x i16> %vec7
5348}
5349
5350define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
5351; X86-LABEL: test_mask_ucmp_w_256:
5352; X86:       # %bb.0:
5353; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5354; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5355; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5356; X86-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
5357; X86-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
5358; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5359; X86-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
5360; X86-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
5361; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5362; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5363; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5364; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5365; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5366; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5367; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5368; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5369; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5370; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5371; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5372; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5373; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5374; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5375; X86-NEXT:    retl # encoding: [0xc3]
5376;
5377; X64-LABEL: test_mask_ucmp_w_256:
5378; X64:       # %bb.0:
5379; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5380; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5381; X64-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
5382; X64-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
5383; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5384; X64-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
5385; X64-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
5386; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5387; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5388; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5389; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5390; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5391; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5392; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5393; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5394; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5395; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5396; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5397; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5398; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5399; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5400; X64-NEXT:    retq # encoding: [0xc3]
5401  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
5402  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5403  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
5404  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5405  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
5406  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5407  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
5408  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5409  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
5410  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5411  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
5412  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5413  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
5414  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5415  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
5416  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5417  ret <8 x i16> %vec7
5418}
5419
5420declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
5421
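; 128-bit byte compares: the legacy llvm.x86.avx512.mask.cmp.b.128 / ucmp.b.128 intrinsics, exercised for all eight predicates, unmasked (mask = -1) and masked.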
5422define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
5423; CHECK-LABEL: test_cmp_b_128:
5424; CHECK:       # %bb.0:
5425; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
5426; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8]
5427; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
5428; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
5429; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05]
5430; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9]
5431; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5432; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5433; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5434; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5435; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5436; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5437; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5438; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5439; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5440; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5441; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5442; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5443; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5444; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5445; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5446; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5447  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
5448  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5449  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
5450  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5451  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
5452  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5453  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
5454  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5455  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
5456  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5457  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
5458  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5459  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
5460  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5461  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
5462  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5463  ret <8 x i16> %vec7
5464}
5465
5466define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
5467; X86-LABEL: test_mask_cmp_b_128:
5468; X86:       # %bb.0:
5469; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5470; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5471; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5472; X86-NEXT:    vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
5473; X86-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
5474; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5475; X86-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
5476; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
5477; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5478; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5479; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5480; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5481; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5482; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5483; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5484; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5485; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5486; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5487; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5488; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5489; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5490; X86-NEXT:    retl # encoding: [0xc3]
5491;
5492; X64-LABEL: test_mask_cmp_b_128:
5493; X64:       # %bb.0:
5494; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5495; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5496; X64-NEXT:    vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
5497; X64-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
5498; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5499; X64-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
5500; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
5501; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5502; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5503; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5504; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5505; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5506; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5507; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5508; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5509; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5510; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5511; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5512; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5513; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5514; X64-NEXT:    retq # encoding: [0xc3]
5515  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
5516  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5517  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
5518  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5519  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
5520  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5521  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
5522  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5523  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
5524  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5525  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
5526  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5527  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
5528  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5529  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
5530  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5531  ret <8 x i16> %vec7
5532}
5533
5534declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
5535
5536define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
5537; CHECK-LABEL: test_ucmp_b_128:
5538; CHECK:       # %bb.0:
5539; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
5540; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
5541; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
5542; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
5543; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05]
5544; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06]
5545; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5546; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5547; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5548; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5549; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5550; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5551; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5552; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5553; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5554; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5555; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5556; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5557; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5558; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5559; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5560; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5561  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
5562  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5563  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
5564  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5565  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
5566  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5567  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
5568  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5569  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
5570  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5571  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
5572  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5573  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
5574  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5575  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
5576  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5577  ret <8 x i16> %vec7
5578}
5579
5580define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
5581; X86-LABEL: test_mask_ucmp_b_128:
5582; X86:       # %bb.0:
5583; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5584; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5585; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5586; X86-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
5587; X86-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
5588; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5589; X86-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
5590; X86-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
5591; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5592; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5593; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5594; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5595; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5596; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5597; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5598; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5599; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5600; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5601; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5602; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5603; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5604; X86-NEXT:    retl # encoding: [0xc3]
5605;
5606; X64-LABEL: test_mask_ucmp_b_128:
5607; X64:       # %bb.0:
5608; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5609; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5610; X64-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
5611; X64-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
5612; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5613; X64-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
5614; X64-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
5615; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5616; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5617; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5618; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5619; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5620; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5621; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5622; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5623; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5624; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5625; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5626; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5627; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5628; X64-NEXT:    retq # encoding: [0xc3]
5629  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
5630  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5631  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
5632  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5633  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
5634  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5635  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
5636  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5637  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
5638  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5639  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
5640  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5641  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
5642  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5643  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
5644  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5645  ret <8 x i16> %vec7
5646}
5647
5648declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
5649
5650define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
5651; CHECK-LABEL: test_cmp_w_128:
5652; CHECK:       # %bb.0:
5653; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
5654; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5655; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
5656; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x02]
5657; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
5658; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x05]
5659; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe1]
5660; CHECK-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5661; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5662; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5663; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5664; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5665; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5666; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5667; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5668; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5669; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5670; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5671; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5672; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
5673; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5674; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5675  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
5676  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5677  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
5678  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5679  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
5680  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5681  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
5682  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5683  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
5684  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5685  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
5686  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5687  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
5688  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5689  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
5690  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5691  ret <8 x i8> %vec7
5692}
5693
5694define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
5695; X86-LABEL: test_mask_cmp_w_128:
5696; X86:       # %bb.0:
5697; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5698; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5699; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5700; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5701; X86-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
5702; X86-NEXT:    vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
5703; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5704; X86-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
5705; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
5706; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5707; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
5708; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5709; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
5710; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5711; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5712; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5713; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5714; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5715; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5716; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5717; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5718; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5719; X86-NEXT:    retl # encoding: [0xc3]
5720;
5721; X64-LABEL: test_mask_cmp_w_128:
5722; X64:       # %bb.0:
5723; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5724; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5725; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5726; X64-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
5727; X64-NEXT:    vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
5728; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5729; X64-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
5730; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
5731; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5732; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5733; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5734; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5735; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5736; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5737; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5738; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5739; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5740; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5741; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5742; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5743; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5744; X64-NEXT:    retq # encoding: [0xc3]
5745  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
5746  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5747  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
5748  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5749  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
5750  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5751  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
5752  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5753  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
5754  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5755  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
5756  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5757  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
5758  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5759  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
5760  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5761  ret <8 x i8> %vec7
5762}
5763
5764declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
5765
5766define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
5767; CHECK-LABEL: test_ucmp_w_128:
5768; CHECK:       # %bb.0:
5769; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
5770; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5771; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01]
5772; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x02]
5773; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
5774; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x05]
5775; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x06]
5776; CHECK-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5777; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5778; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5779; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5780; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5781; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5782; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5783; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5784; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5785; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5786; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5787; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5788; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
5789; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5790; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5791  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
5792  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5793  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
5794  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5795  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
5796  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5797  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
5798  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5799  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
5800  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5801  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
5802  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5803  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
5804  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5805  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
5806  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5807  ret <8 x i8> %vec7
5808}
5809
5810define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
5811; X86-LABEL: test_mask_ucmp_w_128:
5812; X86:       # %bb.0:
5813; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5814; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5815; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5816; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5817; X86-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
5818; X86-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
5819; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5820; X86-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
5821; X86-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
5822; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5823; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
5824; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5825; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
5826; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5827; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5828; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5829; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5830; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5831; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5832; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5833; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5834; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5835; X86-NEXT:    retl # encoding: [0xc3]
5836;
5837; X64-LABEL: test_mask_ucmp_w_128:
5838; X64:       # %bb.0:
5839; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5840; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5841; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5842; X64-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
5843; X64-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
5844; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5845; X64-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
5846; X64-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
5847; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5848; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5849; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5850; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5851; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5852; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5853; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5854; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5855; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5856; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5857; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5858; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5859; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5860; X64-NEXT:    retq # encoding: [0xc3]
5861  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
5862  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5863  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
5864  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5865  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
5866  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5867  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
5868  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5869  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
5870  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5871  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
5872  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5873  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
5874  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5875  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
5876  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5877  ret <8 x i8> %vec7
5878}
5879
5880declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
5881
5882define <16 x i8>@mm_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
5883; CHECK-LABEL: mm_avg_epu8:
5884; CHECK:       # %bb.0:
5885; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
5886; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5887  %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
5888  ret <16 x i8> %res
5889}
5890
5891define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
5892; X86-LABEL: mm_mask_avg_epu8:
5893; X86:       # %bb.0:
5894; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5895; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
5896; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5897; X86-NEXT:    retl # encoding: [0xc3]
5898;
5899; X64-LABEL: mm_mask_avg_epu8:
5900; X64:       # %bb.0:
5901; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5902; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
5903; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5904; X64-NEXT:    retq # encoding: [0xc3]
5905  %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
5906  ret <16 x i8> %res
5907}
5908
5909declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
5910
5911define <16 x i8>@test_int_x86_avx512_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1) {
5912; CHECK-LABEL: test_int_x86_avx512_pabs_b_128:
5913; CHECK:       # %bb.0:
5914; CHECK-NEXT:    vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
5915; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5916  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
5917  ret <16 x i8> %res
5918}
5919
5920define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
5921; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128:
5922; X86:       # %bb.0:
5923; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5924; X86-NEXT:    vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
5925; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5926; X86-NEXT:    retl # encoding: [0xc3]
5927;
5928; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128:
5929; X64:       # %bb.0:
5930; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5931; X64-NEXT:    vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
5932; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5933; X64-NEXT:    retq # encoding: [0xc3]
5934  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
5935  ret <16 x i8> %res
5936}
5937
5938declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
5939
5940define <32 x i8>@mm256_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
5941; CHECK-LABEL: mm256_avg_epu8:
5942; CHECK:       # %bb.0:
5943; CHECK-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
5944; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5945  %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
5946  ret <32 x i8> %res
5947}
5948
5949define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
5950; X86-LABEL: mm256_mask_avg_epu8:
5951; X86:       # %bb.0:
5952; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
5953; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
5954; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5955; X86-NEXT:    retl # encoding: [0xc3]
5956;
5957; X64-LABEL: mm256_mask_avg_epu8:
5958; X64:       # %bb.0:
5959; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5960; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
5961; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5962; X64-NEXT:    retq # encoding: [0xc3]
5963  %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
5964  ret <32 x i8> %res
5965}
5966
5967declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
5968
5969define <32 x i8>@test_int_x86_avx512_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1) {
5970; CHECK-LABEL: test_int_x86_avx512_pabs_b_256:
5971; CHECK:       # %bb.0:
5972; CHECK-NEXT:    vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
5973; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5974  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
5975  ret <32 x i8> %res
5976}
5977
5978define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
5979; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256:
5980; X86:       # %bb.0:
5981; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
5982; X86-NEXT:    vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
5983; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5984; X86-NEXT:    retl # encoding: [0xc3]
5985;
5986; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256:
5987; X64:       # %bb.0:
5988; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5989; X64-NEXT:    vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
5990; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5991; X64-NEXT:    retq # encoding: [0xc3]
5992  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
5993  ret <32 x i8> %res
5994}
5995
5996declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
5997
5998define <8 x i16>@mm_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
5999; CHECK-LABEL: mm_avg_epu16:
6000; CHECK:       # %bb.0:
6001; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
6002; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6003  %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6004  ret <8 x i16> %res
6005}
6006
6007define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6008; X86-LABEL: mm_mask_avg_epu16:
6009; X86:       # %bb.0:
6010; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6011; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6012; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
6013; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6014; X86-NEXT:    retl # encoding: [0xc3]
6015;
6016; X64-LABEL: mm_mask_avg_epu16:
6017; X64:       # %bb.0:
6018; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6019; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
6020; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6021; X64-NEXT:    retq # encoding: [0xc3]
6022  %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6023  ret <8 x i16> %res
6024}
6025
6026declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
6027
6028define <8 x i16>@test_int_x86_avx512_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1) {
6029; CHECK-LABEL: test_int_x86_avx512_pabs_w_128:
6030; CHECK:       # %bb.0:
6031; CHECK-NEXT:    vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
6032; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6033  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6034  ret <8 x i16> %res
6035}
6036
6037define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6038; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128:
6039; X86:       # %bb.0:
6040; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6041; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6042; X86-NEXT:    vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
6043; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6044; X86-NEXT:    retl # encoding: [0xc3]
6045;
6046; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128:
6047; X64:       # %bb.0:
6048; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6049; X64-NEXT:    vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
6050; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6051; X64-NEXT:    retq # encoding: [0xc3]
6052  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6053  ret <8 x i16> %res
6054}
6055
6056declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6057
6058define <16 x i16>@mm256_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6059; CHECK-LABEL: mm256_avg_epu16:
6060; CHECK:       # %bb.0:
6061; CHECK-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
6062; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6063  %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6064  ret <16 x i16> %res
6065}
6066
6067define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6068; X86-LABEL: mm256_mask_avg_epu16:
6069; X86:       # %bb.0:
6070; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6071; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
6072; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6073; X86-NEXT:    retl # encoding: [0xc3]
6074;
6075; X64-LABEL: mm256_mask_avg_epu16:
6076; X64:       # %bb.0:
6077; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6078; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
6079; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6080; X64-NEXT:    retq # encoding: [0xc3]
6081  %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6082  ret <16 x i16> %res
6083}
6084
6085declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
6086
6087define <16 x i16>@test_int_x86_avx512_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1) {
6088; CHECK-LABEL: test_int_x86_avx512_pabs_w_256:
6089; CHECK:       # %bb.0:
6090; CHECK-NEXT:    vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
6091; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6092  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6093  ret <16 x i16> %res
6094}
6095
6096define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6097; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256:
6098; X86:       # %bb.0:
6099; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6100; X86-NEXT:    vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
6101; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6102; X86-NEXT:    retl # encoding: [0xc3]
6103;
6104; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256:
6105; X64:       # %bb.0:
6106; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6107; X64-NEXT:    vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
6108; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6109; X64-NEXT:    retq # encoding: [0xc3]
6110  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6111  ret <16 x i16> %res
6112}
6113
6114declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6115
6116declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
6117
6118define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
6119; X86-LABEL: test_int_x86_avx512_ptestm_b_128:
6120; X86:       # %bb.0:
6121; X86-NEXT:    vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
6122; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6123; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6124; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6125; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6126; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6127; X86-NEXT:    retl # encoding: [0xc3]
6128;
6129; X64-LABEL: test_int_x86_avx512_ptestm_b_128:
6130; X64:       # %bb.0:
6131; X64-NEXT:    vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
6132; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6133; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6134; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6135; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6136; X64-NEXT:    retq # encoding: [0xc3]
6137  %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
6138  %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
6139  %res2 = add i16 %res, %res1
6140  ret i16 %res2
6141}
6142
6143declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
6144
6145define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
6146; X86-LABEL: test_int_x86_avx512_ptestm_b_256:
6147; X86:       # %bb.0:
6148; X86-NEXT:    vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
6149; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6150; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6151; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
6152; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6153; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6154; X86-NEXT:    retl # encoding: [0xc3]
6155;
6156; X64-LABEL: test_int_x86_avx512_ptestm_b_256:
6157; X64:       # %bb.0:
6158; X64-NEXT:    vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
6159; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6160; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6161; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6162; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6163; X64-NEXT:    retq # encoding: [0xc3]
6164  %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
6165  %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
6166  %res2 = add i32 %res, %res1
6167  ret i32 %res2
6168}
6169
6170declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
6171
6172define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6173; X86-LABEL: test_int_x86_avx512_ptestm_w_128:
6174; X86:       # %bb.0:
6175; X86-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
6176; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6177; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6178; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
6179; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
6180; X86-NEXT:    retl # encoding: [0xc3]
6181;
6182; X64-LABEL: test_int_x86_avx512_ptestm_w_128:
6183; X64:       # %bb.0:
6184; X64-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
6185; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6186; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
6187; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
6188; X64-NEXT:    # kill: def $al killed $al killed $eax
6189; X64-NEXT:    retq # encoding: [0xc3]
6190  %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6191  %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6192  %res2 = add i8 %res, %res1
6193  ret i8 %res2
6194}
6195
6196declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
6197
6198define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6199; X86-LABEL: test_int_x86_avx512_ptestm_w_256:
6200; X86:       # %bb.0:
6201; X86-NEXT:    vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
6202; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6203; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6204; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6205; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6206; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6207; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6208; X86-NEXT:    retl # encoding: [0xc3]
6209;
6210; X64-LABEL: test_int_x86_avx512_ptestm_w_256:
6211; X64:       # %bb.0:
6212; X64-NEXT:    vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
6213; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6214; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6215; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6216; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6217; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6218; X64-NEXT:    retq # encoding: [0xc3]
6219  %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6220  %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6221  %res2 = add i16 %res, %res1
6222  ret i16 %res2
6223}
6224
6225declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
6226
6227define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
6228; X86-LABEL: test_int_x86_avx512_ptestnm_b_128:
6229; X86:       # %bb.0:
6230; X86-NEXT:    vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
6231; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6232; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6233; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6234; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6235; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6236; X86-NEXT:    retl # encoding: [0xc3]
6237;
6238; X64-LABEL: test_int_x86_avx512_ptestnm_b_128:
6239; X64:       # %bb.0:
6240; X64-NEXT:    vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
6241; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6242; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6243; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6244; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6245; X64-NEXT:    retq # encoding: [0xc3]
6246  %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
6247  %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
6248  %res2 = add i16 %res, %res1
6249  ret i16 %res2
6250}
6251
6252declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
6253
6254define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
6255; X86-LABEL: test_int_x86_avx512_ptestnm_b_256:
6256; X86:       # %bb.0:
6257; X86-NEXT:    vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
6258; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6259; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6260; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
6261; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6262; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6263; X86-NEXT:    retl # encoding: [0xc3]
6264;
6265; X64-LABEL: test_int_x86_avx512_ptestnm_b_256:
6266; X64:       # %bb.0:
6267; X64-NEXT:    vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
6268; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6269; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6270; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6271; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6272; X64-NEXT:    retq # encoding: [0xc3]
6273  %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
6274  %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
6275  %res2 = add i32 %res, %res1
6276  ret i32 %res2
6277}
6278
6279declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8)
6280
6281define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6282; X86-LABEL: test_int_x86_avx512_ptestnm_w_128:
6283; X86:       # %bb.0:
6284; X86-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
6285; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6286; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6287; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
6288; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
6289; X86-NEXT:    retl # encoding: [0xc3]
6290;
6291; X64-LABEL: test_int_x86_avx512_ptestnm_w_128:
6292; X64:       # %bb.0:
6293; X64-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
6294; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6295; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
6296; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
6297; X64-NEXT:    # kill: def $al killed $al killed $eax
6298; X64-NEXT:    retq # encoding: [0xc3]
6299  %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6300  %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6301  %res2 = add i8 %res, %res1
6302  ret i8 %res2
6303}
6304
6305declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16)
6306
6307define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6308; X86-LABEL: test_int_x86_avx512_ptestnm_w_256:
6309; X86:       # %bb.0:
6310; X86-NEXT:    vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
6311; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6312; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6313; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6314; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6315; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6316; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6317; X86-NEXT:    retl # encoding: [0xc3]
6318;
6319; X64-LABEL: test_int_x86_avx512_ptestnm_w_256:
6320; X64:       # %bb.0:
6321; X64-NEXT:    vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
6322; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6323; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6324; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6325; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6326; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6327; X64-NEXT:    retq # encoding: [0xc3]
6328  %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6329  %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6330  %res2 = add i16 %res, %res1
6331  ret i16 %res2
6332}
6333
6334declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
6335
6336define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
6337; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
6338; CHECK:       # %bb.0:
6339; CHECK-NEXT:    vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
6340; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
6341; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6342  %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
6343  ret i16 %res
6344}
6345
6346declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
6347
6348define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
6349; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
6350; CHECK:       # %bb.0:
6351; CHECK-NEXT:    vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
6352; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6353; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6354  %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
6355  ret i32 %res
6356}
6357
6358declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
6359
6360define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
6361; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
6362; CHECK:       # %bb.0:
6363; CHECK-NEXT:    vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
6364; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6365; CHECK-NEXT:    # kill: def $al killed $al killed $eax
6366; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6367  %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
6368  ret i8 %res
6369}
6370
6371declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
6372
6373define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
6374; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
6375; CHECK:       # %bb.0:
6376; CHECK-NEXT:    vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
6377; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6378; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
6379; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6380; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6381  %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
6382  ret i16 %res
6383}
6384
6385declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6386
6387define <8 x i16>@test_int_x86_avx512_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6388; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_128:
6389; CHECK:       # %bb.0:
6390; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
6391; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6392  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6393  ret <8 x i16> %res
6394}
6395
6396define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6397; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
6398; X86:       # %bb.0:
6399; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6400; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6401; X86-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
6402; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6403; X86-NEXT:    retl # encoding: [0xc3]
6404;
6405; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
6406; X64:       # %bb.0:
6407; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6408; X64-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
6409; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6410; X64-NEXT:    retq # encoding: [0xc3]
6411  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6412  ret <8 x i16> %res
6413}
6414
6415declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6416
6417define <16 x i16>@test_int_x86_avx512_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6418; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_256:
6419; CHECK:       # %bb.0:
6420; CHECK-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
6421; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6422  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6423  ret <16 x i16> %res
6424}
6425
6426define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6427; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
6428; X86:       # %bb.0:
6429; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6430; X86-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
6431; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6432; X86-NEXT:    retl # encoding: [0xc3]
6433;
6434; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
6435; X64:       # %bb.0:
6436; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6437; X64-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
6438; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6439; X64-NEXT:    retq # encoding: [0xc3]
6440  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6441  ret <16 x i16> %res
6442}
6443
6444declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6445
6446define <8 x i16>@test_int_x86_avx512_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6447; CHECK-LABEL: test_int_x86_avx512_pmulh_w_128:
6448; CHECK:       # %bb.0:
6449; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
6450; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6451  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6452  ret <8 x i16> %res
6453}
6454
6455define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6456; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
6457; X86:       # %bb.0:
6458; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6459; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6460; X86-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
6461; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6462; X86-NEXT:    retl # encoding: [0xc3]
6463;
6464; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
6465; X64:       # %bb.0:
6466; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6467; X64-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
6468; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6469; X64-NEXT:    retq # encoding: [0xc3]
6470  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6471  ret <8 x i16> %res
6472}
6473
6474declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6475
6476define <16 x i16>@test_int_x86_avx512_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6477; CHECK-LABEL: test_int_x86_avx512_pmulh_w_256:
6478; CHECK:       # %bb.0:
6479; CHECK-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
6480; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6481  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6482  ret <16 x i16> %res
6483}
6484
6485define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6486; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
6487; X86:       # %bb.0:
6488; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6489; X86-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
6490; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6491; X86-NEXT:    retl # encoding: [0xc3]
6492;
6493; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
6494; X64:       # %bb.0:
6495; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6496; X64-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
6497; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6498; X64-NEXT:    retq # encoding: [0xc3]
6499  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6500  ret <16 x i16> %res
6501}
6502
6503declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6504
6505define <8 x i16>@test_int_x86_avx512_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6506; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_128:
6507; CHECK:       # %bb.0:
6508; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
6509; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6510  %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6511  ret <8 x i16> %res
6512}
6513
6514define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6515; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
6516; X86:       # %bb.0:
6517; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6518; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6519; X86-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
6520; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6521; X86-NEXT:    retl # encoding: [0xc3]
6522;
6523; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
6524; X64:       # %bb.0:
6525; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6526; X64-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
6527; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6528; X64-NEXT:    retq # encoding: [0xc3]
6529  %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6530  ret <8 x i16> %res
6531}
6532
6533declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6534
6535define <16 x i16>@test_int_x86_avx512_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6536; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_256:
6537; CHECK:       # %bb.0:
6538; CHECK-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
6539; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6540  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6541  ret <16 x i16> %res
6542}
6543
6544define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6545; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
6546; X86:       # %bb.0:
6547; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6548; X86-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
6549; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6550; X86-NEXT:    retl # encoding: [0xc3]
6551;
6552; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
6553; X64:       # %bb.0:
6554; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6555; X64-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
6556; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6557; X64-NEXT:    retq # encoding: [0xc3]
6558  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6559  ret <16 x i16> %res
6560}
6561
6562declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
6563
6564define <8 x i16>@test_int_x86_avx512_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2) {
6565; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_128:
6566; CHECK:       # %bb.0:
6567; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
6568; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6569  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
6570  ret <8 x i16> %res
6571}
6572
6573define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
6574; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
6575; X86:       # %bb.0:
6576; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6577; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6578; X86-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
6579; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6580; X86-NEXT:    retl # encoding: [0xc3]
6581;
6582; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
6583; X64:       # %bb.0:
6584; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6585; X64-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
6586; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6587; X64-NEXT:    retq # encoding: [0xc3]
6588  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
6589  ret <8 x i16> %res
6590}
6591
6592declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
6593
6594define <16 x i16>@test_int_x86_avx512_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2) {
6595; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_256:
6596; CHECK:       # %bb.0:
6597; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
6598; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6599  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
6600  ret <16 x i16> %res
6601}
6602
6603define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
6604; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
6605; X86:       # %bb.0:
6606; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6607; X86-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
6608; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6609; X86-NEXT:    retl # encoding: [0xc3]
6610;
6611; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
6612; X64:       # %bb.0:
6613; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6614; X64-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
6615; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6616; X64-NEXT:    retq # encoding: [0xc3]
6617  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
6618  ret <16 x i16> %res
6619}
6620
6621declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
6622
6623define <4 x i32>@test_int_x86_avx512_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2) {
6624; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_128:
6625; CHECK:       # %bb.0:
6626; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
6627; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6628  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
6629  ret <4 x i32> %res
6630}
6631
6632define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
6633; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
6634; X86:       # %bb.0:
6635; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6636; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6637; X86-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
6638; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6639; X86-NEXT:    retl # encoding: [0xc3]
6640;
6641; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
6642; X64:       # %bb.0:
6643; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6644; X64-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
6645; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6646; X64-NEXT:    retq # encoding: [0xc3]
6647  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
6648  ret <4 x i32> %res
6649}
6650
6651declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
6652
6653define <8 x i32>@test_int_x86_avx512_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2) {
6654; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_256:
6655; CHECK:       # %bb.0:
6656; CHECK-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
6657; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6658  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
6659  ret <8 x i32> %res
6660}
6661
6662define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
6663; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
6664; X86:       # %bb.0:
6665; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6666; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6667; X86-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
6668; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6669; X86-NEXT:    retl # encoding: [0xc3]
6670;
6671; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
6672; X64:       # %bb.0:
6673; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6674; X64-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
6675; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6676; X64-NEXT:    retq # encoding: [0xc3]
6677  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
6678  ret <8 x i32> %res
6679}
6680
6681declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6682
6683define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6684; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
6685; CHECK:       # %bb.0:
6686; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
6687; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6688  %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6689  ret <8 x i16> %res
6690}
6691
6692define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6693; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
6694; X86:       # %bb.0:
6695; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6696; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6697; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
6698; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6699; X86-NEXT:    retl # encoding: [0xc3]
6700;
6701; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
6702; X64:       # %bb.0:
6703; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6704; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
6705; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6706; X64-NEXT:    retq # encoding: [0xc3]
6707  %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6708  ret <8 x i16> %res
6709}
6710
6711define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
6712; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
6713; X86:       # %bb.0:
6714; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6715; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6716; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
6717; X86-NEXT:    retl # encoding: [0xc3]
6718;
6719; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
6720; X64:       # %bb.0:
6721; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6722; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
6723; X64-NEXT:    retq # encoding: [0xc3]
6724  %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
6725  ret <8 x i16> %res
6726}
6727
6728declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6729
6730define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6731; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
6732; CHECK:       # %bb.0:
6733; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
6734; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6735  %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6736  ret <16 x i16> %res
6737}
6738
6739define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6740; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
6741; X86:       # %bb.0:
6742; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6743; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
6744; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6745; X86-NEXT:    retl # encoding: [0xc3]
6746;
6747; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
6748; X64:       # %bb.0:
6749; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6750; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
6751; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6752; X64-NEXT:    retq # encoding: [0xc3]
6753  %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6754  ret <16 x i16> %res
6755}
6756
6757define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
6758; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
6759; X86:       # %bb.0:
6760; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6761; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
6762; X86-NEXT:    retl # encoding: [0xc3]
6763;
6764; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
6765; X64:       # %bb.0:
6766; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6767; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
6768; X64-NEXT:    retq # encoding: [0xc3]
6769  %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
6770  ret <16 x i16> %res
6771}
6772
6773declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6774
6775define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6776; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
6777; CHECK:       # %bb.0:
6778; CHECK-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
6779; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6780  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6781  ret <8 x i16> %res
6782}
6783
6784define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6785; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
6786; X86:       # %bb.0:
6787; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6788; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6789; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
6790; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6791; X86-NEXT:    retl # encoding: [0xc3]
6792;
6793; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
6794; X64:       # %bb.0:
6795; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6796; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
6797; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6798; X64-NEXT:    retq # encoding: [0xc3]
6799  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6800  ret <8 x i16> %res
6801}
6802
6803declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6804
6805define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6806; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
6807; X86:       # %bb.0:
6808; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6809; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6810; X86-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
6811; X86-NEXT:    retl # encoding: [0xc3]
6812;
6813; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
6814; X64:       # %bb.0:
6815; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6816; X64-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
6817; X64-NEXT:    retq # encoding: [0xc3]
6818  %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6819  ret <8 x i16> %res
6820}
6821
6822declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6823
6824define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6825; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
6826; CHECK:       # %bb.0:
6827; CHECK-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
6828; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6829  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6830  ret <16 x i16> %res
6831}
6832
6833define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6834; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
6835; X86:       # %bb.0:
6836; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6837; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
6838; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6839; X86-NEXT:    retl # encoding: [0xc3]
6840;
6841; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
6842; X64:       # %bb.0:
6843; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6844; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
6845; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6846; X64-NEXT:    retq # encoding: [0xc3]
6847  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6848  ret <16 x i16> %res
6849}
6850
6851declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6852
6853define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6854; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
6855; X86:       # %bb.0:
6856; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6857; X86-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
6858; X86-NEXT:    retl # encoding: [0xc3]
6859;
6860; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
6861; X64:       # %bb.0:
6862; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6863; X64-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
6864; X64-NEXT:    retq # encoding: [0xc3]
6865  %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6866  ret <16 x i16> %res
6867}
6868
6869declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6870
6871define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6872; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
6873; CHECK:       # %bb.0:
6874; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
6875; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6876  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6877  ret <8 x i16> %res
6878}
6879
6880define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6881; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
6882; X86:       # %bb.0:
6883; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6884; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6885; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
6886; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6887; X86-NEXT:    retl # encoding: [0xc3]
6888;
6889; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
6890; X64:       # %bb.0:
6891; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6892; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
6893; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6894; X64-NEXT:    retq # encoding: [0xc3]
6895  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6896  ret <8 x i16> %res
6897}
6898
6899declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6900
6901define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6902; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256:
6903; CHECK:       # %bb.0:
6904; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2]
6905; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6906  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6907  ret <16 x i16> %res
6908}
6909
6910define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6911; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
6912; X86:       # %bb.0:
6913; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6914; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
6915; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6916; X86-NEXT:    retl # encoding: [0xc3]
6917;
6918; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
6919; X64:       # %bb.0:
6920; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6921; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
6922; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6923; X64-NEXT:    retq # encoding: [0xc3]
6924  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6925  ret <16 x i16> %res
6926}
6927
6928declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
6929
6930define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
6931; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
6932; X86:       # %bb.0:
6933; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
6934; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6935; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6936; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02]
6937; X86-NEXT:    vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
6938; X86-NEXT:    vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04]
6939; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
6940; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
6941; X86-NEXT:    retl # encoding: [0xc3]
6942;
6943; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
6944; X64:       # %bb.0:
6945; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
6946; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6947; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02]
6948; X64-NEXT:    vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
6949; X64-NEXT:    vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04]
6950; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
6951; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
6952; X64-NEXT:    retq # encoding: [0xc3]
6953  %res0 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
6954  %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4)
6955  %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1)
6956  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
6957  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res3, <8 x i16> %res1, 1
6958  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> }  %res4, <8 x i16> %res2, 2
6959  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
6960}
6961
6962declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
6963
6964define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
6965; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
6966; X86:       # %bb.0:
6967; X86-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
6968; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6969; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02]
6970; X86-NEXT:    vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
6971; X86-NEXT:    vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04]
6972; X86-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
6973; X86-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
6974; X86-NEXT:    retl # encoding: [0xc3]
6975;
6976; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
6977; X64:       # %bb.0:
6978; X64-NEXT:    vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
6979; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6980; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02]
6981; X64-NEXT:    vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
6982; X64-NEXT:    vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04]
6983; X64-NEXT:    vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
6984; X64-NEXT:    vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
6985; X64-NEXT:    retq # encoding: [0xc3]
6986  %res0 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
6987  %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4)
6988  %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1)
6989  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
6990  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res3, <16 x i16> %res1, 1
6991  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> }  %res4, <16 x i16> %res2, 2
6992  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
6993}
6994
6995define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
6996; CHECK-LABEL: test_mask_adds_epu16_rr_128:
6997; CHECK:       # %bb.0:
6998; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
6999; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7000  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7001  ret <8 x i16> %res
7002}
7003
7004define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7005; X86-LABEL: test_mask_adds_epu16_rrk_128:
7006; X86:       # %bb.0:
7007; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7008; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7009; X86-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
7010; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7011; X86-NEXT:    retl # encoding: [0xc3]
7012;
7013; X64-LABEL: test_mask_adds_epu16_rrk_128:
7014; X64:       # %bb.0:
7015; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7016; X64-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
7017; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7018; X64-NEXT:    retq # encoding: [0xc3]
7019  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7020  ret <8 x i16> %res
7021}
7022
7023define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7024; X86-LABEL: test_mask_adds_epu16_rrkz_128:
7025; X86:       # %bb.0:
7026; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7027; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7028; X86-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
7029; X86-NEXT:    retl # encoding: [0xc3]
7030;
7031; X64-LABEL: test_mask_adds_epu16_rrkz_128:
7032; X64:       # %bb.0:
7033; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7034; X64-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
7035; X64-NEXT:    retq # encoding: [0xc3]
7036  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7037  ret <8 x i16> %res
7038}
7039
7040define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
7041; X86-LABEL: test_mask_adds_epu16_rm_128:
7042; X86:       # %bb.0:
7043; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7044; X86-NEXT:    vpaddusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x00]
7045; X86-NEXT:    retl # encoding: [0xc3]
7046;
7047; X64-LABEL: test_mask_adds_epu16_rm_128:
7048; X64:       # %bb.0:
7049; X64-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
7050; X64-NEXT:    retq # encoding: [0xc3]
7051  %b = load <8 x i16>, ptr %ptr_b
7052  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7053  ret <8 x i16> %res
7054}
7055
7056define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
7057; X86-LABEL: test_mask_adds_epu16_rmk_128:
7058; X86:       # %bb.0:
7059; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7060; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7061; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7062; X86-NEXT:    vpaddusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x08]
7063; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7064; X86-NEXT:    retl # encoding: [0xc3]
7065;
7066; X64-LABEL: test_mask_adds_epu16_rmk_128:
7067; X64:       # %bb.0:
7068; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7069; X64-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
7070; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7071; X64-NEXT:    retq # encoding: [0xc3]
7072  %b = load <8 x i16>, ptr %ptr_b
7073  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7074  ret <8 x i16> %res
7075}
7076
7077define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
7078; X86-LABEL: test_mask_adds_epu16_rmkz_128:
7079; X86:       # %bb.0:
7080; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7081; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7082; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7083; X86-NEXT:    vpaddusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x00]
7084; X86-NEXT:    retl # encoding: [0xc3]
7085;
7086; X64-LABEL: test_mask_adds_epu16_rmkz_128:
7087; X64:       # %bb.0:
7088; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7089; X64-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
7090; X64-NEXT:    retq # encoding: [0xc3]
7091  %b = load <8 x i16>, ptr %ptr_b
7092  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7093  ret <8 x i16> %res
7094}
7095
7096declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7097
7098define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7099; CHECK-LABEL: test_mask_adds_epu16_rr_256:
7100; CHECK:       # %bb.0:
7101; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
7102; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7103  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7104  ret <16 x i16> %res
7105}
7106
7107define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7108; X86-LABEL: test_mask_adds_epu16_rrk_256:
7109; X86:       # %bb.0:
7110; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7111; X86-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
7112; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7113; X86-NEXT:    retl # encoding: [0xc3]
7114;
7115; X64-LABEL: test_mask_adds_epu16_rrk_256:
7116; X64:       # %bb.0:
7117; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7118; X64-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
7119; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7120; X64-NEXT:    retq # encoding: [0xc3]
7121  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7122  ret <16 x i16> %res
7123}
7124
7125define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7126; X86-LABEL: test_mask_adds_epu16_rrkz_256:
7127; X86:       # %bb.0:
7128; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7129; X86-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
7130; X86-NEXT:    retl # encoding: [0xc3]
7131;
7132; X64-LABEL: test_mask_adds_epu16_rrkz_256:
7133; X64:       # %bb.0:
7134; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7135; X64-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
7136; X64-NEXT:    retq # encoding: [0xc3]
7137  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7138  ret <16 x i16> %res
7139}
7140
7141define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
7142; X86-LABEL: test_mask_adds_epu16_rm_256:
7143; X86:       # %bb.0:
7144; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7145; X86-NEXT:    vpaddusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x00]
7146; X86-NEXT:    retl # encoding: [0xc3]
7147;
7148; X64-LABEL: test_mask_adds_epu16_rm_256:
7149; X64:       # %bb.0:
7150; X64-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
7151; X64-NEXT:    retq # encoding: [0xc3]
7152  %b = load <16 x i16>, ptr %ptr_b
7153  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7154  ret <16 x i16> %res
7155}
7156
7157define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
7158; X86-LABEL: test_mask_adds_epu16_rmk_256:
7159; X86:       # %bb.0:
7160; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7161; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7162; X86-NEXT:    vpaddusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x08]
7163; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7164; X86-NEXT:    retl # encoding: [0xc3]
7165;
7166; X64-LABEL: test_mask_adds_epu16_rmk_256:
7167; X64:       # %bb.0:
7168; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7169; X64-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
7170; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7171; X64-NEXT:    retq # encoding: [0xc3]
7172  %b = load <16 x i16>, ptr %ptr_b
7173  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7174  ret <16 x i16> %res
7175}
7176
7177define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
7178; X86-LABEL: test_mask_adds_epu16_rmkz_256:
7179; X86:       # %bb.0:
7180; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7181; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7182; X86-NEXT:    vpaddusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x00]
7183; X86-NEXT:    retl # encoding: [0xc3]
7184;
7185; X64-LABEL: test_mask_adds_epu16_rmkz_256:
7186; X64:       # %bb.0:
7187; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7188; X64-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
7189; X64-NEXT:    retq # encoding: [0xc3]
7190  %b = load <16 x i16>, ptr %ptr_b
7191  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7192  ret <16 x i16> %res
7193}
7194
7195declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7196
7197define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
7198; CHECK-LABEL: test_mask_subs_epu16_rr_128:
7199; CHECK:       # %bb.0:
7200; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
7201; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7202  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7203  ret <8 x i16> %res
7204}
7205
7206define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7207; X86-LABEL: test_mask_subs_epu16_rrk_128:
7208; X86:       # %bb.0:
7209; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7210; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7211; X86-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
7212; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7213; X86-NEXT:    retl # encoding: [0xc3]
7214;
7215; X64-LABEL: test_mask_subs_epu16_rrk_128:
7216; X64:       # %bb.0:
7217; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7218; X64-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
7219; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7220; X64-NEXT:    retq # encoding: [0xc3]
7221  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7222  ret <8 x i16> %res
7223}
7224
7225define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7226; X86-LABEL: test_mask_subs_epu16_rrkz_128:
7227; X86:       # %bb.0:
7228; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7229; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7230; X86-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
7231; X86-NEXT:    retl # encoding: [0xc3]
7232;
7233; X64-LABEL: test_mask_subs_epu16_rrkz_128:
7234; X64:       # %bb.0:
7235; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7236; X64-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
7237; X64-NEXT:    retq # encoding: [0xc3]
7238  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7239  ret <8 x i16> %res
7240}
7241
7242define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
7243; X86-LABEL: test_mask_subs_epu16_rm_128:
7244; X86:       # %bb.0:
7245; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7246; X86-NEXT:    vpsubusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x00]
7247; X86-NEXT:    retl # encoding: [0xc3]
7248;
7249; X64-LABEL: test_mask_subs_epu16_rm_128:
7250; X64:       # %bb.0:
7251; X64-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
7252; X64-NEXT:    retq # encoding: [0xc3]
7253  %b = load <8 x i16>, ptr %ptr_b
7254  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7255  ret <8 x i16> %res
7256}
7257
7258define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
7259; X86-LABEL: test_mask_subs_epu16_rmk_128:
7260; X86:       # %bb.0:
7261; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7262; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7263; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7264; X86-NEXT:    vpsubusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x08]
7265; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7266; X86-NEXT:    retl # encoding: [0xc3]
7267;
7268; X64-LABEL: test_mask_subs_epu16_rmk_128:
7269; X64:       # %bb.0:
7270; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7271; X64-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
7272; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7273; X64-NEXT:    retq # encoding: [0xc3]
7274  %b = load <8 x i16>, ptr %ptr_b
7275  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7276  ret <8 x i16> %res
7277}
7278
7279define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
7280; X86-LABEL: test_mask_subs_epu16_rmkz_128:
7281; X86:       # %bb.0:
7282; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7283; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7284; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7285; X86-NEXT:    vpsubusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x00]
7286; X86-NEXT:    retl # encoding: [0xc3]
7287;
7288; X64-LABEL: test_mask_subs_epu16_rmkz_128:
7289; X64:       # %bb.0:
7290; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7291; X64-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
7292; X64-NEXT:    retq # encoding: [0xc3]
7293  %b = load <8 x i16>, ptr %ptr_b
7294  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7295  ret <8 x i16> %res
7296}
7297
7298declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7299
7300define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7301; CHECK-LABEL: test_mask_subs_epu16_rr_256:
7302; CHECK:       # %bb.0:
7303; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
7304; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7305  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7306  ret <16 x i16> %res
7307}
7308
7309define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7310; X86-LABEL: test_mask_subs_epu16_rrk_256:
7311; X86:       # %bb.0:
7312; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7313; X86-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
7314; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7315; X86-NEXT:    retl # encoding: [0xc3]
7316;
7317; X64-LABEL: test_mask_subs_epu16_rrk_256:
7318; X64:       # %bb.0:
7319; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7320; X64-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
7321; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7322; X64-NEXT:    retq # encoding: [0xc3]
7323  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7324  ret <16 x i16> %res
7325}
7326
7327define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7328; X86-LABEL: test_mask_subs_epu16_rrkz_256:
7329; X86:       # %bb.0:
7330; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7331; X86-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
7332; X86-NEXT:    retl # encoding: [0xc3]
7333;
7334; X64-LABEL: test_mask_subs_epu16_rrkz_256:
7335; X64:       # %bb.0:
7336; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7337; X64-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
7338; X64-NEXT:    retq # encoding: [0xc3]
7339  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7340  ret <16 x i16> %res
7341}
7342
7343define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
7344; X86-LABEL: test_mask_subs_epu16_rm_256:
7345; X86:       # %bb.0:
7346; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7347; X86-NEXT:    vpsubusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x00]
7348; X86-NEXT:    retl # encoding: [0xc3]
7349;
7350; X64-LABEL: test_mask_subs_epu16_rm_256:
7351; X64:       # %bb.0:
7352; X64-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
7353; X64-NEXT:    retq # encoding: [0xc3]
7354  %b = load <16 x i16>, ptr %ptr_b
7355  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7356  ret <16 x i16> %res
7357}
7358
7359define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
7360; X86-LABEL: test_mask_subs_epu16_rmk_256:
7361; X86:       # %bb.0:
7362; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7363; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7364; X86-NEXT:    vpsubusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x08]
7365; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7366; X86-NEXT:    retl # encoding: [0xc3]
7367;
7368; X64-LABEL: test_mask_subs_epu16_rmk_256:
7369; X64:       # %bb.0:
7370; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7371; X64-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
7372; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7373; X64-NEXT:    retq # encoding: [0xc3]
7374  %b = load <16 x i16>, ptr %ptr_b
7375  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7376  ret <16 x i16> %res
7377}
7378
7379define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
7380; X86-LABEL: test_mask_subs_epu16_rmkz_256:
7381; X86:       # %bb.0:
7382; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7383; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7384; X86-NEXT:    vpsubusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x00]
7385; X86-NEXT:    retl # encoding: [0xc3]
7386;
7387; X64-LABEL: test_mask_subs_epu16_rmkz_256:
7388; X64:       # %bb.0:
7389; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7390; X64-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
7391; X64-NEXT:    retq # encoding: [0xc3]
7392  %b = load <16 x i16>, ptr %ptr_b
7393  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7394  ret <16 x i16> %res
7395}
7396
7397declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7398
7399define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
7400; CHECK-LABEL: test_mask_adds_epu8_rr_128:
7401; CHECK:       # %bb.0:
7402; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
7403; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7404  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7405  ret <16 x i8> %res
7406}
7407
7408define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
7409; X86-LABEL: test_mask_adds_epu8_rrk_128:
7410; X86:       # %bb.0:
7411; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7412; X86-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
7413; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7414; X86-NEXT:    retl # encoding: [0xc3]
7415;
7416; X64-LABEL: test_mask_adds_epu8_rrk_128:
7417; X64:       # %bb.0:
7418; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7419; X64-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
7420; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7421; X64-NEXT:    retq # encoding: [0xc3]
7422  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7423  ret <16 x i8> %res
7424}
7425
7426define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
7427; X86-LABEL: test_mask_adds_epu8_rrkz_128:
7428; X86:       # %bb.0:
7429; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7430; X86-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
7431; X86-NEXT:    retl # encoding: [0xc3]
7432;
7433; X64-LABEL: test_mask_adds_epu8_rrkz_128:
7434; X64:       # %bb.0:
7435; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7436; X64-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
7437; X64-NEXT:    retq # encoding: [0xc3]
7438  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7439  ret <16 x i8> %res
7440}
7441
7442define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
7443; X86-LABEL: test_mask_adds_epu8_rm_128:
7444; X86:       # %bb.0:
7445; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7446; X86-NEXT:    vpaddusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x00]
7447; X86-NEXT:    retl # encoding: [0xc3]
7448;
7449; X64-LABEL: test_mask_adds_epu8_rm_128:
7450; X64:       # %bb.0:
7451; X64-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
7452; X64-NEXT:    retq # encoding: [0xc3]
7453  %b = load <16 x i8>, ptr %ptr_b
7454  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7455  ret <16 x i8> %res
7456}
7457
7458define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
7459; X86-LABEL: test_mask_adds_epu8_rmk_128:
7460; X86:       # %bb.0:
7461; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7462; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7463; X86-NEXT:    vpaddusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x08]
7464; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7465; X86-NEXT:    retl # encoding: [0xc3]
7466;
7467; X64-LABEL: test_mask_adds_epu8_rmk_128:
7468; X64:       # %bb.0:
7469; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7470; X64-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
7471; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7472; X64-NEXT:    retq # encoding: [0xc3]
7473  %b = load <16 x i8>, ptr %ptr_b
7474  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7475  ret <16 x i8> %res
7476}
7477
7478define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
7479; X86-LABEL: test_mask_adds_epu8_rmkz_128:
7480; X86:       # %bb.0:
7481; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7482; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7483; X86-NEXT:    vpaddusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x00]
7484; X86-NEXT:    retl # encoding: [0xc3]
7485;
7486; X64-LABEL: test_mask_adds_epu8_rmkz_128:
7487; X64:       # %bb.0:
7488; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7489; X64-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
7490; X64-NEXT:    retq # encoding: [0xc3]
7491  %b = load <16 x i8>, ptr %ptr_b
7492  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7493  ret <16 x i8> %res
7494}
7495
7496declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7497
7498define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
7499; CHECK-LABEL: test_mask_adds_epu8_rr_256:
7500; CHECK:       # %bb.0:
7501; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
7502; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7503  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7504  ret <32 x i8> %res
7505}
7506
7507define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
7508; X86-LABEL: test_mask_adds_epu8_rrk_256:
7509; X86:       # %bb.0:
7510; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7511; X86-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
7512; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7513; X86-NEXT:    retl # encoding: [0xc3]
7514;
7515; X64-LABEL: test_mask_adds_epu8_rrk_256:
7516; X64:       # %bb.0:
7517; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7518; X64-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
7519; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7520; X64-NEXT:    retq # encoding: [0xc3]
7521  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7522  ret <32 x i8> %res
7523}
7524
7525define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
7526; X86-LABEL: test_mask_adds_epu8_rrkz_256:
7527; X86:       # %bb.0:
7528; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7529; X86-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
7530; X86-NEXT:    retl # encoding: [0xc3]
7531;
7532; X64-LABEL: test_mask_adds_epu8_rrkz_256:
7533; X64:       # %bb.0:
7534; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7535; X64-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
7536; X64-NEXT:    retq # encoding: [0xc3]
7537  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7538  ret <32 x i8> %res
7539}
7540
7541define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
7542; X86-LABEL: test_mask_adds_epu8_rm_256:
7543; X86:       # %bb.0:
7544; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7545; X86-NEXT:    vpaddusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x00]
7546; X86-NEXT:    retl # encoding: [0xc3]
7547;
7548; X64-LABEL: test_mask_adds_epu8_rm_256:
7549; X64:       # %bb.0:
7550; X64-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
7551; X64-NEXT:    retq # encoding: [0xc3]
7552  %b = load <32 x i8>, ptr %ptr_b
7553  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7554  ret <32 x i8> %res
7555}
7556
7557define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
7558; X86-LABEL: test_mask_adds_epu8_rmk_256:
7559; X86:       # %bb.0:
7560; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7561; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7562; X86-NEXT:    vpaddusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x08]
7563; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7564; X86-NEXT:    retl # encoding: [0xc3]
7565;
7566; X64-LABEL: test_mask_adds_epu8_rmk_256:
7567; X64:       # %bb.0:
7568; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7569; X64-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
7570; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7571; X64-NEXT:    retq # encoding: [0xc3]
7572  %b = load <32 x i8>, ptr %ptr_b
7573  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7574  ret <32 x i8> %res
7575}
7576
7577define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
7578; X86-LABEL: test_mask_adds_epu8_rmkz_256:
7579; X86:       # %bb.0:
7580; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7581; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7582; X86-NEXT:    vpaddusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x00]
7583; X86-NEXT:    retl # encoding: [0xc3]
7584;
7585; X64-LABEL: test_mask_adds_epu8_rmkz_256:
7586; X64:       # %bb.0:
7587; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7588; X64-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
7589; X64-NEXT:    retq # encoding: [0xc3]
7590  %b = load <32 x i8>, ptr %ptr_b
7591  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7592  ret <32 x i8> %res
7593}
7594
7595declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
7596
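; llvm.x86.avx512.mask.psubus.b.128 - unsigned saturating byte subtract, 128-bit: reg/reg and reg/mem forms, unmasked, merge-masked and zero-masked.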
7597define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
7598; CHECK-LABEL: test_mask_subs_epu8_rr_128:
7599; CHECK:       # %bb.0:
7600; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
7601; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7602  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7603  ret <16 x i8> %res
7604}
7605
7606define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
7607; X86-LABEL: test_mask_subs_epu8_rrk_128:
7608; X86:       # %bb.0:
7609; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7610; X86-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
7611; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7612; X86-NEXT:    retl # encoding: [0xc3]
7613;
7614; X64-LABEL: test_mask_subs_epu8_rrk_128:
7615; X64:       # %bb.0:
7616; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7617; X64-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
7618; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7619; X64-NEXT:    retq # encoding: [0xc3]
7620  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7621  ret <16 x i8> %res
7622}
7623
7624define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
7625; X86-LABEL: test_mask_subs_epu8_rrkz_128:
7626; X86:       # %bb.0:
7627; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7628; X86-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
7629; X86-NEXT:    retl # encoding: [0xc3]
7630;
7631; X64-LABEL: test_mask_subs_epu8_rrkz_128:
7632; X64:       # %bb.0:
7633; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7634; X64-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
7635; X64-NEXT:    retq # encoding: [0xc3]
7636  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7637  ret <16 x i8> %res
7638}
7639
7640define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
7641; X86-LABEL: test_mask_subs_epu8_rm_128:
7642; X86:       # %bb.0:
7643; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7644; X86-NEXT:    vpsubusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x00]
7645; X86-NEXT:    retl # encoding: [0xc3]
7646;
7647; X64-LABEL: test_mask_subs_epu8_rm_128:
7648; X64:       # %bb.0:
7649; X64-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
7650; X64-NEXT:    retq # encoding: [0xc3]
7651  %b = load <16 x i8>, ptr %ptr_b
7652  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7653  ret <16 x i8> %res
7654}
7655
7656define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
7657; X86-LABEL: test_mask_subs_epu8_rmk_128:
7658; X86:       # %bb.0:
7659; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7660; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7661; X86-NEXT:    vpsubusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x08]
7662; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7663; X86-NEXT:    retl # encoding: [0xc3]
7664;
7665; X64-LABEL: test_mask_subs_epu8_rmk_128:
7666; X64:       # %bb.0:
7667; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7668; X64-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
7669; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7670; X64-NEXT:    retq # encoding: [0xc3]
7671  %b = load <16 x i8>, ptr %ptr_b
7672  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7673  ret <16 x i8> %res
7674}
7675
7676define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
7677; X86-LABEL: test_mask_subs_epu8_rmkz_128:
7678; X86:       # %bb.0:
7679; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7680; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7681; X86-NEXT:    vpsubusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x00]
7682; X86-NEXT:    retl # encoding: [0xc3]
7683;
7684; X64-LABEL: test_mask_subs_epu8_rmkz_128:
7685; X64:       # %bb.0:
7686; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7687; X64-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
7688; X64-NEXT:    retq # encoding: [0xc3]
7689  %b = load <16 x i8>, ptr %ptr_b
7690  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7691  ret <16 x i8> %res
7692}
7693
7694declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7695
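; llvm.x86.avx512.mask.psubus.b.256 - unsigned saturating byte subtract, 256-bit.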
7696define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
7697; CHECK-LABEL: test_mask_subs_epu8_rr_256:
7698; CHECK:       # %bb.0:
7699; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
7700; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7701  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7702  ret <32 x i8> %res
7703}
7704
7705define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
7706; X86-LABEL: test_mask_subs_epu8_rrk_256:
7707; X86:       # %bb.0:
7708; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7709; X86-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
7710; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7711; X86-NEXT:    retl # encoding: [0xc3]
7712;
7713; X64-LABEL: test_mask_subs_epu8_rrk_256:
7714; X64:       # %bb.0:
7715; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7716; X64-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
7717; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7718; X64-NEXT:    retq # encoding: [0xc3]
7719  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7720  ret <32 x i8> %res
7721}
7722
7723define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
7724; X86-LABEL: test_mask_subs_epu8_rrkz_256:
7725; X86:       # %bb.0:
7726; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7727; X86-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
7728; X86-NEXT:    retl # encoding: [0xc3]
7729;
7730; X64-LABEL: test_mask_subs_epu8_rrkz_256:
7731; X64:       # %bb.0:
7732; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7733; X64-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
7734; X64-NEXT:    retq # encoding: [0xc3]
7735  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7736  ret <32 x i8> %res
7737}
7738
7739define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
7740; X86-LABEL: test_mask_subs_epu8_rm_256:
7741; X86:       # %bb.0:
7742; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7743; X86-NEXT:    vpsubusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x00]
7744; X86-NEXT:    retl # encoding: [0xc3]
7745;
7746; X64-LABEL: test_mask_subs_epu8_rm_256:
7747; X64:       # %bb.0:
7748; X64-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
7749; X64-NEXT:    retq # encoding: [0xc3]
7750  %b = load <32 x i8>, ptr %ptr_b
7751  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7752  ret <32 x i8> %res
7753}
7754
7755define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
7756; X86-LABEL: test_mask_subs_epu8_rmk_256:
7757; X86:       # %bb.0:
7758; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7759; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7760; X86-NEXT:    vpsubusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x08]
7761; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7762; X86-NEXT:    retl # encoding: [0xc3]
7763;
7764; X64-LABEL: test_mask_subs_epu8_rmk_256:
7765; X64:       # %bb.0:
7766; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7767; X64-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
7768; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7769; X64-NEXT:    retq # encoding: [0xc3]
7770  %b = load <32 x i8>, ptr %ptr_b
7771  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7772  ret <32 x i8> %res
7773}
7774
7775define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
7776; X86-LABEL: test_mask_subs_epu8_rmkz_256:
7777; X86:       # %bb.0:
7778; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7779; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7780; X86-NEXT:    vpsubusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x00]
7781; X86-NEXT:    retl # encoding: [0xc3]
7782;
7783; X64-LABEL: test_mask_subs_epu8_rmkz_256:
7784; X64:       # %bb.0:
7785; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7786; X64-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
7787; X64-NEXT:    retq # encoding: [0xc3]
7788  %b = load <32 x i8>, ptr %ptr_b
7789  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7790  ret <32 x i8> %res
7791}
7792
7793declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
7794
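; llvm.x86.avx512.mask.padds.w.128 - signed saturating word add, 128-bit.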
7795define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
7796; CHECK-LABEL: test_mask_adds_epi16_rr_128:
7797; CHECK:       # %bb.0:
7798; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
7799; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7800  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7801  ret <8 x i16> %res
7802}
7803
7804define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7805; X86-LABEL: test_mask_adds_epi16_rrk_128:
7806; X86:       # %bb.0:
7807; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7808; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7809; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
7810; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7811; X86-NEXT:    retl # encoding: [0xc3]
7812;
7813; X64-LABEL: test_mask_adds_epi16_rrk_128:
7814; X64:       # %bb.0:
7815; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7816; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
7817; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7818; X64-NEXT:    retq # encoding: [0xc3]
7819  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7820  ret <8 x i16> %res
7821}
7822
7823define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7824; X86-LABEL: test_mask_adds_epi16_rrkz_128:
7825; X86:       # %bb.0:
7826; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7827; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7828; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
7829; X86-NEXT:    retl # encoding: [0xc3]
7830;
7831; X64-LABEL: test_mask_adds_epi16_rrkz_128:
7832; X64:       # %bb.0:
7833; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7834; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
7835; X64-NEXT:    retq # encoding: [0xc3]
7836  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7837  ret <8 x i16> %res
7838}
7839
7840define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
7841; X86-LABEL: test_mask_adds_epi16_rm_128:
7842; X86:       # %bb.0:
7843; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7844; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x00]
7845; X86-NEXT:    retl # encoding: [0xc3]
7846;
7847; X64-LABEL: test_mask_adds_epi16_rm_128:
7848; X64:       # %bb.0:
7849; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
7850; X64-NEXT:    retq # encoding: [0xc3]
7851  %b = load <8 x i16>, ptr %ptr_b
7852  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7853  ret <8 x i16> %res
7854}
7855
7856define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
7857; X86-LABEL: test_mask_adds_epi16_rmk_128:
7858; X86:       # %bb.0:
7859; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7860; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7861; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7862; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
7863; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7864; X86-NEXT:    retl # encoding: [0xc3]
7865;
7866; X64-LABEL: test_mask_adds_epi16_rmk_128:
7867; X64:       # %bb.0:
7868; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7869; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
7870; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7871; X64-NEXT:    retq # encoding: [0xc3]
7872  %b = load <8 x i16>, ptr %ptr_b
7873  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7874  ret <8 x i16> %res
7875}
7876
7877define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
7878; X86-LABEL: test_mask_adds_epi16_rmkz_128:
7879; X86:       # %bb.0:
7880; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7881; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7882; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7883; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
7884; X86-NEXT:    retl # encoding: [0xc3]
7885;
7886; X64-LABEL: test_mask_adds_epi16_rmkz_128:
7887; X64:       # %bb.0:
7888; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7889; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
7890; X64-NEXT:    retq # encoding: [0xc3]
7891  %b = load <8 x i16>, ptr %ptr_b
7892  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7893  ret <8 x i16> %res
7894}
7895
7896declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7897
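; llvm.x86.avx512.mask.padds.w.256 - signed saturating word add, 256-bit.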
7898define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7899; CHECK-LABEL: test_mask_adds_epi16_rr_256:
7900; CHECK:       # %bb.0:
7901; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
7902; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7903  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7904  ret <16 x i16> %res
7905}
7906
7907define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7908; X86-LABEL: test_mask_adds_epi16_rrk_256:
7909; X86:       # %bb.0:
7910; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7911; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
7912; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7913; X86-NEXT:    retl # encoding: [0xc3]
7914;
7915; X64-LABEL: test_mask_adds_epi16_rrk_256:
7916; X64:       # %bb.0:
7917; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7918; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
7919; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7920; X64-NEXT:    retq # encoding: [0xc3]
7921  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7922  ret <16 x i16> %res
7923}
7924
7925define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7926; X86-LABEL: test_mask_adds_epi16_rrkz_256:
7927; X86:       # %bb.0:
7928; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7929; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
7930; X86-NEXT:    retl # encoding: [0xc3]
7931;
7932; X64-LABEL: test_mask_adds_epi16_rrkz_256:
7933; X64:       # %bb.0:
7934; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7935; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
7936; X64-NEXT:    retq # encoding: [0xc3]
7937  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7938  ret <16 x i16> %res
7939}
7940
7941define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
7942; X86-LABEL: test_mask_adds_epi16_rm_256:
7943; X86:       # %bb.0:
7944; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7945; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x00]
7946; X86-NEXT:    retl # encoding: [0xc3]
7947;
7948; X64-LABEL: test_mask_adds_epi16_rm_256:
7949; X64:       # %bb.0:
7950; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
7951; X64-NEXT:    retq # encoding: [0xc3]
7952  %b = load <16 x i16>, ptr %ptr_b
7953  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7954  ret <16 x i16> %res
7955}
7956
7957define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
7958; X86-LABEL: test_mask_adds_epi16_rmk_256:
7959; X86:       # %bb.0:
7960; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7961; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7962; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
7963; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7964; X86-NEXT:    retl # encoding: [0xc3]
7965;
7966; X64-LABEL: test_mask_adds_epi16_rmk_256:
7967; X64:       # %bb.0:
7968; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7969; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
7970; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7971; X64-NEXT:    retq # encoding: [0xc3]
7972  %b = load <16 x i16>, ptr %ptr_b
7973  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7974  ret <16 x i16> %res
7975}
7976
7977define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
7978; X86-LABEL: test_mask_adds_epi16_rmkz_256:
7979; X86:       # %bb.0:
7980; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7981; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7982; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
7983; X86-NEXT:    retl # encoding: [0xc3]
7984;
7985; X64-LABEL: test_mask_adds_epi16_rmkz_256:
7986; X64:       # %bb.0:
7987; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7988; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
7989; X64-NEXT:    retq # encoding: [0xc3]
7990  %b = load <16 x i16>, ptr %ptr_b
7991  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7992  ret <16 x i16> %res
7993}
7994
7995declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7996
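; llvm.x86.sse2.psubs.w combined with select-based masking - signed saturating word subtract, 128-bit.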
7997declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0
7998
7999define <8 x i16> @test_test_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8000; X86-LABEL: test_test_subs_epi16_rrk_128:
8001; X86:       # %bb.0:
8002; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8003; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8004; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8005; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8006; X86-NEXT:    retl # encoding: [0xc3]
8007;
8008; X64-LABEL: test_test_subs_epi16_rrk_128:
8009; X64:       # %bb.0:
8010; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8011; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8012; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8013; X64-NEXT:    retq # encoding: [0xc3]
8014  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8015  %2 = bitcast i8 %mask to <8 x i1>
8016  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8017  ret <8 x i16> %3
8018}
8019
8020define <8 x i16> @test_test_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8021; X86-LABEL: test_test_subs_epi16_rrkz_128:
8022; X86:       # %bb.0:
8023; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8024; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8025; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8026; X86-NEXT:    retl # encoding: [0xc3]
8027;
8028; X64-LABEL: test_test_subs_epi16_rrkz_128:
8029; X64:       # %bb.0:
8030; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8031; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8032; X64-NEXT:    retq # encoding: [0xc3]
8033  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8034  %2 = bitcast i8 %mask to <8 x i1>
8035  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8036  ret <8 x i16> %3
8037}
8038
8039define <8 x i16> @test_test_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
8040; X86-LABEL: test_test_subs_epi16_rmk_128:
8041; X86:       # %bb.0:
8042; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8043; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8044; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8045; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
8046; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8047; X86-NEXT:    retl # encoding: [0xc3]
8048;
8049; X64-LABEL: test_test_subs_epi16_rmk_128:
8050; X64:       # %bb.0:
8051; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8052; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
8053; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8054; X64-NEXT:    retq # encoding: [0xc3]
8055  %b = load <8 x i16>, ptr %ptr_b
8056  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8057  %2 = bitcast i8 %mask to <8 x i1>
8058  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8059  ret <8 x i16> %3
8060}
8061
8062define <8 x i16> @test_test_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
8063; X86-LABEL: test_test_subs_epi16_rmkz_128:
8064; X86:       # %bb.0:
8065; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8066; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8067; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8068; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
8069; X86-NEXT:    retl # encoding: [0xc3]
8070;
8071; X64-LABEL: test_test_subs_epi16_rmkz_128:
8072; X64:       # %bb.0:
8073; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8074; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
8075; X64-NEXT:    retq # encoding: [0xc3]
8076  %b = load <8 x i16>, ptr %ptr_b
8077  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8078  %2 = bitcast i8 %mask to <8 x i1>
8079  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8080  ret <8 x i16> %3
8081}
8082
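; llvm.x86.avx2.psubs.w combined with select-based masking - signed saturating word subtract, 256-bit.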
8083declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0
8084
8085define <16 x i16> @test_test_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8086; X86-LABEL: test_test_subs_epi16_rrk_256:
8087; X86:       # %bb.0:
8088; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8089; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8090; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8091; X86-NEXT:    retl # encoding: [0xc3]
8092;
8093; X64-LABEL: test_test_subs_epi16_rrk_256:
8094; X64:       # %bb.0:
8095; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8096; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8097; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8098; X64-NEXT:    retq # encoding: [0xc3]
8099  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8100  %2 = bitcast i16 %mask to <16 x i1>
8101  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8102  ret <16 x i16> %3
8103}
8104
8105define <16 x i16> @test_test_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8106; X86-LABEL: test_test_subs_epi16_rrkz_256:
8107; X86:       # %bb.0:
8108; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8109; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8110; X86-NEXT:    retl # encoding: [0xc3]
8111;
8112; X64-LABEL: test_test_subs_epi16_rrkz_256:
8113; X64:       # %bb.0:
8114; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8115; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8116; X64-NEXT:    retq # encoding: [0xc3]
8117  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8118  %2 = bitcast i16 %mask to <16 x i1>
8119  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8120  ret <16 x i16> %3
8121}
8122
8123define <16 x i16> @test_test_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
8124; X86-LABEL: test_test_subs_epi16_rmk_256:
8125; X86:       # %bb.0:
8126; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8127; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8128; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
8129; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8130; X86-NEXT:    retl # encoding: [0xc3]
8131;
8132; X64-LABEL: test_test_subs_epi16_rmk_256:
8133; X64:       # %bb.0:
8134; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8135; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
8136; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8137; X64-NEXT:    retq # encoding: [0xc3]
8138  %b = load <16 x i16>, ptr %ptr_b
8139  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8140  %2 = bitcast i16 %mask to <16 x i1>
8141  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8142  ret <16 x i16> %3
8143}
8144
8145define <16 x i16> @test_test_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
8146; X86-LABEL: test_test_subs_epi16_rmkz_256:
8147; X86:       # %bb.0:
8148; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8149; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8150; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
8151; X86-NEXT:    retl # encoding: [0xc3]
8152;
8153; X64-LABEL: test_test_subs_epi16_rmkz_256:
8154; X64:       # %bb.0:
8155; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8156; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
8157; X64-NEXT:    retq # encoding: [0xc3]
8158  %b = load <16 x i16>, ptr %ptr_b
8159  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8160  %2 = bitcast i16 %mask to <16 x i1>
8161  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8162  ret <16 x i16> %3
8163}
8164
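; llvm.x86.sse2.psubs.b combined with select-based masking - signed saturating byte subtract, 128-bit.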
8165declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0
8166
8167define <16 x i8> @test_test_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8168; X86-LABEL: test_test_subs_epi8_rrk_128:
8169; X86:       # %bb.0:
8170; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8171; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
8172; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8173; X86-NEXT:    retl # encoding: [0xc3]
8174;
8175; X64-LABEL: test_test_subs_epi8_rrk_128:
8176; X64:       # %bb.0:
8177; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8178; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
8179; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8180; X64-NEXT:    retq # encoding: [0xc3]
8181  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8182  %2 = bitcast i16 %mask to <16 x i1>
8183  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8184  ret <16 x i8> %3
8185}
8186
8187define <16 x i8> @test_test_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8188; X86-LABEL: test_test_subs_epi8_rrkz_128:
8189; X86:       # %bb.0:
8190; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8191; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
8192; X86-NEXT:    retl # encoding: [0xc3]
8193;
8194; X64-LABEL: test_test_subs_epi8_rrkz_128:
8195; X64:       # %bb.0:
8196; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8197; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
8198; X64-NEXT:    retq # encoding: [0xc3]
8199  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8200  %2 = bitcast i16 %mask to <16 x i1>
8201  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8202  ret <16 x i8> %3
8203}
8204
8205define <16 x i8> @test_test_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
8206; X86-LABEL: test_test_subs_epi8_rmk_128:
8207; X86:       # %bb.0:
8208; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8209; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8210; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
8211; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8212; X86-NEXT:    retl # encoding: [0xc3]
8213;
8214; X64-LABEL: test_test_subs_epi8_rmk_128:
8215; X64:       # %bb.0:
8216; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8217; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
8218; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8219; X64-NEXT:    retq # encoding: [0xc3]
8220  %b = load <16 x i8>, ptr %ptr_b
8221  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8222  %2 = bitcast i16 %mask to <16 x i1>
8223  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8224  ret <16 x i8> %3
8225}
8226
8227define <16 x i8> @test_test_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
8228; X86-LABEL: test_test_subs_epi8_rmkz_128:
8229; X86:       # %bb.0:
8230; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8231; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8232; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
8233; X86-NEXT:    retl # encoding: [0xc3]
8234;
8235; X64-LABEL: test_test_subs_epi8_rmkz_128:
8236; X64:       # %bb.0:
8237; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8238; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
8239; X64-NEXT:    retq # encoding: [0xc3]
8240  %b = load <16 x i8>, ptr %ptr_b
8241  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8242  %2 = bitcast i16 %mask to <16 x i1>
8243  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8244  ret <16 x i8> %3
8245}
8246
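; llvm.x86.avx2.psubs.b combined with select-based masking - signed saturating byte subtract, 256-bit.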
8247declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0
8248
8249define <32 x i8> @test_test_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8250; X86-LABEL: test_test_subs_epi8_rrk_256:
8251; X86:       # %bb.0:
8252; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8253; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
8254; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8255; X86-NEXT:    retl # encoding: [0xc3]
8256;
8257; X64-LABEL: test_test_subs_epi8_rrk_256:
8258; X64:       # %bb.0:
8259; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8260; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
8261; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8262; X64-NEXT:    retq # encoding: [0xc3]
8263  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8264  %2 = bitcast i32 %mask to <32 x i1>
8265  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8266  ret <32 x i8> %3
8267}
8268
8269define <32 x i8> @test_test_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8270; X86-LABEL: test_test_subs_epi8_rrkz_256:
8271; X86:       # %bb.0:
8272; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8273; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
8274; X86-NEXT:    retl # encoding: [0xc3]
8275;
8276; X64-LABEL: test_test_subs_epi8_rrkz_256:
8277; X64:       # %bb.0:
8278; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8279; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
8280; X64-NEXT:    retq # encoding: [0xc3]
8281  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8282  %2 = bitcast i32 %mask to <32 x i1>
8283  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8284  ret <32 x i8> %3
8285}
8286
8287define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
8288; X86-LABEL: test_test_subs_epi8_rmk_256:
8289; X86:       # %bb.0:
8290; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8291; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8292; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
8293; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8294; X86-NEXT:    retl # encoding: [0xc3]
8295;
8296; X64-LABEL: test_test_subs_epi8_rmk_256:
8297; X64:       # %bb.0:
8298; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8299; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
8300; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8301; X64-NEXT:    retq # encoding: [0xc3]
8302  %b = load <32 x i8>, ptr %ptr_b
8303  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8304  %2 = bitcast i32 %mask to <32 x i1>
8305  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8306  ret <32 x i8> %3
8307}
8308
8309define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
8310; X86-LABEL: test_test_subs_epi8_rmkz_256:
8311; X86:       # %bb.0:
8312; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8313; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8314; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
8315; X86-NEXT:    retl # encoding: [0xc3]
8316;
8317; X64-LABEL: test_test_subs_epi8_rmkz_256:
8318; X64:       # %bb.0:
8319; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8320; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
8321; X64-NEXT:    retq # encoding: [0xc3]
8322  %b = load <32 x i8>, ptr %ptr_b
8323  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8324  %2 = bitcast i32 %mask to <32 x i1>
8325  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8326  ret <32 x i8> %3
8327}
8328
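; llvm.x86.avx512.mask.psubs.w.128 - signed saturating word subtract, 128-bit: reg/reg and reg/mem forms, unmasked, merge-masked and zero-masked.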
8329define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
8330; CHECK-LABEL: test_mask_subs_epi16_rr_128:
8331; CHECK:       # %bb.0:
8332; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
8333; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8334  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
8335  ret <8 x i16> %res
8336}
8337
8338define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8339; X86-LABEL: test_mask_subs_epi16_rrk_128:
8340; X86:       # %bb.0:
8341; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8342; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8343; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8344; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8345; X86-NEXT:    retl # encoding: [0xc3]
8346;
8347; X64-LABEL: test_mask_subs_epi16_rrk_128:
8348; X64:       # %bb.0:
8349; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8350; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8351; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8352; X64-NEXT:    retq # encoding: [0xc3]
8353  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
8354  ret <8 x i16> %res
8355}
8356
8357define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8358; X86-LABEL: test_mask_subs_epi16_rrkz_128:
8359; X86:       # %bb.0:
8360; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8361; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8362; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8363; X86-NEXT:    retl # encoding: [0xc3]
8364;
8365; X64-LABEL: test_mask_subs_epi16_rrkz_128:
8366; X64:       # %bb.0:
8367; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8368; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8369; X64-NEXT:    retq # encoding: [0xc3]
8370  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
8371  ret <8 x i16> %res
8372}
8373
8374define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
8375; X86-LABEL: test_mask_subs_epi16_rm_128:
8376; X86:       # %bb.0:
8377; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8378; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x00]
8379; X86-NEXT:    retl # encoding: [0xc3]
8380;
8381; X64-LABEL: test_mask_subs_epi16_rm_128:
8382; X64:       # %bb.0:
8383; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
8384; X64-NEXT:    retq # encoding: [0xc3]
8385  %b = load <8 x i16>, ptr %ptr_b
8386  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
8387  ret <8 x i16> %res
8388}
8389
8390define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
8391; X86-LABEL: test_mask_subs_epi16_rmk_128:
8392; X86:       # %bb.0:
8393; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8394; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8395; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8396; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
8397; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8398; X86-NEXT:    retl # encoding: [0xc3]
8399;
8400; X64-LABEL: test_mask_subs_epi16_rmk_128:
8401; X64:       # %bb.0:
8402; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8403; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
8404; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8405; X64-NEXT:    retq # encoding: [0xc3]
8406  %b = load <8 x i16>, ptr %ptr_b
8407  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
8408  ret <8 x i16> %res
8409}
8410
8411define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
8412; X86-LABEL: test_mask_subs_epi16_rmkz_128:
8413; X86:       # %bb.0:
8414; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8415; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8416; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8417; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
8418; X86-NEXT:    retl # encoding: [0xc3]
8419;
8420; X64-LABEL: test_mask_subs_epi16_rmkz_128:
8421; X64:       # %bb.0:
8422; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8423; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
8424; X64-NEXT:    retq # encoding: [0xc3]
8425  %b = load <8 x i16>, ptr %ptr_b
8426  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
8427  ret <8 x i16> %res
8428}
8429
8430declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
8431
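; llvm.x86.avx512.mask.psubs.w.256 - signed saturating word subtract, 256-bit.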
8432define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
8433; CHECK-LABEL: test_mask_subs_epi16_rr_256:
8434; CHECK:       # %bb.0:
8435; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
8436; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8437  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
8438  ret <16 x i16> %res
8439}
8440
8441define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8442; X86-LABEL: test_mask_subs_epi16_rrk_256:
8443; X86:       # %bb.0:
8444; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8445; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8446; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8447; X86-NEXT:    retl # encoding: [0xc3]
8448;
8449; X64-LABEL: test_mask_subs_epi16_rrk_256:
8450; X64:       # %bb.0:
8451; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8452; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8453; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8454; X64-NEXT:    retq # encoding: [0xc3]
8455  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
8456  ret <16 x i16> %res
8457}
8458
8459define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8460; X86-LABEL: test_mask_subs_epi16_rrkz_256:
8461; X86:       # %bb.0:
8462; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8463; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8464; X86-NEXT:    retl # encoding: [0xc3]
8465;
8466; X64-LABEL: test_mask_subs_epi16_rrkz_256:
8467; X64:       # %bb.0:
8468; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8469; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8470; X64-NEXT:    retq # encoding: [0xc3]
8471  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
8472  ret <16 x i16> %res
8473}
8474
8475define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
8476; X86-LABEL: test_mask_subs_epi16_rm_256:
8477; X86:       # %bb.0:
8478; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8479; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x00]
8480; X86-NEXT:    retl # encoding: [0xc3]
8481;
8482; X64-LABEL: test_mask_subs_epi16_rm_256:
8483; X64:       # %bb.0:
8484; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
8485; X64-NEXT:    retq # encoding: [0xc3]
8486  %b = load <16 x i16>, ptr %ptr_b
8487  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
8488  ret <16 x i16> %res
8489}
8490
8491define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
8492; X86-LABEL: test_mask_subs_epi16_rmk_256:
8493; X86:       # %bb.0:
8494; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8495; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8496; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
8497; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8498; X86-NEXT:    retl # encoding: [0xc3]
8499;
8500; X64-LABEL: test_mask_subs_epi16_rmk_256:
8501; X64:       # %bb.0:
8502; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8503; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
8504; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8505; X64-NEXT:    retq # encoding: [0xc3]
8506  %b = load <16 x i16>, ptr %ptr_b
8507  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
8508  ret <16 x i16> %res
8509}
8510
8511define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
8512; X86-LABEL: test_mask_subs_epi16_rmkz_256:
8513; X86:       # %bb.0:
8514; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8515; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8516; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
8517; X86-NEXT:    retl # encoding: [0xc3]
8518;
8519; X64-LABEL: test_mask_subs_epi16_rmkz_256:
8520; X64:       # %bb.0:
8521; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8522; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
8523; X64-NEXT:    retq # encoding: [0xc3]
8524  %b = load <16 x i16>, ptr %ptr_b
8525  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
8526  ret <16 x i16> %res
8527}
8528
8529declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
8530
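; llvm.x86.sse2.padds.w combined with select-based masking - signed saturating word add, 128-bit.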
8531declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0
8532
8533define <8 x i16> @test_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8534; X86-LABEL: test_adds_epi16_rrk_128:
8535; X86:       # %bb.0:
8536; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8537; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8538; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
8539; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8540; X86-NEXT:    retl # encoding: [0xc3]
8541;
8542; X64-LABEL: test_adds_epi16_rrk_128:
8543; X64:       # %bb.0:
8544; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8545; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
8546; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8547; X64-NEXT:    retq # encoding: [0xc3]
8548  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8549  %2 = bitcast i8 %mask to <8 x i1>
8550  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8551  ret <8 x i16> %3
8552}
8553
8554define <8 x i16> @test_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8555; X86-LABEL: test_adds_epi16_rrkz_128:
8556; X86:       # %bb.0:
8557; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8558; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8559; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
8560; X86-NEXT:    retl # encoding: [0xc3]
8561;
8562; X64-LABEL: test_adds_epi16_rrkz_128:
8563; X64:       # %bb.0:
8564; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8565; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
8566; X64-NEXT:    retq # encoding: [0xc3]
8567  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8568  %2 = bitcast i8 %mask to <8 x i1>
8569  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8570  ret <8 x i16> %3
8571}
8572
8573define <8 x i16> @test_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
8574; X86-LABEL: test_adds_epi16_rmk_128:
8575; X86:       # %bb.0:
8576; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8577; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8578; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8579; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
8580; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8581; X86-NEXT:    retl # encoding: [0xc3]
8582;
8583; X64-LABEL: test_adds_epi16_rmk_128:
8584; X64:       # %bb.0:
8585; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8586; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
8587; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8588; X64-NEXT:    retq # encoding: [0xc3]
8589  %b = load <8 x i16>, ptr %ptr_b
8590  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8591  %2 = bitcast i8 %mask to <8 x i1>
8592  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8593  ret <8 x i16> %3
8594}
8595
8596define <8 x i16> @test_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
8597; X86-LABEL: test_adds_epi16_rmkz_128:
8598; X86:       # %bb.0:
8599; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8600; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8601; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8602; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
8603; X86-NEXT:    retl # encoding: [0xc3]
8604;
8605; X64-LABEL: test_adds_epi16_rmkz_128:
8606; X64:       # %bb.0:
8607; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8608; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
8609; X64-NEXT:    retq # encoding: [0xc3]
8610  %b = load <8 x i16>, ptr %ptr_b
8611  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8612  %2 = bitcast i8 %mask to <8 x i1>
8613  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8614  ret <8 x i16> %3
8615}
8616
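; Same pattern at 256 bits: @llvm.x86.avx2.padds.w plus an i16 mask bitcast to
; <16 x i1> and a select, expected to lower to a single masked or zero-masked
; vpaddsw on ymm registers, including folded memory operands.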
8617declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0
8618
8619define <16 x i16> @test_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8620; X86-LABEL: test_adds_epi16_rrk_256:
8621; X86:       # %bb.0:
8622; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8623; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
8624; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8625; X86-NEXT:    retl # encoding: [0xc3]
8626;
8627; X64-LABEL: test_adds_epi16_rrk_256:
8628; X64:       # %bb.0:
8629; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8630; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
8631; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8632; X64-NEXT:    retq # encoding: [0xc3]
8633  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8634  %2 = bitcast i16 %mask to <16 x i1>
8635  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8636  ret <16 x i16> %3
8637}
8638
8639define <16 x i16> @test_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8640; X86-LABEL: test_adds_epi16_rrkz_256:
8641; X86:       # %bb.0:
8642; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8643; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
8644; X86-NEXT:    retl # encoding: [0xc3]
8645;
8646; X64-LABEL: test_adds_epi16_rrkz_256:
8647; X64:       # %bb.0:
8648; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8649; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
8650; X64-NEXT:    retq # encoding: [0xc3]
8651  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8652  %2 = bitcast i16 %mask to <16 x i1>
8653  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8654  ret <16 x i16> %3
8655}
8656
8657define <16 x i16> @test_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
8658; X86-LABEL: test_adds_epi16_rmk_256:
8659; X86:       # %bb.0:
8660; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8661; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8662; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
8663; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8664; X86-NEXT:    retl # encoding: [0xc3]
8665;
8666; X64-LABEL: test_adds_epi16_rmk_256:
8667; X64:       # %bb.0:
8668; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8669; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
8670; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8671; X64-NEXT:    retq # encoding: [0xc3]
8672  %b = load <16 x i16>, ptr %ptr_b
8673  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8674  %2 = bitcast i16 %mask to <16 x i1>
8675  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8676  ret <16 x i16> %3
8677}
8678
8679define <16 x i16> @test_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
8680; X86-LABEL: test_adds_epi16_rmkz_256:
8681; X86:       # %bb.0:
8682; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8683; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8684; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
8685; X86-NEXT:    retl # encoding: [0xc3]
8686;
8687; X64-LABEL: test_adds_epi16_rmkz_256:
8688; X64:       # %bb.0:
8689; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8690; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
8691; X64-NEXT:    retq # encoding: [0xc3]
8692  %b = load <16 x i16>, ptr %ptr_b
8693  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8694  %2 = bitcast i16 %mask to <16 x i1>
8695  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8696  ret <16 x i16> %3
8697}
8698
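; Byte variant at 128 bits: @llvm.x86.sse2.padds.b with an i16 mask bitcast to
; <16 x i1>, covering register and memory operands in masked and zero-masked
; forms (expected lowering: vpaddsb).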
8699declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0
8700
8701define <16 x i8> @test_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8702; X86-LABEL: test_adds_epi8_rrk_128:
8703; X86:       # %bb.0:
8704; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8705; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8706; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8707; X86-NEXT:    retl # encoding: [0xc3]
8708;
8709; X64-LABEL: test_adds_epi8_rrk_128:
8710; X64:       # %bb.0:
8711; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8712; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8713; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8714; X64-NEXT:    retq # encoding: [0xc3]
8715  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8716  %2 = bitcast i16 %mask to <16 x i1>
8717  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8718  ret <16 x i8> %3
8719}
8720
8721define <16 x i8> @test_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8722; X86-LABEL: test_adds_epi8_rrkz_128:
8723; X86:       # %bb.0:
8724; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8725; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8726; X86-NEXT:    retl # encoding: [0xc3]
8727;
8728; X64-LABEL: test_adds_epi8_rrkz_128:
8729; X64:       # %bb.0:
8730; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8731; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8732; X64-NEXT:    retq # encoding: [0xc3]
8733  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8734  %2 = bitcast i16 %mask to <16 x i1>
8735  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8736  ret <16 x i8> %3
8737}
8738
8739define <16 x i8> @test_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
8740; X86-LABEL: test_adds_epi8_rm_128:
8741; X86:       # %bb.0:
8742; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8743; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
8744; X86-NEXT:    retl # encoding: [0xc3]
8745;
8746; X64-LABEL: test_adds_epi8_rm_128:
8747; X64:       # %bb.0:
8748; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
8749; X64-NEXT:    retq # encoding: [0xc3]
8750  %b = load <16 x i8>, ptr %ptr_b
8751  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8752  ret <16 x i8> %1
8753}
8754
8755define <16 x i8> @test_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
8756; X86-LABEL: test_adds_epi8_rmk_128:
8757; X86:       # %bb.0:
8758; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8759; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8760; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
8761; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8762; X86-NEXT:    retl # encoding: [0xc3]
8763;
8764; X64-LABEL: test_adds_epi8_rmk_128:
8765; X64:       # %bb.0:
8766; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8767; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
8768; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8769; X64-NEXT:    retq # encoding: [0xc3]
8770  %b = load <16 x i8>, ptr %ptr_b
8771  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8772  %2 = bitcast i16 %mask to <16 x i1>
8773  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8774  ret <16 x i8> %3
8775}
8776
8777define <16 x i8> @test_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
8778; X86-LABEL: test_adds_epi8_rmkz_128:
8779; X86:       # %bb.0:
8780; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8781; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8782; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
8783; X86-NEXT:    retl # encoding: [0xc3]
8784;
8785; X64-LABEL: test_adds_epi8_rmkz_128:
8786; X64:       # %bb.0:
8787; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8788; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
8789; X64-NEXT:    retq # encoding: [0xc3]
8790  %b = load <16 x i8>, ptr %ptr_b
8791  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8792  %2 = bitcast i16 %mask to <16 x i1>
8793  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8794  ret <16 x i8> %3
8795}
8796
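; Byte variant at 256 bits: @llvm.x86.avx2.padds.b with an i32 mask bitcast to
; <32 x i1>, again checking masked and zero-masked vpaddsb, including folded
; memory operands.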
8797declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0
8798
8799define <32 x i8> @test_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8800; X86-LABEL: test_adds_epi8_rrk_256:
8801; X86:       # %bb.0:
8802; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8803; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8804; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8805; X86-NEXT:    retl # encoding: [0xc3]
8806;
8807; X64-LABEL: test_adds_epi8_rrk_256:
8808; X64:       # %bb.0:
8809; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8810; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8811; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8812; X64-NEXT:    retq # encoding: [0xc3]
8813  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8814  %2 = bitcast i32 %mask to <32 x i1>
8815  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8816  ret <32 x i8> %3
8817}
8818
8819define <32 x i8> @test_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8820; X86-LABEL: test_adds_epi8_rrkz_256:
8821; X86:       # %bb.0:
8822; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8823; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
8824; X86-NEXT:    retl # encoding: [0xc3]
8825;
8826; X64-LABEL: test_adds_epi8_rrkz_256:
8827; X64:       # %bb.0:
8828; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8829; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
8830; X64-NEXT:    retq # encoding: [0xc3]
8831  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8832  %2 = bitcast i32 %mask to <32 x i1>
8833  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8834  ret <32 x i8> %3
8835}
8836
8837define <32 x i8> @test_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
8838; X86-LABEL: test_adds_epi8_rmk_256:
8839; X86:       # %bb.0:
8840; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8841; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8842; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
8843; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8844; X86-NEXT:    retl # encoding: [0xc3]
8845;
8846; X64-LABEL: test_adds_epi8_rmk_256:
8847; X64:       # %bb.0:
8848; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8849; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
8850; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8851; X64-NEXT:    retq # encoding: [0xc3]
8852  %b = load <32 x i8>, ptr %ptr_b
8853  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8854  %2 = bitcast i32 %mask to <32 x i1>
8855  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8856  ret <32 x i8> %3
8857}
8858
8859define <32 x i8> @test_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
8860; X86-LABEL: test_adds_epi8_rmkz_256:
8861; X86:       # %bb.0:
8862; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8863; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8864; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
8865; X86-NEXT:    retl # encoding: [0xc3]
8866;
8867; X64-LABEL: test_adds_epi8_rmkz_256:
8868; X64:       # %bb.0:
8869; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8870; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
8871; X64-NEXT:    retq # encoding: [0xc3]
8872  %b = load <32 x i8>, ptr %ptr_b
8873  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8874  %2 = bitcast i32 %mask to <32 x i1>
8875  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8876  ret <32 x i8> %3
8877}
8878
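; The remaining saturating-add tests call the old AVX-512 masked intrinsic
; @llvm.x86.avx512.mask.padds.b.128 directly (mask and passthru are operands of
; the intrinsic rather than an explicit select); presumably these are retained
; to cover the auto-upgrade path this file's name refers to.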
8879define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
8880; CHECK-LABEL: test_mask_adds_epi8_rr_128:
8881; CHECK:       # %bb.0:
8882; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
8883; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8884  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
8885  ret <16 x i8> %res
8886}
8887
8888define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8889; X86-LABEL: test_mask_adds_epi8_rrk_128:
8890; X86:       # %bb.0:
8891; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8892; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8893; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8894; X86-NEXT:    retl # encoding: [0xc3]
8895;
8896; X64-LABEL: test_mask_adds_epi8_rrk_128:
8897; X64:       # %bb.0:
8898; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8899; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8900; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8901; X64-NEXT:    retq # encoding: [0xc3]
8902  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
8903  ret <16 x i8> %res
8904}
8905
8906define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8907; X86-LABEL: test_mask_adds_epi8_rrkz_128:
8908; X86:       # %bb.0:
8909; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8910; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8911; X86-NEXT:    retl # encoding: [0xc3]
8912;
8913; X64-LABEL: test_mask_adds_epi8_rrkz_128:
8914; X64:       # %bb.0:
8915; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8916; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8917; X64-NEXT:    retq # encoding: [0xc3]
8918  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
8919  ret <16 x i8> %res
8920}
8921
8922define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
8923; X86-LABEL: test_mask_adds_epi8_rm_128:
8924; X86:       # %bb.0:
8925; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8926; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
8927; X86-NEXT:    retl # encoding: [0xc3]
8928;
8929; X64-LABEL: test_mask_adds_epi8_rm_128:
8930; X64:       # %bb.0:
8931; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
8932; X64-NEXT:    retq # encoding: [0xc3]
8933  %b = load <16 x i8>, ptr %ptr_b
8934  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
8935  ret <16 x i8> %res
8936}
8937
8938define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
8939; X86-LABEL: test_mask_adds_epi8_rmk_128:
8940; X86:       # %bb.0:
8941; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8942; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8943; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
8944; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8945; X86-NEXT:    retl # encoding: [0xc3]
8946;
8947; X64-LABEL: test_mask_adds_epi8_rmk_128:
8948; X64:       # %bb.0:
8949; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8950; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
8951; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8952; X64-NEXT:    retq # encoding: [0xc3]
8953  %b = load <16 x i8>, ptr %ptr_b
8954  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
8955  ret <16 x i8> %res
8956}
8957
8958define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
8959; X86-LABEL: test_mask_adds_epi8_rmkz_128:
8960; X86:       # %bb.0:
8961; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8962; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8963; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
8964; X86-NEXT:    retl # encoding: [0xc3]
8965;
8966; X64-LABEL: test_mask_adds_epi8_rmkz_128:
8967; X64:       # %bb.0:
8968; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8969; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
8970; X64-NEXT:    retq # encoding: [0xc3]
8971  %b = load <16 x i8>, ptr %ptr_b
8972  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
8973  ret <16 x i8> %res
8974}
8975
8976declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
8977
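; 256-bit form of the old masked intrinsic, @llvm.x86.avx512.mask.padds.b.256,
; with an i32 mask, expected to lower to vpaddsb on ymm registers.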
8978define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
8979; CHECK-LABEL: test_mask_adds_epi8_rr_256:
8980; CHECK:       # %bb.0:
8981; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
8982; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8983  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
8984  ret <32 x i8> %res
8985}
8986
8987define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8988; X86-LABEL: test_mask_adds_epi8_rrk_256:
8989; X86:       # %bb.0:
8990; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8991; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8992; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8993; X86-NEXT:    retl # encoding: [0xc3]
8994;
8995; X64-LABEL: test_mask_adds_epi8_rrk_256:
8996; X64:       # %bb.0:
8997; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8998; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8999; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9000; X64-NEXT:    retq # encoding: [0xc3]
9001  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9002  ret <32 x i8> %res
9003}
9004
9005define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
9006; X86-LABEL: test_mask_adds_epi8_rrkz_256:
9007; X86:       # %bb.0:
9008; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9009; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
9010; X86-NEXT:    retl # encoding: [0xc3]
9011;
9012; X64-LABEL: test_mask_adds_epi8_rrkz_256:
9013; X64:       # %bb.0:
9014; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9015; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
9016; X64-NEXT:    retq # encoding: [0xc3]
9017  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9018  ret <32 x i8> %res
9019}
9020
9021define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
9022; X86-LABEL: test_mask_adds_epi8_rm_256:
9023; X86:       # %bb.0:
9024; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9025; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x00]
9026; X86-NEXT:    retl # encoding: [0xc3]
9027;
9028; X64-LABEL: test_mask_adds_epi8_rm_256:
9029; X64:       # %bb.0:
9030; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
9031; X64-NEXT:    retq # encoding: [0xc3]
9032  %b = load <32 x i8>, ptr %ptr_b
9033  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9034  ret <32 x i8> %res
9035}
9036
9037define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
9038; X86-LABEL: test_mask_adds_epi8_rmk_256:
9039; X86:       # %bb.0:
9040; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9041; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9042; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
9043; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9044; X86-NEXT:    retl # encoding: [0xc3]
9045;
9046; X64-LABEL: test_mask_adds_epi8_rmk_256:
9047; X64:       # %bb.0:
9048; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9049; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
9050; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9051; X64-NEXT:    retq # encoding: [0xc3]
9052  %b = load <32 x i8>, ptr %ptr_b
9053  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9054  ret <32 x i8> %res
9055}
9056
9057define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
9058; X86-LABEL: test_mask_adds_epi8_rmkz_256:
9059; X86:       # %bb.0:
9060; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9061; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9062; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
9063; X86-NEXT:    retl # encoding: [0xc3]
9064;
9065; X64-LABEL: test_mask_adds_epi8_rmkz_256:
9066; X64:       # %bb.0:
9067; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9068; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
9069; X64-NEXT:    retq # encoding: [0xc3]
9070  %b = load <32 x i8>, ptr %ptr_b
9071  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9072  ret <32 x i8> %res
9073}
9074
9075declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
9076
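; Saturating-subtract counterparts: @llvm.x86.avx512.mask.psubs.b.128, expected
; to lower to vpsubsb in unmasked, merge-masked, and zero-masked forms.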
9077define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
9078; CHECK-LABEL: test_mask_subs_epi8_rr_128:
9079; CHECK:       # %bb.0:
9080; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
9081; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9082  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
9083  ret <16 x i8> %res
9084}
9085
9086define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
9087; X86-LABEL: test_mask_subs_epi8_rrk_128:
9088; X86:       # %bb.0:
9089; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9090; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
9091; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9092; X86-NEXT:    retl # encoding: [0xc3]
9093;
9094; X64-LABEL: test_mask_subs_epi8_rrk_128:
9095; X64:       # %bb.0:
9096; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9097; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
9098; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9099; X64-NEXT:    retq # encoding: [0xc3]
9100  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
9101  ret <16 x i8> %res
9102}
9103
9104define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
9105; X86-LABEL: test_mask_subs_epi8_rrkz_128:
9106; X86:       # %bb.0:
9107; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9108; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
9109; X86-NEXT:    retl # encoding: [0xc3]
9110;
9111; X64-LABEL: test_mask_subs_epi8_rrkz_128:
9112; X64:       # %bb.0:
9113; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9114; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
9115; X64-NEXT:    retq # encoding: [0xc3]
9116  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
9117  ret <16 x i8> %res
9118}
9119
9120define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
9121; X86-LABEL: test_mask_subs_epi8_rm_128:
9122; X86:       # %bb.0:
9123; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9124; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x00]
9125; X86-NEXT:    retl # encoding: [0xc3]
9126;
9127; X64-LABEL: test_mask_subs_epi8_rm_128:
9128; X64:       # %bb.0:
9129; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
9130; X64-NEXT:    retq # encoding: [0xc3]
9131  %b = load <16 x i8>, ptr %ptr_b
9132  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
9133  ret <16 x i8> %res
9134}
9135
9136define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
9137; X86-LABEL: test_mask_subs_epi8_rmk_128:
9138; X86:       # %bb.0:
9139; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9140; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9141; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
9142; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9143; X86-NEXT:    retl # encoding: [0xc3]
9144;
9145; X64-LABEL: test_mask_subs_epi8_rmk_128:
9146; X64:       # %bb.0:
9147; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9148; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
9149; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9150; X64-NEXT:    retq # encoding: [0xc3]
9151  %b = load <16 x i8>, ptr %ptr_b
9152  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
9153  ret <16 x i8> %res
9154}
9155
9156define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
9157; X86-LABEL: test_mask_subs_epi8_rmkz_128:
9158; X86:       # %bb.0:
9159; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9160; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9161; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
9162; X86-NEXT:    retl # encoding: [0xc3]
9163;
9164; X64-LABEL: test_mask_subs_epi8_rmkz_128:
9165; X64:       # %bb.0:
9166; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9167; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
9168; X64-NEXT:    retq # encoding: [0xc3]
9169  %b = load <16 x i8>, ptr %ptr_b
9170  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
9171  ret <16 x i8> %res
9172}
9173
9174declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
9175
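; 256-bit saturating subtract, @llvm.x86.avx512.mask.psubs.b.256, lowering to
; vpsubsb on ymm registers with an i32 mask.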
9176define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
9177; CHECK-LABEL: test_mask_subs_epi8_rr_256:
9178; CHECK:       # %bb.0:
9179; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
9180; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9181  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9182  ret <32 x i8> %res
9183}
9184
9185define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
9186; X86-LABEL: test_mask_subs_epi8_rrk_256:
9187; X86:       # %bb.0:
9188; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9189; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
9190; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9191; X86-NEXT:    retl # encoding: [0xc3]
9192;
9193; X64-LABEL: test_mask_subs_epi8_rrk_256:
9194; X64:       # %bb.0:
9195; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9196; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
9197; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9198; X64-NEXT:    retq # encoding: [0xc3]
9199  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9200  ret <32 x i8> %res
9201}
9202
9203define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
9204; X86-LABEL: test_mask_subs_epi8_rrkz_256:
9205; X86:       # %bb.0:
9206; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9207; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
9208; X86-NEXT:    retl # encoding: [0xc3]
9209;
9210; X64-LABEL: test_mask_subs_epi8_rrkz_256:
9211; X64:       # %bb.0:
9212; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9213; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
9214; X64-NEXT:    retq # encoding: [0xc3]
9215  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9216  ret <32 x i8> %res
9217}
9218
9219define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
9220; X86-LABEL: test_mask_subs_epi8_rm_256:
9221; X86:       # %bb.0:
9222; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9223; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x00]
9224; X86-NEXT:    retl # encoding: [0xc3]
9225;
9226; X64-LABEL: test_mask_subs_epi8_rm_256:
9227; X64:       # %bb.0:
9228; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
9229; X64-NEXT:    retq # encoding: [0xc3]
9230  %b = load <32 x i8>, ptr %ptr_b
9231  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9232  ret <32 x i8> %res
9233}
9234
9235define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
9236; X86-LABEL: test_mask_subs_epi8_rmk_256:
9237; X86:       # %bb.0:
9238; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9239; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9240; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
9241; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9242; X86-NEXT:    retl # encoding: [0xc3]
9243;
9244; X64-LABEL: test_mask_subs_epi8_rmk_256:
9245; X64:       # %bb.0:
9246; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9247; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
9248; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9249; X64-NEXT:    retq # encoding: [0xc3]
9250  %b = load <32 x i8>, ptr %ptr_b
9251  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9252  ret <32 x i8> %res
9253}
9254
9255define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
9256; X86-LABEL: test_mask_subs_epi8_rmkz_256:
9257; X86:       # %bb.0:
9258; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9259; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9260; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
9261; X86-NEXT:    retl # encoding: [0xc3]
9262;
9263; X64-LABEL: test_mask_subs_epi8_rmkz_256:
9264; X64:       # %bb.0:
9265; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9266; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
9267; X64-NEXT:    retq # encoding: [0xc3]
9268  %b = load <32 x i8>, ptr %ptr_b
9269  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9270  ret <32 x i8> %res
9271}
9272
9273declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
9274
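; Variable per-element arithmetic right shifts of words: the old masked
; intrinsics @llvm.x86.avx512.mask.psrav16.hi and psrav8.hi, expected to lower
; to vpsravw at 256 and 128 bits respectively.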
9275declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9276
9277define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9278; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
9279; CHECK:       # %bb.0:
9280; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
9281; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9282  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9283  ret <16 x i16> %res
9284}
9285
9286define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9287; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
9288; X86:       # %bb.0:
9289; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9290; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
9291; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9292; X86-NEXT:    retl # encoding: [0xc3]
9293;
9294; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
9295; X64:       # %bb.0:
9296; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9297; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
9298; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9299; X64-NEXT:    retq # encoding: [0xc3]
9300  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9301  ret <16 x i16> %res
9302}
9303
9304define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9305; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
9306; X86:       # %bb.0:
9307; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9308; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
9309; X86-NEXT:    retl # encoding: [0xc3]
9310;
9311; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
9312; X64:       # %bb.0:
9313; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9314; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
9315; X64-NEXT:    retq # encoding: [0xc3]
9316  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9317  ret <16 x i16> %res
9318}
9319
9320declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9321
9322define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9323; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
9324; CHECK:       # %bb.0:
9325; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
9326; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9327  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9328  ret <8 x i16> %res
9329}
9330
9331define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9332; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
9333; X86:       # %bb.0:
9334; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9335; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9336; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
9337; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9338; X86-NEXT:    retl # encoding: [0xc3]
9339;
9340; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
9341; X64:       # %bb.0:
9342; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9343; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
9344; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9345; X64-NEXT:    retq # encoding: [0xc3]
9346  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9347  ret <8 x i16> %res
9348}
9349
9350define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9351; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
9352; X86:       # %bb.0:
9353; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9354; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9355; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
9356; X86-NEXT:    retl # encoding: [0xc3]
9357;
9358; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
9359; X64:       # %bb.0:
9360; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9361; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
9362; X64-NEXT:    retq # encoding: [0xc3]
9363  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9364  ret <8 x i16> %res
9365}
9366
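; Variable per-element left shifts of words (@llvm.x86.avx512.mask.psllv16.hi
; and psllv8.hi), expected to lower to vpsllvw in unmasked, merge-masked, and
; zero-masked forms.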
9367declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9368
9369define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9370; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
9371; CHECK:       # %bb.0:
9372; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
9373; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9374  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9375  ret <16 x i16> %res
9376}
9377
9378define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9379; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
9380; X86:       # %bb.0:
9381; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9382; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
9383; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9384; X86-NEXT:    retl # encoding: [0xc3]
9385;
9386; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
9387; X64:       # %bb.0:
9388; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9389; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
9390; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9391; X64-NEXT:    retq # encoding: [0xc3]
9392  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9393  ret <16 x i16> %res
9394}
9395
9396define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9397; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
9398; X86:       # %bb.0:
9399; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9400; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
9401; X86-NEXT:    retl # encoding: [0xc3]
9402;
9403; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
9404; X64:       # %bb.0:
9405; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9406; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
9407; X64-NEXT:    retq # encoding: [0xc3]
9408  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9409  ret <16 x i16> %res
9410}
9411
9412declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9413
9414define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9415; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
9416; CHECK:       # %bb.0:
9417; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
9418; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9419  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9420  ret <8 x i16> %res
9421}
9422
9423define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9424; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
9425; X86:       # %bb.0:
9426; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9427; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9428; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
9429; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9430; X86-NEXT:    retl # encoding: [0xc3]
9431;
9432; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
9433; X64:       # %bb.0:
9434; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9435; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
9436; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9437; X64-NEXT:    retq # encoding: [0xc3]
9438  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9439  ret <8 x i16> %res
9440}
9441
9442define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9443; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
9444; X86:       # %bb.0:
9445; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9446; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9447; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
9448; X86-NEXT:    retl # encoding: [0xc3]
9449;
9450; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
9451; X64:       # %bb.0:
9452; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9453; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
9454; X64-NEXT:    retq # encoding: [0xc3]
9455  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9456  ret <8 x i16> %res
9457}
9458
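; Variable per-element logical right shifts of words
; (@llvm.x86.avx512.mask.psrlv16.hi and psrlv8.hi), expected to lower to
; vpsrlvw.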
9459declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9460
9461define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9462; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
9463; CHECK:       # %bb.0:
9464; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
9465; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9466  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9467  ret <16 x i16> %res
9468}
9469
9470define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9471; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
9472; X86:       # %bb.0:
9473; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9474; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
9475; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9476; X86-NEXT:    retl # encoding: [0xc3]
9477;
9478; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
9479; X64:       # %bb.0:
9480; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9481; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
9482; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9483; X64-NEXT:    retq # encoding: [0xc3]
9484  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9485  ret <16 x i16> %res
9486}
9487
9488define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9489; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
9490; X86:       # %bb.0:
9491; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9492; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
9493; X86-NEXT:    retl # encoding: [0xc3]
9494;
9495; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
9496; X64:       # %bb.0:
9497; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9498; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
9499; X64-NEXT:    retq # encoding: [0xc3]
9500  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9501  ret <16 x i16> %res
9502}
9503
9504declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9505
9506define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9507; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
9508; CHECK:       # %bb.0:
9509; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
9510; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9511  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9512  ret <8 x i16> %res
9513}
9514
9515define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9516; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
9517; X86:       # %bb.0:
9518; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9519; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9520; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
9521; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9522; X86-NEXT:    retl # encoding: [0xc3]
9523;
9524; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
9525; X64:       # %bb.0:
9526; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9527; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
9528; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9529; X64-NEXT:    retq # encoding: [0xc3]
9530  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9531  ret <8 x i16> %res
9532}
9533
9534define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9535; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
9536; X86:       # %bb.0:
9537; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9538; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9539; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
9540; X86-NEXT:    retl # encoding: [0xc3]
9541;
9542; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
9543; X64:       # %bb.0:
9544; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9545; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
9546; X64-NEXT:    retq # encoding: [0xc3]
9547  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9548  ret <8 x i16> %res
9549}
9550
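; Word-to-byte truncation: @llvm.x86.avx512.mask.pmov.wb.256, expected to lower
; to vpmovwb with merge and zero masking; the trailing vzeroupper is presumably
; emitted because the source lives in a ymm register.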
9551declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
9552
9553define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
9554; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
9555; CHECK:       # %bb.0:
9556; CHECK-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
9557; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9558; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9559  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
9560  ret <16 x i8> %res
9561}
9562
9563define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
9564; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
9565; X86:       # %bb.0:
9566; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9567; X86-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
9568; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9569; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9570; X86-NEXT:    retl # encoding: [0xc3]
9571;
9572; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
9573; X64:       # %bb.0:
9574; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9575; X64-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
9576; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9577; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9578; X64-NEXT:    retq # encoding: [0xc3]
9579  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
9580  ret <16 x i8> %res
9581}
9582
9583define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
9584; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
9585; X86:       # %bb.0:
9586; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9587; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
9588; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9589; X86-NEXT:    retl # encoding: [0xc3]
9590;
9591; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
9592; X64:       # %bb.0:
9593; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9594; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
9595; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9596; X64-NEXT:    retq # encoding: [0xc3]
9597  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
9598  ret <16 x i8> %res
9599}
9600