xref: /llvm-project/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll (revision 9540a7ae82dfabe551bfef94fc9f29ebebf841da)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefix=CHECK-AVX1
5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefix=CHECK-AVX2
6; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefix=CHECK-AVX512VL
7
8; Odd+Even divisors
; Per-lane result is zext((srem %X, <5, 14, 25, 100>) == 0) as <4 x i32>.
; The divisor vector mixes odd (5, 25) and even (14, 100) constants.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
9define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
10; CHECK-SSE2-LABEL: test_srem_odd_even:
11; CHECK-SSE2:       # %bb.0:
12; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
13; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
15; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
17; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
18; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
20; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
23; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
24; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
26; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29; CHECK-SSE2-NEXT:    retq
30;
31; CHECK-SSE41-LABEL: test_srem_odd_even:
32; CHECK-SSE41:       # %bb.0:
33; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
36; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
38; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
39; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
40; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
41; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
42; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,306783378,171798690,42949672]
43; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
44; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
45; CHECK-SSE41-NEXT:    psrld $31, %xmm0
46; CHECK-SSE41-NEXT:    retq
47;
48; CHECK-AVX1-LABEL: test_srem_odd_even:
49; CHECK-AVX1:       # %bb.0:
50; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
51; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
52; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
53; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
54; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
55; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
56; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
57; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
58; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
59; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
60; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
61; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
62; CHECK-AVX1-NEXT:    retq
63;
64; CHECK-AVX2-LABEL: test_srem_odd_even:
65; CHECK-AVX2:       # %bb.0:
66; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
67; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
68; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
69; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
70; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
71; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
72; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
73; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
74; CHECK-AVX2-NEXT:    retq
75;
76; CHECK-AVX512VL-LABEL: test_srem_odd_even:
77; CHECK-AVX512VL:       # %bb.0:
78; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
79; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
80; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
81; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
82; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
83; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
84; CHECK-AVX512VL-NEXT:    retq
85  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
86  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
87  %ret = zext <4 x i1> %cmp to <4 x i32>
88  ret <4 x i32> %ret
89}
90
91;==============================================================================;
92
93; One all-ones divisor in odd divisor
; Per-lane result is zext((srem %X, <5, 5, 4294967295, 5>) == 0) as <4 x i32>.
; Lane 2's divisor, 4294967295, is the all-ones 32-bit pattern (0xFFFFFFFF);
; the other lanes use the odd divisor 5.
94define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
95; CHECK-SSE2-LABEL: test_srem_odd_allones_eq:
96; CHECK-SSE2:       # %bb.0:
97; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
98; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
99; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
100; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
101; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
102; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
103; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
104; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
105; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
106; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
107; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
108; CHECK-SSE2-NEXT:    retq
109;
110; CHECK-SSE41-LABEL: test_srem_odd_allones_eq:
111; CHECK-SSE41:       # %bb.0:
112; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
113; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
114; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
115; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
116; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
117; CHECK-SSE41-NEXT:    psrld $31, %xmm0
118; CHECK-SSE41-NEXT:    retq
119;
120; CHECK-AVX1-LABEL: test_srem_odd_allones_eq:
121; CHECK-AVX1:       # %bb.0:
122; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
123; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
124; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
125; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
126; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
127; CHECK-AVX1-NEXT:    retq
128;
129; CHECK-AVX2-LABEL: test_srem_odd_allones_eq:
130; CHECK-AVX2:       # %bb.0:
131; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
132; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
133; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
134; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
135; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
136; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
137; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
138; CHECK-AVX2-NEXT:    retq
139;
140; CHECK-AVX512VL-LABEL: test_srem_odd_allones_eq:
141; CHECK-AVX512VL:       # %bb.0:
142; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
143; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
144; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
145; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
146; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
147; CHECK-AVX512VL-NEXT:    retq
148  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
149  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
150  %ret = zext <4 x i1> %cmp to <4 x i32>
151  ret <4 x i32> %ret
152}
; Inequality counterpart of test_srem_odd_allones_eq: same divisor vector
; <5, 5, 4294967295, 5>, but the remainder is compared with `icmp ne`, so the
; per-lane result is zext((srem %X, divisors) != 0).
153define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
154; CHECK-SSE2-LABEL: test_srem_odd_allones_ne:
155; CHECK-SSE2:       # %bb.0:
156; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
157; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
158; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
159; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
160; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
161; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
162; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
163; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
164; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
165; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
166; CHECK-SSE2-NEXT:    psrld $31, %xmm0
167; CHECK-SSE2-NEXT:    retq
168;
169; CHECK-SSE41-LABEL: test_srem_odd_allones_ne:
170; CHECK-SSE41:       # %bb.0:
171; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
173; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
174; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
175; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
176; CHECK-SSE41-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
177; CHECK-SSE41-NEXT:    retq
178;
179; CHECK-AVX1-LABEL: test_srem_odd_allones_ne:
180; CHECK-AVX1:       # %bb.0:
181; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
182; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
183; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
184; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
185; CHECK-AVX1-NEXT:    vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
186; CHECK-AVX1-NEXT:    retq
187;
188; CHECK-AVX2-LABEL: test_srem_odd_allones_ne:
189; CHECK-AVX2:       # %bb.0:
190; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
191; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
192; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
193; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
194; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
195; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
196; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
197; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
198; CHECK-AVX2-NEXT:    retq
199;
200; CHECK-AVX512VL-LABEL: test_srem_odd_allones_ne:
201; CHECK-AVX512VL:       # %bb.0:
202; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
203; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
204; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
205; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
206; CHECK-AVX512VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
207; CHECK-AVX512VL-NEXT:    retq
208  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
209  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
210  %ret = zext <4 x i1> %cmp to <4 x i32>
211  ret <4 x i32> %ret
212}
213
214; One all-ones divisor in even divisor
; Per-lane result is zext((srem %X, <14, 14, 4294967295, 14>) == 0) as <4 x i32>.
; Lane 2's divisor, 4294967295, is the all-ones 32-bit pattern (0xFFFFFFFF);
; the other lanes use the even divisor 14.
215define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
216; CHECK-SSE2-LABEL: test_srem_even_allones_eq:
217; CHECK-SSE2:       # %bb.0:
218; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
219; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
220; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
221; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
222; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
223; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
224; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
225; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
226; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
227; CHECK-SSE2-NEXT:    psrld $1, %xmm1
228; CHECK-SSE2-NEXT:    pslld $31, %xmm0
229; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
230; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
231; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
232; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
233; CHECK-SSE2-NEXT:    retq
234;
235; CHECK-SSE41-LABEL: test_srem_even_allones_eq:
236; CHECK-SSE41:       # %bb.0:
237; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
238; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
239; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
240; CHECK-SSE41-NEXT:    psrld $1, %xmm1
241; CHECK-SSE41-NEXT:    pslld $31, %xmm0
242; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
243; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378]
244; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
245; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
246; CHECK-SSE41-NEXT:    psrld $31, %xmm0
247; CHECK-SSE41-NEXT:    retq
248;
249; CHECK-AVX1-LABEL: test_srem_even_allones_eq:
250; CHECK-AVX1:       # %bb.0:
251; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
252; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
253; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
254; CHECK-AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
255; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
256; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
257; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
258; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
259; CHECK-AVX1-NEXT:    retq
260;
261; CHECK-AVX2-LABEL: test_srem_even_allones_eq:
262; CHECK-AVX2:       # %bb.0:
263; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
264; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
265; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378]
266; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
267; CHECK-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm1
268; CHECK-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
269; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
270; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
271; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
272; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
273; CHECK-AVX2-NEXT:    retq
274;
275; CHECK-AVX512VL-LABEL: test_srem_even_allones_eq:
276; CHECK-AVX512VL:       # %bb.0:
277; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
278; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
279; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
280; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
281; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
282; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
283; CHECK-AVX512VL-NEXT:    retq
284  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
285  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
286  %ret = zext <4 x i1> %cmp to <4 x i32>
287  ret <4 x i32> %ret
288}
; Inequality counterpart of test_srem_even_allones_eq: same divisor vector
; <14, 14, 4294967295, 14>, but the remainder is compared with `icmp ne`, so
; the per-lane result is zext((srem %X, divisors) != 0).
289define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
290; CHECK-SSE2-LABEL: test_srem_even_allones_ne:
291; CHECK-SSE2:       # %bb.0:
292; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
293; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
294; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
295; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
296; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
297; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
298; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
299; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
300; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
301; CHECK-SSE2-NEXT:    psrld $1, %xmm1
302; CHECK-SSE2-NEXT:    pslld $31, %xmm0
303; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
304; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
305; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
306; CHECK-SSE2-NEXT:    psrld $31, %xmm0
307; CHECK-SSE2-NEXT:    retq
308;
309; CHECK-SSE41-LABEL: test_srem_even_allones_ne:
310; CHECK-SSE41:       # %bb.0:
311; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
312; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
313; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
314; CHECK-SSE41-NEXT:    psrld $1, %xmm1
315; CHECK-SSE41-NEXT:    pslld $31, %xmm0
316; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
317; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378]
318; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
319; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
320; CHECK-SSE41-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
321; CHECK-SSE41-NEXT:    retq
322;
323; CHECK-AVX1-LABEL: test_srem_even_allones_ne:
324; CHECK-AVX1:       # %bb.0:
325; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
326; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
327; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
328; CHECK-AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
329; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
330; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
331; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
332; CHECK-AVX1-NEXT:    vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
333; CHECK-AVX1-NEXT:    retq
334;
335; CHECK-AVX2-LABEL: test_srem_even_allones_ne:
336; CHECK-AVX2:       # %bb.0:
337; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
338; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
339; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378]
340; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
341; CHECK-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm1
342; CHECK-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
343; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
344; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
345; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
346; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
347; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
348; CHECK-AVX2-NEXT:    retq
349;
350; CHECK-AVX512VL-LABEL: test_srem_even_allones_ne:
351; CHECK-AVX512VL:       # %bb.0:
352; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
353; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
354; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
355; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
356; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
357; CHECK-AVX512VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
358; CHECK-AVX512VL-NEXT:    retq
359  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
360  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
361  %ret = zext <4 x i1> %cmp to <4 x i32>
362  ret <4 x i32> %ret
363}
364
365; One all-ones divisor in odd+even divisor
; Per-lane result is zext((srem %X, <5, 14, 4294967295, 100>) == 0) as <4 x i32>.
; The divisor vector combines an odd divisor (5), even divisors (14, 100) and,
; in lane 2, the all-ones 32-bit pattern 4294967295 (0xFFFFFFFF).
366define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
367; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq:
368; CHECK-SSE2:       # %bb.0:
369; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
370; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
371; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
372; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
373; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
374; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
375; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
376; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
377; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
378; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
379; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
380; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
381; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
382; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
383; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
384; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
385; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
386; CHECK-SSE2-NEXT:    retq
387;
388; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq:
389; CHECK-SSE41:       # %bb.0:
390; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
391; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
392; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
393; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
394; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
395; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
396; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
397; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
398; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
399; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,306783378,4294967295,42949672]
400; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
401; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
402; CHECK-SSE41-NEXT:    psrld $31, %xmm0
403; CHECK-SSE41-NEXT:    retq
404;
405; CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq:
406; CHECK-AVX1:       # %bb.0:
407; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
408; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
409; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
410; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
411; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
412; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
413; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
414; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
415; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
416; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
417; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
418; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
419; CHECK-AVX1-NEXT:    retq
420;
421; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq:
422; CHECK-AVX2:       # %bb.0:
423; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
424; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
425; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
426; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
427; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
428; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
429; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
430; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
431; CHECK-AVX2-NEXT:    retq
432;
433; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq:
434; CHECK-AVX512VL:       # %bb.0:
435; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
436; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
437; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
438; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
439; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
440; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
441; CHECK-AVX512VL-NEXT:    retq
442  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
443  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
444  %ret = zext <4 x i1> %cmp to <4 x i32>
445  ret <4 x i32> %ret
446}
; Inequality counterpart of test_srem_odd_even_allones_eq: same divisor vector
; <5, 14, 4294967295, 100>, but the remainder is compared with `icmp ne`, so
; the per-lane result is zext((srem %X, divisors) != 0).
447define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
448; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne:
449; CHECK-SSE2:       # %bb.0:
450; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
451; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
452; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
453; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
454; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
455; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
456; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
457; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
458; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
459; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
460; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
461; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
462; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
463; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
464; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
465; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
466; CHECK-SSE2-NEXT:    psrld $31, %xmm0
467; CHECK-SSE2-NEXT:    retq
468;
469; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne:
470; CHECK-SSE41:       # %bb.0:
471; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
472; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
473; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
474; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
475; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
476; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
477; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
478; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
479; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
480; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,306783378,4294967295,42949672]
481; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
482; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
483; CHECK-SSE41-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
484; CHECK-SSE41-NEXT:    retq
485;
486; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne:
487; CHECK-AVX1:       # %bb.0:
488; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
489; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
490; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
491; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
492; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
493; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
494; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
495; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
496; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
497; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
498; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
499; CHECK-AVX1-NEXT:    vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
500; CHECK-AVX1-NEXT:    retq
501;
502; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne:
503; CHECK-AVX2:       # %bb.0:
504; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
505; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
506; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
507; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
508; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
509; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
510; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
511; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
512; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
513; CHECK-AVX2-NEXT:    retq
514;
515; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne:
516; CHECK-AVX512VL:       # %bb.0:
517; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
518; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
519; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
520; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
521; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
522; CHECK-AVX512VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
523; CHECK-AVX512VL-NEXT:    retq
524  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
525  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
526  %ret = zext <4 x i1> %cmp to <4 x i32>
527  ret <4 x i32> %ret
528}
529
530;------------------------------------------------------------------------------;
531
532; One power-of-two divisor in odd divisor
; Per-lane result is zext((srem %X, <5, 5, 16, 5>) == 0) as <4 x i32>.
; Lane 2's divisor, 16, is a power of two; the other lanes use the odd
; divisor 5.
533define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
534; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo:
535; CHECK-SSE2:       # %bb.0:
536; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
537; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
538; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
539; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
540; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
541; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
542; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
543; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
544; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
545; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
546; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
547; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
548; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
549; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
550; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
551; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
552; CHECK-SSE2-NEXT:    retq
553;
554; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo:
555; CHECK-SSE41:       # %bb.0:
556; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
557; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
558; CHECK-SSE41-NEXT:    pmovsxdq {{.*#+}} xmm1 = [1,268435456]
559; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
560; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
561; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
562; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
563; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,268435455,858993458]
564; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
565; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
566; CHECK-SSE41-NEXT:    psrld $31, %xmm0
567; CHECK-SSE41-NEXT:    retq
568;
569; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo:
570; CHECK-AVX1:       # %bb.0:
571; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
572; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
573; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
574; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
575; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
576; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
577; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
578; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
579; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
580; CHECK-AVX1-NEXT:    retq
581;
582; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo:
583; CHECK-AVX2:       # %bb.0:
584; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
585; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
586; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
587; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
588; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
589; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
590; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
591; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
592; CHECK-AVX2-NEXT:    retq
593;
594; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo:
595; CHECK-AVX512VL:       # %bb.0:
596; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
597; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
598; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
599; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
600; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
601; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
602; CHECK-AVX512VL-NEXT:    retq
603  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
604  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
605  %ret = zext <4 x i1> %cmp to <4 x i32>
606  ret <4 x i32> %ret
607}
608
609; One power-of-two divisor in even divisor
; Computes zext(icmp eq (srem %X, <14, 14, 16, 14>), 0): each result lane is 1
; when the matching lane of %X is a multiple of its divisor, otherwise 0.
; Lane 2 divides by the power of two 16; the other lanes divide by the even
; divisor 14.
; Each CHECK-<prefix> group below pins the llc output for one RUN line. The
; NOTE at the top of the file says these assertions are produced by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
610define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
611; CHECK-SSE2-LABEL: test_srem_even_poweroftwo:
612; CHECK-SSE2:       # %bb.0:
613; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
614; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
615; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
616; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
617; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
618; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
619; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
620; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,268435456,2147483648]
621; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
622; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
623; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
624; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
625; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm3
626; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
627; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
628; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
629; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
630; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
631; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
632; CHECK-SSE2-NEXT:    pxor %xmm4, %xmm0
633; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
634; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
635; CHECK-SSE2-NEXT:    retq
636;
637; CHECK-SSE41-LABEL: test_srem_even_poweroftwo:
638; CHECK-SSE41:       # %bb.0:
639; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
640; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
641; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
642; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
643; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
644; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
645; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
646; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
647; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
648; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
649; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,268435455,306783378]
650; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
651; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
652; CHECK-SSE41-NEXT:    psrld $31, %xmm0
653; CHECK-SSE41-NEXT:    retq
654;
655; CHECK-AVX1-LABEL: test_srem_even_poweroftwo:
656; CHECK-AVX1:       # %bb.0:
657; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
658; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
659; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
660; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
661; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
662; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
663; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
664; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
665; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
666; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
667; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
668; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
669; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
670; CHECK-AVX1-NEXT:    retq
671;
672; CHECK-AVX2-LABEL: test_srem_even_poweroftwo:
673; CHECK-AVX2:       # %bb.0:
674; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
675; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
676; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
677; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
678; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
679; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
680; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
681; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
682; CHECK-AVX2-NEXT:    retq
683;
684; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo:
685; CHECK-AVX512VL:       # %bb.0:
686; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
687; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
688; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
689; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
690; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
691; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
692; CHECK-AVX512VL-NEXT:    retq
693  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
694  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
695  %ret = zext <4 x i1> %cmp to <4 x i32>
696  ret <4 x i32> %ret
697}
698
699; One power-of-two divisor in odd+even divisor
; Computes zext(icmp eq (srem %X, <5, 14, 16, 100>), 0): each result lane is 1
; when the matching lane of %X is a multiple of its divisor, otherwise 0.
; The divisor vector mixes an odd value (5), even values (14, 100) and a
; power of two (16, lane 2).
; The CHECK-<prefix> groups are autogenerated (see the NOTE at the top of the
; file); rerun utils/update_llc_test_checks.py rather than editing them.
700define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
701; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo:
702; CHECK-SSE2:       # %bb.0:
703; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
704; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
705; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
706; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
707; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
708; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
709; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
710; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2147483648,268435456,1073741824]
711; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
712; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
713; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
714; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
715; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
716; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
717; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
718; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
719; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
720; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
721; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
722; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
723; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
724; CHECK-SSE2-NEXT:    retq
725;
726; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo:
727; CHECK-SSE41:       # %bb.0:
728; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
729; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
730; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
731; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
732; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
733; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
734; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
735; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
736; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
737; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
738; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,306783378,268435455,42949672]
739; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
740; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
741; CHECK-SSE41-NEXT:    psrld $31, %xmm0
742; CHECK-SSE41-NEXT:    retq
743;
744; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo:
745; CHECK-AVX1:       # %bb.0:
746; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
747; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
748; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
749; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
750; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
751; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
752; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
753; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
754; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
755; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
756; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
757; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
758; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
759; CHECK-AVX1-NEXT:    retq
760;
761; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo:
762; CHECK-AVX2:       # %bb.0:
763; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
764; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
765; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
766; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
767; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
768; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
769; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
770; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
771; CHECK-AVX2-NEXT:    retq
772;
773; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo:
774; CHECK-AVX512VL:       # %bb.0:
775; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
776; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
777; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
778; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
779; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
780; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
781; CHECK-AVX512VL-NEXT:    retq
782  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
783  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
784  %ret = zext <4 x i1> %cmp to <4 x i32>
785  ret <4 x i32> %ret
786}
787
788;------------------------------------------------------------------------------;
789
790; One one divisor in odd divisor
; Computes zext(icmp eq (srem %X, <5, 5, 1, 5>), 0): each result lane is 1
; when the matching lane of %X is a multiple of its divisor, otherwise 0.
; Lane 2 divides by 1, so its remainder is always 0 and that result lane is
; always 1; in the SSE4.1 checks this appears as the 4294967295 entry of the
; pminud operand. The other lanes divide by the odd divisor 5.
; The CHECK-<prefix> groups are autogenerated (see the NOTE at the top of the
; file); rerun utils/update_llc_test_checks.py rather than editing them.
791define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
792; CHECK-SSE2-LABEL: test_srem_odd_one:
793; CHECK-SSE2:       # %bb.0:
794; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
795; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
796; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
797; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
798; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
799; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
800; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
801; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
802; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
803; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
804; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
805; CHECK-SSE2-NEXT:    retq
806;
807; CHECK-SSE41-LABEL: test_srem_odd_one:
808; CHECK-SSE41:       # %bb.0:
809; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
810; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
811; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
812; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
813; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
814; CHECK-SSE41-NEXT:    psrld $31, %xmm0
815; CHECK-SSE41-NEXT:    retq
816;
817; CHECK-AVX1-LABEL: test_srem_odd_one:
818; CHECK-AVX1:       # %bb.0:
819; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
820; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
821; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
822; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
823; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
824; CHECK-AVX1-NEXT:    retq
825;
826; CHECK-AVX2-LABEL: test_srem_odd_one:
827; CHECK-AVX2:       # %bb.0:
828; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
829; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
830; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
831; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
832; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
833; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
834; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
835; CHECK-AVX2-NEXT:    retq
836;
837; CHECK-AVX512VL-LABEL: test_srem_odd_one:
838; CHECK-AVX512VL:       # %bb.0:
839; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
840; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
841; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
842; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
843; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
844; CHECK-AVX512VL-NEXT:    retq
845  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5>
846  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
847  %ret = zext <4 x i1> %cmp to <4 x i32>
848  ret <4 x i32> %ret
849}
850
851; One one divisor in even divisor
; Computes zext(icmp eq (srem %X, <14, 14, 1, 14>), 0): each result lane is 1
; when the matching lane of %X is a multiple of its divisor, otherwise 0.
; Lane 2 divides by 1, so its remainder is always 0 and that result lane is
; always 1; the other lanes divide by the even divisor 14.
; The expected code rotates every lane right by one bit (psrld $1 / pslld $31
; / por, or vprord $1 with AVX512VL) instead of using per-lane shift amounts.
; The CHECK-<prefix> groups are autogenerated (see the NOTE at the top of the
; file); rerun utils/update_llc_test_checks.py rather than editing them.
852define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
853; CHECK-SSE2-LABEL: test_srem_even_one:
854; CHECK-SSE2:       # %bb.0:
855; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
856; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
857; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
858; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
859; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
860; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
861; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
862; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
863; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
864; CHECK-SSE2-NEXT:    psrld $1, %xmm1
865; CHECK-SSE2-NEXT:    pslld $31, %xmm0
866; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
867; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
868; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
869; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
870; CHECK-SSE2-NEXT:    retq
871;
872; CHECK-SSE41-LABEL: test_srem_even_one:
873; CHECK-SSE41:       # %bb.0:
874; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
875; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
876; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
877; CHECK-SSE41-NEXT:    psrld $1, %xmm1
878; CHECK-SSE41-NEXT:    pslld $31, %xmm0
879; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
880; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378]
881; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
882; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
883; CHECK-SSE41-NEXT:    psrld $31, %xmm0
884; CHECK-SSE41-NEXT:    retq
885;
886; CHECK-AVX1-LABEL: test_srem_even_one:
887; CHECK-AVX1:       # %bb.0:
888; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
889; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
890; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
891; CHECK-AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
892; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
893; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
894; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
895; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
896; CHECK-AVX1-NEXT:    retq
897;
898; CHECK-AVX2-LABEL: test_srem_even_one:
899; CHECK-AVX2:       # %bb.0:
900; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
901; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
902; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378]
903; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
904; CHECK-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm1
905; CHECK-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
906; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
907; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
908; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
909; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
910; CHECK-AVX2-NEXT:    retq
911;
912; CHECK-AVX512VL-LABEL: test_srem_even_one:
913; CHECK-AVX512VL:       # %bb.0:
914; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
915; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
916; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
917; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
918; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
919; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
920; CHECK-AVX512VL-NEXT:    retq
921  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
922  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
923  %ret = zext <4 x i1> %cmp to <4 x i32>
924  ret <4 x i32> %ret
925}
926
927; One one divisor in odd+even divisor
; Computes zext(icmp eq (srem %X, <5, 14, 1, 100>), 0): each result lane is 1
; when the matching lane of %X is a multiple of its divisor, otherwise 0.
; The divisor vector mixes an odd value (5), even values (14, 100) and the
; value 1 in lane 2, whose remainder is always 0.
; The CHECK-<prefix> groups are autogenerated (see the NOTE at the top of the
; file); rerun utils/update_llc_test_checks.py rather than editing them.
928define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
929; CHECK-SSE2-LABEL: test_srem_odd_even_one:
930; CHECK-SSE2:       # %bb.0:
931; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
932; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
933; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
934; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
935; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
936; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
937; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
938; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
939; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
940; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
941; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
942; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
943; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
944; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
945; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
946; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
947; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
948; CHECK-SSE2-NEXT:    retq
949;
950; CHECK-SSE41-LABEL: test_srem_odd_even_one:
951; CHECK-SSE41:       # %bb.0:
952; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
953; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
954; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
955; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
956; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
957; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
958; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
959; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
960; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
961; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,306783378,4294967295,42949672]
962; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
963; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
964; CHECK-SSE41-NEXT:    psrld $31, %xmm0
965; CHECK-SSE41-NEXT:    retq
966;
967; CHECK-AVX1-LABEL: test_srem_odd_even_one:
968; CHECK-AVX1:       # %bb.0:
969; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
970; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
971; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
972; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
973; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
974; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
975; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
976; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
977; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
978; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
979; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
980; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
981; CHECK-AVX1-NEXT:    retq
982;
983; CHECK-AVX2-LABEL: test_srem_odd_even_one:
984; CHECK-AVX2:       # %bb.0:
985; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
986; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
987; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
988; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
989; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
990; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
991; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
992; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
993; CHECK-AVX2-NEXT:    retq
994;
995; CHECK-AVX512VL-LABEL: test_srem_odd_even_one:
996; CHECK-AVX512VL:       # %bb.0:
997; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
998; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
999; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1000; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1001; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1002; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1003; CHECK-AVX512VL-NEXT:    retq
1004  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100>
1005  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1006  %ret = zext <4 x i1> %cmp to <4 x i32>
1007  ret <4 x i32> %ret
1008}
1009
1010;------------------------------------------------------------------------------;
1011
1012; One INT_MIN divisor in odd divisor
; Computes zext(icmp eq (srem %X, <5, 5, 2147483648, 5>), 0): each result lane
; is 1 when the matching lane of %X is a multiple of its divisor, otherwise 0.
; The lane-2 divisor 2147483648 is the i32 bit pattern 0x80000000 (INT_MIN);
; the other lanes divide by the odd divisor 5.
; In the expected code lane 2 is computed on its own path: %X is masked with
; 2147483647, compared against zero, and that lane is then blended into the
; result of the multiply/add/unsigned-min sequence used for the other lanes.
; The CHECK-<prefix> groups are autogenerated (see the NOTE at the top of the
; file); rerun utils/update_llc_test_checks.py rather than editing them.
1013define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
1014; CHECK-SSE2-LABEL: test_srem_odd_INT_MIN:
1015; CHECK-SSE2:       # %bb.0:
1016; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
1017; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1018; CHECK-SSE2-NEXT:    pand %xmm0, %xmm2
1019; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
1020; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1021; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1022; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
1023; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1024; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1025; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
1026; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1027; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1028; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1029; CHECK-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
1030; CHECK-SSE2-NEXT:    pxor %xmm3, %xmm0
1031; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
1032; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0,2]
1033; CHECK-SSE2-NEXT:    psrld $31, %xmm0
1034; CHECK-SSE2-NEXT:    retq
1035;
1036; CHECK-SSE41-LABEL: test_srem_odd_INT_MIN:
1037; CHECK-SSE41:       # %bb.0:
1038; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1039; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1040; CHECK-SSE41-NEXT:    pand %xmm0, %xmm2
1041; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm2
1042; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1043; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1044; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,858993458,1,858993458]
1045; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1046; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1047; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
1048; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1049; CHECK-SSE41-NEXT:    retq
1050;
1051; CHECK-AVX1-LABEL: test_srem_odd_INT_MIN:
1052; CHECK-AVX1:       # %bb.0:
1053; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1054; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1055; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1056; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1057; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1058; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1059; CHECK-AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1060; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1061; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1062; CHECK-AVX1-NEXT:    retq
1063;
1064; CHECK-AVX2-LABEL: test_srem_odd_INT_MIN:
1065; CHECK-AVX2:       # %bb.0:
1066; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1067; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1068; CHECK-AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm2
1069; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1070; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1071; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1072; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1073; CHECK-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1074; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1075; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1076; CHECK-AVX2-NEXT:    retq
1077;
1078; CHECK-AVX512VL-LABEL: test_srem_odd_INT_MIN:
1079; CHECK-AVX512VL:       # %bb.0:
1080; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1081; CHECK-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
1082; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1083; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1084; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1085; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1086; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1087; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1088; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1089; CHECK-AVX512VL-NEXT:    retq
1090  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
1091  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1092  %ret = zext <4 x i1> %cmp to <4 x i32>
1093  ret <4 x i32> %ret
1094}
1095
1096; One INT_MIN divisor in even divisor
1097define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
1098; CHECK-SSE2-LABEL: test_srem_even_INT_MIN:
1099; CHECK-SSE2:       # %bb.0:
1100; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm2
1101; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3067833783,u,1,u]
1102; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1103; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1104; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1105; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1106; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1107; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1108; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1109; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2,2147483648]
1110; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
1111; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
1112; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1113; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
1114; CHECK-SSE2-NEXT:    pmuludq %xmm5, %xmm1
1115; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,3,2,3]
1116; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
1117; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1118; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1119; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1120; CHECK-SSE2-NEXT:    por %xmm4, %xmm3
1121; CHECK-SSE2-NEXT:    pxor %xmm5, %xmm3
1122; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1123; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1124; CHECK-SSE2-NEXT:    pxor %xmm3, %xmm1
1125; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1126; CHECK-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
1127; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
1128; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
1129; CHECK-SSE2-NEXT:    psrld $31, %xmm1
1130; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
1131; CHECK-SSE2-NEXT:    retq
1132;
1133; CHECK-SSE41-LABEL: test_srem_even_INT_MIN:
1134; CHECK-SSE41:       # %bb.0:
1135; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1136; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3067833783,3067833783,1,3067833783]
1137; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
1138; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1139; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1140; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1141; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1142; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1143; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1144; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1145; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1146; CHECK-SSE41-NEXT:    por %xmm4, %xmm3
1147; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [306783378,306783378,1,306783378]
1148; CHECK-SSE41-NEXT:    pminud %xmm3, %xmm2
1149; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm2
1150; CHECK-SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1151; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1152; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
1153; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1154; CHECK-SSE41-NEXT:    retq
1155;
1156; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
1157; CHECK-AVX1:       # %bb.0:
1158; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1159; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1160; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1161; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1162; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
1163; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1164; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1165; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1166; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1167; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1168; CHECK-AVX1-NEXT:    vpor %xmm4, %xmm2, %xmm2
1169; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1170; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1171; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1172; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1173; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
1174; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1175; CHECK-AVX1-NEXT:    retq
1176;
1177; CHECK-AVX2-LABEL: test_srem_even_INT_MIN:
1178; CHECK-AVX2:       # %bb.0:
1179; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1180; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1181; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1182; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1183; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1184; CHECK-AVX2-NEXT:    vpor %xmm3, %xmm2, %xmm2
1185; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1186; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1187; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
1188; CHECK-AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
1189; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1190; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1191; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1192; CHECK-AVX2-NEXT:    retq
1193;
1194; CHECK-AVX512VL-LABEL: test_srem_even_INT_MIN:
1195; CHECK-AVX512VL:       # %bb.0:
1196; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1197; CHECK-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
1198; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1199; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1200; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1201; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1202; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1203; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1204; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1205; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1206; CHECK-AVX512VL-NEXT:    retq
1207  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
1208  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1209  %ret = zext <4 x i1> %cmp to <4 x i32>
1210  ret <4 x i32> %ret
1211}
1212
1213; One INT_MIN divisor in odd+even divisor
1214define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; Purpose: per lane, return 1 when the signed remainder of %X by the constant
; divisor vector <5, 14, 2147483648, 100> is zero, and 0 otherwise.
; 2147483648 is the i32 bit pattern 0x80000000, i.e. the INT_MIN divisor named
; in the header comment of this test.
; The per-subtarget assertion blocks below are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expected AVX2 sequence, element 2 is produced separately: %X is masked
; with 2147483647, compared against zero, and blended into element 2 of the
; result.
1215; CHECK-SSE2-LABEL: test_srem_odd_even_INT_MIN:
1216; CHECK-SSE2:       # %bb.0:
1217; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm2
1218; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3067833783,1,3264175145]
1219; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1220; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1221; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1222; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1223; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1224; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1225; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1226; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,2147483648,2,1073741824]
1227; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
1228; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
1229; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1230; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1231; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
1232; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
1233; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1234; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1235; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1236; CHECK-SSE2-NEXT:    por %xmm4, %xmm3
1237; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1238; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1239; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
1240; CHECK-SSE2-NEXT:    pxor %xmm3, %xmm1
1241; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1242; CHECK-SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
1243; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
1244; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
1245; CHECK-SSE2-NEXT:    psrld $31, %xmm1
1246; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
1247; CHECK-SSE2-NEXT:    retq
1248;
1249; CHECK-SSE41-LABEL: test_srem_odd_even_INT_MIN:
1250; CHECK-SSE41:       # %bb.0:
1251; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
1252; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,3067833783,1,3264175145]
1253; CHECK-SSE41-NEXT:    pmulld %xmm0, %xmm2
1254; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1255; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1256; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1257; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1258; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1259; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1260; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1261; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1262; CHECK-SSE41-NEXT:    por %xmm4, %xmm3
1263; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [858993458,306783378,1,42949672]
1264; CHECK-SSE41-NEXT:    pminud %xmm3, %xmm2
1265; CHECK-SSE41-NEXT:    pcmpeqd %xmm3, %xmm2
1266; CHECK-SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1267; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1268; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
1269; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1270; CHECK-SSE41-NEXT:    retq
1271;
1272; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN:
1273; CHECK-AVX1:       # %bb.0:
1274; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1275; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1276; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1277; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
1278; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
1279; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1280; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1281; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1282; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
1283; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
1284; CHECK-AVX1-NEXT:    vpor %xmm4, %xmm2, %xmm2
1285; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1286; CHECK-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1287; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1288; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1289; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
1290; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1291; CHECK-AVX1-NEXT:    retq
1292;
1293; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN:
1294; CHECK-AVX2:       # %bb.0:
1295; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1296; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1297; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1298; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1299; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1300; CHECK-AVX2-NEXT:    vpor %xmm3, %xmm2, %xmm2
1301; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
1302; CHECK-AVX2-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
1303; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
1304; CHECK-AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
1305; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1306; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1307; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1308; CHECK-AVX2-NEXT:    retq
1309;
1310; CHECK-AVX512VL-LABEL: test_srem_odd_even_INT_MIN:
1311; CHECK-AVX512VL:       # %bb.0:
1312; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1313; CHECK-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
1314; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
1315; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1316; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1317; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1318; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
1319; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
1320; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1321; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1322; CHECK-AVX512VL-NEXT:    retq
; Input IR under test: srem by a non-splat constant vector, an equality test
; of the remainder against zero, then zext of each i1 result to a 0/1 i32.
1323  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
1324  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1325  %ret = zext <4 x i1> %cmp to <4 x i32>
1326  ret <4 x i32> %ret
1327}
1328
1329;==============================================================================;
1330
1331; One all-ones divisor and power-of-two divisor in odd divisor
1332define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; Purpose: per lane, return 1 when the signed remainder of %X by the constant
; divisor vector <5, 4294967295, 16, 5> is zero, and 0 otherwise.
; 4294967295 is the all-ones i32 bit pattern (the "all-ones" divisor of the
; header comment) and 16 is the power-of-two divisor; the rest are the odd 5.
; The per-subtarget assertion blocks below are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expected SSE4.1 sequence the final decision is an unsigned
; "value <= per-lane limit" test written as pminud followed by pcmpeqd, with
; limits [858993458,4294967295,268435455,858993458].
1333; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo:
1334; CHECK-SSE2:       # %bb.0:
1335; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1336; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1337; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1338; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1339; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1340; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1341; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1342; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
1343; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1344; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1345; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1346; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
1347; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1348; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1349; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1350; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1351; CHECK-SSE2-NEXT:    retq
1352;
1353; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo:
1354; CHECK-SSE41:       # %bb.0:
1355; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1356; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1357; CHECK-SSE41-NEXT:    pmovsxdq {{.*#+}} xmm1 = [1,268435456]
1358; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
1359; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1360; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
1361; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
1362; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,858993458]
1363; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1364; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1365; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1366; CHECK-SSE41-NEXT:    retq
1367;
1368; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo:
1369; CHECK-AVX1:       # %bb.0:
1370; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1371; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1372; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1373; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1374; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
1375; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1376; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1377; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1378; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1379; CHECK-AVX1-NEXT:    retq
1380;
1381; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo:
1382; CHECK-AVX2:       # %bb.0:
1383; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1384; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1385; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1386; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1387; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1388; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1389; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1390; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1391; CHECK-AVX2-NEXT:    retq
1392;
1393; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo:
1394; CHECK-AVX512VL:       # %bb.0:
1395; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1396; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1397; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1398; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1399; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1400; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1401; CHECK-AVX512VL-NEXT:    retq
; Input IR under test: srem by a non-splat constant vector, an equality test
; of the remainder against zero, then zext of each i1 result to a 0/1 i32.
1402  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
1403  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1404  %ret = zext <4 x i1> %cmp to <4 x i32>
1405  ret <4 x i32> %ret
1406}
1407
1408; One all-ones divisor and power-of-two divisor in even divisor
1409define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; Purpose: per lane, return 1 when the signed remainder of %X by the constant
; divisor vector <14, 4294967295, 16, 14> is zero, and 0 otherwise.
; 4294967295 is the all-ones i32 bit pattern (the "all-ones" divisor of the
; header comment) and 16 is the power-of-two divisor; the rest are the even 14.
; The per-subtarget assertion blocks below are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expected SSE4.1 sequence the final decision is an unsigned
; "value <= per-lane limit" test written as pminud followed by pcmpeqd, with
; limits [306783378,4294967295,268435455,306783378].
1410; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo:
1411; CHECK-SSE2:       # %bb.0:
1412; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1413; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1414; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1415; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1416; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1417; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1418; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1419; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,1,268435456,2147483648]
1420; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1421; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1422; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1423; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1424; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
1425; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1426; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1427; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
1428; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1429; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1430; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1431; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1432; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1433; CHECK-SSE2-NEXT:    retq
1434;
1435; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo:
1436; CHECK-SSE41:       # %bb.0:
1437; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1438; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1439; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1440; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1441; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1442; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1443; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1444; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1445; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1446; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1447; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435455,306783378]
1448; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1449; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1450; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1451; CHECK-SSE41-NEXT:    retq
1452;
1453; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo:
1454; CHECK-AVX1:       # %bb.0:
1455; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1456; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1457; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1458; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1459; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1460; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1461; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1462; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1463; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1464; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1465; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1466; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1467; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1468; CHECK-AVX1-NEXT:    retq
1469;
1470; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo:
1471; CHECK-AVX2:       # %bb.0:
1472; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1473; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1474; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1475; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1476; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1477; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1478; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1479; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1480; CHECK-AVX2-NEXT:    retq
1481;
1482; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo:
1483; CHECK-AVX512VL:       # %bb.0:
1484; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1485; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1486; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1487; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1488; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1489; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1490; CHECK-AVX512VL-NEXT:    retq
; Input IR under test: srem by a non-splat constant vector, an equality test
; of the remainder against zero, then zext of each i1 result to a 0/1 i32.
1491  %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
1492  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1493  %ret = zext <4 x i1> %cmp to <4 x i32>
1494  ret <4 x i32> %ret
1495}
1496
1497; One all-ones divisor and power-of-two divisor in odd+even divisor
1498define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; Purpose: per lane, return 1 when the signed remainder of %X by the constant
; divisor vector <5, 4294967295, 16, 100> is zero, and 0 otherwise.
; 4294967295 is the all-ones i32 bit pattern (the "all-ones" divisor of the
; header comment), 16 is the power-of-two divisor, 5 is odd and 100 is even.
; The per-subtarget assertion blocks below are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expected SSE4.1 sequence the final decision is an unsigned
; "value <= per-lane limit" test written as pminud followed by pcmpeqd, with
; limits [858993458,4294967295,268435455,42949672].
1499; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
1500; CHECK-SSE2:       # %bb.0:
1501; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1502; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1503; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1504; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1505; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1506; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1507; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1508; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,268435456,1073741824]
1509; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1510; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1511; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1512; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1513; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
1514; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1515; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1516; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
1517; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1518; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1519; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1520; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1521; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1522; CHECK-SSE2-NEXT:    retq
1523;
1524; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo:
1525; CHECK-SSE41:       # %bb.0:
1526; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1527; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1528; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1529; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1530; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1531; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1532; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1533; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1534; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1535; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1536; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,42949672]
1537; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1538; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1539; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1540; CHECK-SSE41-NEXT:    retq
1541;
1542; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo:
1543; CHECK-AVX1:       # %bb.0:
1544; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1545; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1546; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1547; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1548; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1549; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1550; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1551; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1552; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1553; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1554; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1555; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1556; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1557; CHECK-AVX1-NEXT:    retq
1558;
1559; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
1560; CHECK-AVX2:       # %bb.0:
1561; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1562; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1563; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1564; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1565; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1566; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1567; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1568; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1569; CHECK-AVX2-NEXT:    retq
1570;
1571; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo:
1572; CHECK-AVX512VL:       # %bb.0:
1573; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1574; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1575; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1576; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1577; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1578; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1579; CHECK-AVX512VL-NEXT:    retq
; Input IR under test: srem by a non-splat constant vector, an equality test
; of the remainder against zero, then zext of each i1 result to a 0/1 i32.
1580  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
1581  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1582  %ret = zext <4 x i1> %cmp to <4 x i32>
1583  ret <4 x i32> %ret
1584}
1585
1586;------------------------------------------------------------------------------;
1587
1588; One all-ones divisor and one one divisor in odd divisor
1589define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
; Purpose: per lane, return 1 when the signed remainder of %X by the constant
; divisor vector <5, 4294967295, 1, 5> is zero, and 0 otherwise.
; 4294967295 is the all-ones i32 bit pattern (the "all-ones" divisor of the
; header comment) and 1 is the "one" divisor; the remaining lanes use the odd 5.
; The per-subtarget assertion blocks below are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expected SSE4.1 sequence the per-lane limits fed to pminud are
; [858993458,4294967295,4294967295,858993458]; with a limit of 4294967295 the
; pminud/pcmpeqd pair always reports equality for elements 1 and 2.
1590; CHECK-SSE2-LABEL: test_srem_odd_allones_and_one:
1591; CHECK-SSE2:       # %bb.0:
1592; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
1593; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1594; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
1595; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1596; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1597; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
1598; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1599; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1600; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1601; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1602; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1603; CHECK-SSE2-NEXT:    retq
1604;
1605; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one:
1606; CHECK-SSE41:       # %bb.0:
1607; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1608; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1609; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458]
1610; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1611; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1612; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1613; CHECK-SSE41-NEXT:    retq
1614;
1615; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one:
1616; CHECK-AVX1:       # %bb.0:
1617; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1618; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1619; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1620; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1621; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1622; CHECK-AVX1-NEXT:    retq
1623;
1624; CHECK-AVX2-LABEL: test_srem_odd_allones_and_one:
1625; CHECK-AVX2:       # %bb.0:
1626; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
1627; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
1628; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
1629; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
1630; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1631; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1632; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1633; CHECK-AVX2-NEXT:    retq
1634;
1635; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_one:
1636; CHECK-AVX512VL:       # %bb.0:
1637; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1638; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1639; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1640; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1641; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1642; CHECK-AVX512VL-NEXT:    retq
; Input IR under test: srem by a non-splat constant vector, an equality test
; of the remainder against zero, then zext of each i1 result to a 0/1 i32.
1643  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
1644  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1645  %ret = zext <4 x i1> %cmp to <4 x i32>
1646  ret <4 x i32> %ret
1647}
1648
1649; One all-ones divisor and one one divisor in even divisor
1650define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
; Purpose: per lane, return 1 when the signed remainder of %X by the constant
; divisor vector <14, 4294967295, 1, 14> is zero, and 0 otherwise.
; 4294967295 is the all-ones i32 bit pattern (the "all-ones" divisor of the
; header comment) and 1 is the "one" divisor; the remaining lanes use the
; even 14.
; The per-subtarget assertion blocks below are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expected SSE/AVX sequences the shift-right-by-1, shift-left-by-31 and
; OR triple combines into a 32-bit rotate right by 1; the AVX512VL sequence has
; a single vprord $1 at the same point.
1651; CHECK-SSE2-LABEL: test_srem_even_allones_and_one:
1652; CHECK-SSE2:       # %bb.0:
1653; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
1654; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1655; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
1656; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1657; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
1658; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
1659; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1660; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1661; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
1662; CHECK-SSE2-NEXT:    psrld $1, %xmm1
1663; CHECK-SSE2-NEXT:    pslld $31, %xmm0
1664; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
1665; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1666; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1667; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1668; CHECK-SSE2-NEXT:    retq
1669;
1670; CHECK-SSE41-LABEL: test_srem_even_allones_and_one:
1671; CHECK-SSE41:       # %bb.0:
1672; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1673; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1674; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
1675; CHECK-SSE41-NEXT:    psrld $1, %xmm1
1676; CHECK-SSE41-NEXT:    pslld $31, %xmm0
1677; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
1678; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,4294967295,4294967295,306783378]
1679; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1680; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1681; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1682; CHECK-SSE41-NEXT:    retq
1683;
1684; CHECK-AVX1-LABEL: test_srem_even_allones_and_one:
1685; CHECK-AVX1:       # %bb.0:
1686; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1687; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1688; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
1689; CHECK-AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
1690; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1691; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1692; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1693; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1694; CHECK-AVX1-NEXT:    retq
1695;
1696; CHECK-AVX2-LABEL: test_srem_even_allones_and_one:
1697; CHECK-AVX2:       # %bb.0:
1698; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
1699; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
1700; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378]
1701; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
1702; CHECK-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm1
1703; CHECK-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1704; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1705; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1706; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1707; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1708; CHECK-AVX2-NEXT:    retq
1709;
1710; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_one:
1711; CHECK-AVX512VL:       # %bb.0:
1712; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1713; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1714; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
1715; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1716; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1717; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1718; CHECK-AVX512VL-NEXT:    retq
; Input IR under test: srem by a non-splat constant vector, an equality test
; of the remainder against zero, then zext of each i1 result to a 0/1 i32.
1719  %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
1720  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1721  %ret = zext <4 x i1> %cmp to <4 x i32>
1722  ret <4 x i32> %ret
1723}
1724
1725; One all-ones divisor and one one divisor in odd+even divisor
1726define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
1727; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_one:
1728; CHECK-SSE2:       # %bb.0:
1729; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1730; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1731; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1732; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1733; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1734; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1735; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1736; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
1737; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1738; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1739; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1740; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1741; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1742; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
1743; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1744; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1745; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1746; CHECK-SSE2-NEXT:    retq
1747;
1748; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one:
1749; CHECK-SSE41:       # %bb.0:
1750; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1751; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1752; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1753; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1754; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1755; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1756; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1757; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1758; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1759; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,42949672]
1760; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1761; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1762; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1763; CHECK-SSE41-NEXT:    retq
1764;
1765; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one:
1766; CHECK-AVX1:       # %bb.0:
1767; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1768; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1769; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1770; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1771; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1772; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1773; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1774; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1775; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1776; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1777; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1778; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1779; CHECK-AVX1-NEXT:    retq
1780;
1781; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one:
1782; CHECK-AVX2:       # %bb.0:
1783; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1784; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1785; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1786; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1787; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1788; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1789; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1790; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1791; CHECK-AVX2-NEXT:    retq
1792;
1793; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one:
1794; CHECK-AVX512VL:       # %bb.0:
1795; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1796; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1797; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1798; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1799; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1800; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1801; CHECK-AVX512VL-NEXT:    retq
1802  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> ; divisor lanes: 5 (odd), 4294967295 (all-ones), 1, 100 (even)
1803  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true iff the remainder is zero
1804  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to an i32 holding 0 or 1
1805  ret <4 x i32> %ret
1806}
1807
1808;------------------------------------------------------------------------------;
1809
1810; One power-of-two divisor and one one divisor in odd divisor
1811define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
1812; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one:
1813; CHECK-SSE2:       # %bb.0:
1814; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1815; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1816; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1817; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1818; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1819; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1820; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1821; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
1822; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1823; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1824; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1825; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1826; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1827; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
1828; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1829; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1830; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1831; CHECK-SSE2-NEXT:    retq
1832;
1833; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one:
1834; CHECK-SSE41:       # %bb.0:
1835; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1836; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1837; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1838; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1839; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1840; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1841; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1842; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1843; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1844; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,268435455,4294967295,858993458]
1845; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1846; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1847; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1848; CHECK-SSE41-NEXT:    retq
1849;
1850; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one:
1851; CHECK-AVX1:       # %bb.0:
1852; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1853; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1854; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1855; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1856; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1857; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1858; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1859; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1860; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1861; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1862; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1863; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1864; CHECK-AVX1-NEXT:    retq
1865;
1866; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one:
1867; CHECK-AVX2:       # %bb.0:
1868; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1869; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1870; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1871; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1872; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1873; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1874; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1875; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1876; CHECK-AVX2-NEXT:    retq
1877;
1878; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one:
1879; CHECK-AVX512VL:       # %bb.0:
1880; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1881; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1882; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1883; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1884; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1885; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1886; CHECK-AVX512VL-NEXT:    retq
1887  %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5> ; divisor lanes: 5 (odd), 16 (power of two), 1, 5 (odd)
1888  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true iff the remainder is zero
1889  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to an i32 holding 0 or 1
1890  ret <4 x i32> %ret
1891}
1892
1893; One power-of-two divisor and one one divisor in even divisor
1894define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
1895; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one:
1896; CHECK-SSE2:       # %bb.0:
1897; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1898; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1899; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1900; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1901; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1902; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1903; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1904; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,268435456,1,2147483648]
1905; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
1906; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1907; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1908; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1909; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
1910; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1911; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1912; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
1913; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1914; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1915; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1916; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1917; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1918; CHECK-SSE2-NEXT:    retq
1919;
1920; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one:
1921; CHECK-SSE41:       # %bb.0:
1922; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1923; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1924; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1925; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1926; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1927; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1928; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1929; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1930; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1931; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1932; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,268435455,4294967295,306783378]
1933; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1934; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1935; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1936; CHECK-SSE41-NEXT:    retq
1937;
1938; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one:
1939; CHECK-AVX1:       # %bb.0:
1940; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1941; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1942; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1943; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1944; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1945; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1946; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1947; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1948; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1949; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1950; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1951; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1952; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1953; CHECK-AVX1-NEXT:    retq
1954;
1955; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one:
1956; CHECK-AVX2:       # %bb.0:
1957; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1958; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1959; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1960; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1961; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1962; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1963; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1964; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1965; CHECK-AVX2-NEXT:    retq
1966;
1967; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one:
1968; CHECK-AVX512VL:       # %bb.0:
1969; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1970; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1971; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1972; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1973; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1974; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1975; CHECK-AVX512VL-NEXT:    retq
1976  %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> ; divisor lanes: 14 (even), 16 (power of two), 1, 14 (even)
1977  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true iff the remainder is zero
1978  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to an i32 holding 0 or 1
1979  ret <4 x i32> %ret
1980}
1981
1982; One power-of-two divisor and one one divisor in odd+even divisor
1983define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
1984; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one:
1985; CHECK-SSE2:       # %bb.0:
1986; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1987; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1988; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1989; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1990; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1991; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1992; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1993; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
1994; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1995; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1996; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1997; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1998; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1999; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
2000; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2001; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2002; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2003; CHECK-SSE2-NEXT:    retq
2004;
2005; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one:
2006; CHECK-SSE41:       # %bb.0:
2007; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2008; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2009; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2010; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2011; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
2012; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
2013; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
2014; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
2015; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
2016; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,268435455,4294967295,42949672]
2017; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
2018; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2019; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2020; CHECK-SSE41-NEXT:    retq
2021;
2022; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one:
2023; CHECK-AVX1:       # %bb.0:
2024; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2025; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2026; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2027; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2028; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2029; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
2030; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
2031; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
2032; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
2033; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2034; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2035; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2036; CHECK-AVX1-NEXT:    retq
2037;
2038; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one:
2039; CHECK-AVX2:       # %bb.0:
2040; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2041; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2042; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2043; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2044; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
2045; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2046; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2047; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2048; CHECK-AVX2-NEXT:    retq
2049;
2050; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one:
2051; CHECK-AVX512VL:       # %bb.0:
2052; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2053; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2054; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2055; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2056; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2057; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2058; CHECK-AVX512VL-NEXT:    retq
2059  %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100> ; divisor lanes: 5 (odd), 16 (power of two), 1, 100 (even)
2060  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true iff the remainder is zero
2061  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to an i32 holding 0 or 1
2062  ret <4 x i32> %ret
2063}
2064
2065;------------------------------------------------------------------------------;
2066
; One all-ones divisor, one power-of-two divisor and one one divisor in odd divisor
2067define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
2068; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
2069; CHECK-SSE2:       # %bb.0:
2070; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2071; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2072; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2073; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2074; CHECK-SSE2-NEXT:    psrlq $32, %xmm1
2075; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2076; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
2077; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2078; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2079; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2080; CHECK-SSE2-NEXT:    retq
2081;
2082; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
2083; CHECK-SSE41:       # %bb.0:
2084; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2085; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2086; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2087; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2088; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
2089; CHECK-SSE41-NEXT:    psrlq $32, %xmm0
2090; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
2091; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,4294967295]
2092; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
2093; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2094; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2095; CHECK-SSE41-NEXT:    retq
2096;
2097; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
2098; CHECK-AVX1:       # %bb.0:
2099; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2100; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2101; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2102; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2103; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
2104; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
2105; CHECK-AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
2106; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2107; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2108; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2109; CHECK-AVX1-NEXT:    retq
2110;
2111; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
2112; CHECK-AVX2:       # %bb.0:
2113; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2114; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2115; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2116; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2117; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
2118; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2119; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2120; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2121; CHECK-AVX2-NEXT:    retq
2122;
2123; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
2124; CHECK-AVX512VL:       # %bb.0:
2125; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2126; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2127; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2128; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2129; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2130; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2131; CHECK-AVX512VL-NEXT:    retq
2132  %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1> ; divisor lanes: 5 (odd), 4294967295 (all-ones), 16 (power of two), 1
2133  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true iff the remainder is zero
2134  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to an i32 holding 0 or 1
2135  ret <4 x i32> %ret
2136}
2137
; One all-ones divisor, one power-of-two divisor and one one divisor in even divisor
2138define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
2139; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
2140; CHECK-SSE2:       # %bb.0:
2141; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2142; CHECK-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2143; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2144; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2145; CHECK-SSE2-NEXT:    psrlq $32, %xmm1
2146; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2147; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
2148; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2149; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2150; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2151; CHECK-SSE2-NEXT:    retq
2152;
2153; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
2154; CHECK-SSE41:       # %bb.0:
2155; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2156; CHECK-SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2157; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2158; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
2159; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
2160; CHECK-SSE41-NEXT:    psrlq $32, %xmm0
2161; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
2162; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435455,4294967295]
2163; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
2164; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
2165; CHECK-SSE41-NEXT:    psrld $31, %xmm0
2166; CHECK-SSE41-NEXT:    retq
2167;
2168; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
2169; CHECK-AVX1:       # %bb.0:
2170; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2171; CHECK-AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2172; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2173; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
2174; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
2175; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
2176; CHECK-AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
2177; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2178; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2179; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
2180; CHECK-AVX1-NEXT:    retq
2181;
2182; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
2183; CHECK-AVX2:       # %bb.0:
2184; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2185; CHECK-AVX2-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2186; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2187; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2188; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
2189; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2190; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2191; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
2192; CHECK-AVX2-NEXT:    retq
2193;
2194; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
2195; CHECK-AVX512VL:       # %bb.0:
2196; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2197; CHECK-AVX512VL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2198; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2199; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2200; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
2201; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
2202; CHECK-AVX512VL-NEXT:    retq
2203  %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1> ; divisor lanes: 14 (even), 4294967295 (all-ones), 16 (power of two), 1
2204  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> ; lane is true iff the remainder is zero
2205  %ret = zext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to an i32 holding 0 or 1
2206  ret <4 x i32> %ret
2207}
2208
2209; PR51133: the VSELECT should have i1 element type
2210define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
2211; CHECK-SSE2-LABEL: pr51133:
2212; CHECK-SSE2:       # %bb.0:
2213; CHECK-SSE2-NEXT:    movq %rdi, %rax
2214; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm5
2215; CHECK-SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2216; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [9,0,41,183,1,1,161,221]
2217; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
2218; CHECK-SSE2-NEXT:    pand %xmm4, %xmm5
2219; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm6
2220; CHECK-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2221; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [171,103,183,171,61,1,127,183]
2222; CHECK-SSE2-NEXT:    pand %xmm4, %xmm6
2223; CHECK-SSE2-NEXT:    packuswb %xmm5, %xmm6
2224; CHECK-SSE2-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
2225; CHECK-SSE2-NEXT:    movdqa %xmm6, %xmm5
2226; CHECK-SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm6[8],xmm5[9],xmm6[9],xmm5[10],xmm6[10],xmm5[11],xmm6[11],xmm5[12],xmm6[12],xmm5[13],xmm6[13],xmm5[14],xmm6[14],xmm5[15],xmm6[15]
2227; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [128,1,128,1,128,32,1,1]
2228; CHECK-SSE2-NEXT:    psrlw $8, %xmm5
2229; CHECK-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2230; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [1,1,1,128,64,2,1,32]
2231; CHECK-SSE2-NEXT:    psrlw $8, %xmm6
2232; CHECK-SSE2-NEXT:    packuswb %xmm5, %xmm6
2233; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2]
2234; CHECK-SSE2-NEXT:    pminub %xmm6, %xmm7
2235; CHECK-SSE2-NEXT:    pcmpeqb %xmm6, %xmm7
2236; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
2237; CHECK-SSE2-NEXT:    pandn %xmm5, %xmm7
2238; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2239; CHECK-SSE2-NEXT:    pxor %xmm6, %xmm6
2240; CHECK-SSE2-NEXT:    pcmpgtb %xmm6, %xmm1
2241; CHECK-SSE2-NEXT:    pandn %xmm1, %xmm5
2242; CHECK-SSE2-NEXT:    por %xmm7, %xmm5
2243; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2244; CHECK-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2245; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [223,223,205,183,161,1,171,239]
2246; CHECK-SSE2-NEXT:    pand %xmm4, %xmm1
2247; CHECK-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2248; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,205,27,241,1,1,1,163]
2249; CHECK-SSE2-NEXT:    pand %xmm4, %xmm0
2250; CHECK-SSE2-NEXT:    packuswb %xmm1, %xmm0
2251; CHECK-SSE2-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2252; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
2253; CHECK-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
2254; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [128,128,1,1,1,128,1,64]
2255; CHECK-SSE2-NEXT:    psrlw $8, %xmm1
2256; CHECK-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2257; CHECK-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,128,128,32,128,32]
2258; CHECK-SSE2-NEXT:    psrlw $8, %xmm0
2259; CHECK-SSE2-NEXT:    packuswb %xmm1, %xmm0
2260; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5]
2261; CHECK-SSE2-NEXT:    pmaxub %xmm0, %xmm1
2262; CHECK-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
2263; CHECK-SSE2-NEXT:    pcmpeqb %xmm6, %xmm3
2264; CHECK-SSE2-NEXT:    pandn %xmm5, %xmm3
2265; CHECK-SSE2-NEXT:    pcmpeqb %xmm6, %xmm2
2266; CHECK-SSE2-NEXT:    pandn %xmm1, %xmm2
2267; CHECK-SSE2-NEXT:    pmovmskb %xmm2, %ecx
2268; CHECK-SSE2-NEXT:    pmovmskb %xmm3, %edx
2269; CHECK-SSE2-NEXT:    shll $16, %edx
2270; CHECK-SSE2-NEXT:    orl %ecx, %edx
2271; CHECK-SSE2-NEXT:    movl %edx, (%rdi)
2272; CHECK-SSE2-NEXT:    retq
2273;
2274; CHECK-SSE41-LABEL: pr51133:
2275; CHECK-SSE41:       # %bb.0:
2276; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm4
2277; CHECK-SSE41-NEXT:    movq %rdi, %rax
2278; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm0
2279; CHECK-SSE41-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [171,0,183,0,61,0,127,0,9,0,41,0,1,0,161,0]
2280; CHECK-SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
2281; CHECK-SSE41-NEXT:    pand %xmm5, %xmm0
2282; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm6
2283; CHECK-SSE41-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [0,103,0,171,0,1,0,183,0,0,0,183,0,1,0,221]
2284; CHECK-SSE41-NEXT:    psllw $8, %xmm6
2285; CHECK-SSE41-NEXT:    por %xmm0, %xmm6
2286; CHECK-SSE41-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
2287; CHECK-SSE41-NEXT:    movdqa %xmm6, %xmm0
2288; CHECK-SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm6[8],xmm0[9],xmm6[9],xmm0[10],xmm6[10],xmm0[11],xmm6[11],xmm0[12],xmm6[12],xmm0[13],xmm6[13],xmm0[14],xmm6[14],xmm0[15],xmm6[15]
2289; CHECK-SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,1,128,1,128,32,1,1]
2290; CHECK-SSE41-NEXT:    psrlw $8, %xmm0
2291; CHECK-SSE41-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2292; CHECK-SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [1,1,1,128,64,2,1,32]
2293; CHECK-SSE41-NEXT:    psrlw $8, %xmm6
2294; CHECK-SSE41-NEXT:    packuswb %xmm0, %xmm6
2295; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2]
2296; CHECK-SSE41-NEXT:    pminub %xmm6, %xmm0
2297; CHECK-SSE41-NEXT:    pcmpeqb %xmm6, %xmm0
2298; CHECK-SSE41-NEXT:    pcmpeqd %xmm7, %xmm7
2299; CHECK-SSE41-NEXT:    pxor %xmm0, %xmm7
2300; CHECK-SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2301; CHECK-SSE41-NEXT:    pxor %xmm6, %xmm6
2302; CHECK-SSE41-NEXT:    pcmpgtb %xmm6, %xmm1
2303; CHECK-SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
2304; CHECK-SSE41-NEXT:    pblendvb %xmm0, %xmm7, %xmm1
2305; CHECK-SSE41-NEXT:    movdqa %xmm4, %xmm0
2306; CHECK-SSE41-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,0,27,0,1,0,1,0,223,0,205,0,161,0,171,0]
2307; CHECK-SSE41-NEXT:    pand %xmm5, %xmm0
2308; CHECK-SSE41-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,205,0,241,0,1,0,163,0,223,0,183,0,1,0,239]
2309; CHECK-SSE41-NEXT:    psllw $8, %xmm4
2310; CHECK-SSE41-NEXT:    por %xmm0, %xmm4
2311; CHECK-SSE41-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
2312; CHECK-SSE41-NEXT:    movdqa %xmm4, %xmm0
2313; CHECK-SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm4[8],xmm0[9],xmm4[9],xmm0[10],xmm4[10],xmm0[11],xmm4[11],xmm0[12],xmm4[12],xmm0[13],xmm4[13],xmm0[14],xmm4[14],xmm0[15],xmm4[15]
2314; CHECK-SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,1,1,1,128,1,64]
2315; CHECK-SSE41-NEXT:    psrlw $8, %xmm0
2316; CHECK-SSE41-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2317; CHECK-SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [1,1,1,128,128,32,128,32]
2318; CHECK-SSE41-NEXT:    psrlw $8, %xmm4
2319; CHECK-SSE41-NEXT:    packuswb %xmm0, %xmm4
2320; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5]
2321; CHECK-SSE41-NEXT:    pmaxub %xmm4, %xmm0
2322; CHECK-SSE41-NEXT:    pcmpeqb %xmm4, %xmm0
2323; CHECK-SSE41-NEXT:    pcmpeqb %xmm6, %xmm3
2324; CHECK-SSE41-NEXT:    pandn %xmm1, %xmm3
2325; CHECK-SSE41-NEXT:    pcmpeqb %xmm6, %xmm2
2326; CHECK-SSE41-NEXT:    pandn %xmm0, %xmm2
2327; CHECK-SSE41-NEXT:    pmovmskb %xmm2, %ecx
2328; CHECK-SSE41-NEXT:    pmovmskb %xmm3, %edx
2329; CHECK-SSE41-NEXT:    shll $16, %edx
2330; CHECK-SSE41-NEXT:    orl %ecx, %edx
2331; CHECK-SSE41-NEXT:    movl %edx, (%rdi)
2332; CHECK-SSE41-NEXT:    retq
2333;
2334; CHECK-AVX1-LABEL: pr51133:
2335; CHECK-AVX1:       # %bb.0:
2336; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2337; CHECK-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
2338; CHECK-AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [34048,34048,26368,37632,21760,33024,22016,35072]
2339; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
2340; CHECK-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2341; CHECK-AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [20224,26368,6912,30976,33024,33024,33024,12032]
2342; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
2343; CHECK-AVX1-NEXT:    vpackuswb %xmm3, %xmm4, %xmm4
2344; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm5 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0]
2345; CHECK-AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
2346; CHECK-AVX1-NEXT:    vpand %xmm3, %xmm5, %xmm5
2347; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm6 # [0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1]
2348; CHECK-AVX1-NEXT:    vpsllw $8, %xmm6, %xmm6
2349; CHECK-AVX1-NEXT:    vpor %xmm6, %xmm5, %xmm5
2350; CHECK-AVX1-NEXT:    vpaddb %xmm5, %xmm4, %xmm4
2351; CHECK-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2352; CHECK-AVX1-NEXT:    vpsraw $8, %xmm5, %xmm5
2353; CHECK-AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [8,8,128,64,8,256,256,8]
2354; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
2355; CHECK-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm6 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2356; CHECK-AVX1-NEXT:    vpsraw $8, %xmm6, %xmm6
2357; CHECK-AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [64,128,128,16,256,64,256,16]
2358; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
2359; CHECK-AVX1-NEXT:    vpackuswb %xmm5, %xmm6, %xmm6
2360; CHECK-AVX1-NEXT:    vpsrlw $7, %xmm4, %xmm4
2361; CHECK-AVX1-NEXT:    vbroadcastss {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2362; CHECK-AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
2363; CHECK-AVX1-NEXT:    vpaddb %xmm4, %xmm6, %xmm4
2364; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm6 # [13,0,19,0,2,0,2,0,62,0,5,0,97,0,3,0]
2365; CHECK-AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm6
2366; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,5,0,34,0,8,0,88,0,62,0,7,0,2,0,60]
2367; CHECK-AVX1-NEXT:    vpsllw $8, %xmm4, %xmm4
2368; CHECK-AVX1-NEXT:    vpor %xmm4, %xmm6, %xmm4
2369; CHECK-AVX1-NEXT:    vpsubb %xmm4, %xmm0, %xmm4
2370; CHECK-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
2371; CHECK-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
2372; CHECK-AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [2304,0,10496,37632,33024,33024,21760,36096]
2373; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
2374; CHECK-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm7 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2375; CHECK-AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [22016,24320,37632,11008,12544,32512,16640,37632]
2376; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm7, %xmm7
2377; CHECK-AVX1-NEXT:    vpackuswb %xmm6, %xmm7, %xmm6
2378; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm7 # [0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0]
2379; CHECK-AVX1-NEXT:    vpand %xmm3, %xmm7, %xmm7
2380; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm8 # [0,0,0,0,0,255,0,1,0,1,0,1,0,1,0,1]
2381; CHECK-AVX1-NEXT:    vpsllw $8, %xmm8, %xmm8
2382; CHECK-AVX1-NEXT:    vpor %xmm7, %xmm8, %xmm7
2383; CHECK-AVX1-NEXT:    vpaddb %xmm7, %xmm6, %xmm6
2384; CHECK-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2385; CHECK-AVX1-NEXT:    vpsraw $8, %xmm7, %xmm7
2386; CHECK-AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [64,256,32,64,256,64,8,4]
2387; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm7, %xmm7
2388; CHECK-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm8 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2389; CHECK-AVX1-NEXT:    vpsraw $8, %xmm8, %xmm8
2390; CHECK-AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm8, %xmm8 # [256,8,64,256,16,4,8,8]
2391; CHECK-AVX1-NEXT:    vpsrlw $8, %xmm8, %xmm8
2392; CHECK-AVX1-NEXT:    vpackuswb %xmm7, %xmm8, %xmm7
2393; CHECK-AVX1-NEXT:    vpsrlw $7, %xmm6, %xmm6
2394; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6
2395; CHECK-AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm5
2396; CHECK-AVX1-NEXT:    vpaddb %xmm5, %xmm7, %xmm5
2397; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm6 # [3,0,7,0,84,0,127,0,114,0,50,0,2,0,97,0]
2398; CHECK-AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
2399; CHECK-AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,87,0,6,0,128,0,56,0,1,0,7,0,8,0,117]
2400; CHECK-AVX1-NEXT:    vpsllw $8, %xmm5, %xmm5
2401; CHECK-AVX1-NEXT:    vpor %xmm5, %xmm3, %xmm3
2402; CHECK-AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
2403; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
2404; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm4, %xmm3
2405; CHECK-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
2406; CHECK-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
2407; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm3
2408; CHECK-AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
2409; CHECK-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
2410; CHECK-AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
2411; CHECK-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
2412; CHECK-AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
2413; CHECK-AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
2414; CHECK-AVX1-NEXT:    retq
2415;
2416; CHECK-AVX2-LABEL: pr51133:
2417; CHECK-AVX2:       # %bb.0:
2418; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
2419; CHECK-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
2420; CHECK-AVX2-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [34048,34048,26368,37632,21760,33024,22016,35072,2304,0,10496,37632,33024,33024,21760,36096]
2421; CHECK-AVX2-NEXT:    vpsrlw $8, %ymm3, %ymm3
2422; CHECK-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
2423; CHECK-AVX2-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [20224,26368,6912,30976,33024,33024,33024,12032,22016,24320,37632,11008,12544,32512,16640,37632]
2424; CHECK-AVX2-NEXT:    vpsrlw $8, %ymm4, %ymm4
2425; CHECK-AVX2-NEXT:    vpackuswb %ymm3, %ymm4, %ymm3
2426; CHECK-AVX2-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0]
2427; CHECK-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
2428; CHECK-AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
2429; CHECK-AVX2-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm6 # [0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,255,0,1,0,1,0,1,0,1,0,1]
2430; CHECK-AVX2-NEXT:    vpsllw $8, %ymm6, %ymm6
2431; CHECK-AVX2-NEXT:    vpor %ymm6, %ymm4, %ymm4
2432; CHECK-AVX2-NEXT:    vpaddb %ymm4, %ymm3, %ymm3
2433; CHECK-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
2434; CHECK-AVX2-NEXT:    vpsraw $8, %ymm4, %ymm4
2435; CHECK-AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [8,8,128,64,8,256,256,8,64,256,32,64,256,64,8,4]
2436; CHECK-AVX2-NEXT:    vpsrlw $8, %ymm4, %ymm4
2437; CHECK-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm6 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
2438; CHECK-AVX2-NEXT:    vpsraw $8, %ymm6, %ymm6
2439; CHECK-AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [64,128,128,16,256,64,256,16,256,8,64,256,16,4,8,8]
2440; CHECK-AVX2-NEXT:    vpsrlw $8, %ymm6, %ymm6
2441; CHECK-AVX2-NEXT:    vpackuswb %ymm4, %ymm6, %ymm4
2442; CHECK-AVX2-NEXT:    vpsrlw $7, %ymm3, %ymm3
2443; CHECK-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
2444; CHECK-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
2445; CHECK-AVX2-NEXT:    vpaddb %ymm3, %ymm4, %ymm3
2446; CHECK-AVX2-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm4 # [13,0,19,0,2,0,2,0,62,0,5,0,97,0,3,0,3,0,7,0,84,0,127,0,114,0,50,0,2,0,97,0]
2447; CHECK-AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
2448; CHECK-AVX2-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,5,0,34,0,8,0,88,0,62,0,7,0,2,0,60,0,87,0,6,0,128,0,56,0,1,0,7,0,8,0,117]
2449; CHECK-AVX2-NEXT:    vpsllw $8, %ymm3, %ymm3
2450; CHECK-AVX2-NEXT:    vpor %ymm3, %ymm4, %ymm3
2451; CHECK-AVX2-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
2452; CHECK-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
2453; CHECK-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
2454; CHECK-AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
2455; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
2456; CHECK-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
2457; CHECK-AVX2-NEXT:    retq
2458;
2459; CHECK-AVX512VL-LABEL: pr51133:
2460; CHECK-AVX512VL:       # %bb.0:
2461; CHECK-AVX512VL-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [197,0,27,0,1,0,1,0,223,0,205,0,161,0,171,0,171,0,183,0,61,0,127,0,9,0,41,0,1,0,161,0]
2462; CHECK-AVX512VL-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 # [0,205,0,241,0,1,0,163,0,223,0,183,0,1,0,239,0,103,0,171,0,1,0,183,0,0,0,183,0,1,0,221]
2463; CHECK-AVX512VL-NEXT:    vpsllw $8, %ymm3, %ymm3
2464; CHECK-AVX512VL-NEXT:    vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & mem)
2465; CHECK-AVX512VL-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm2
2466; CHECK-AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
2467; CHECK-AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,128,1,1,1,128,1,64,128,1,128,1,128,32,1,1]
2468; CHECK-AVX512VL-NEXT:    vpsrlw $8, %ymm3, %ymm3
2469; CHECK-AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
2470; CHECK-AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,1,1,128,128,32,128,32,1,1,1,128,64,2,1,32]
2471; CHECK-AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
2472; CHECK-AVX512VL-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
2473; CHECK-AVX512VL-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
2474; CHECK-AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
2475; CHECK-AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
2476; CHECK-AVX512VL-NEXT:    vpandn %ymm3, %ymm2, %ymm2
2477; CHECK-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
2478; CHECK-AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
2479; CHECK-AVX512VL-NEXT:    vpcmpgtb %ymm4, %ymm0, %ymm0
2480; CHECK-AVX512VL-NEXT:    vpandn %ymm0, %ymm3, %ymm3
2481; CHECK-AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm0
2482; CHECK-AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ~ymm0 & (ymm2 | ymm3)
2483; CHECK-AVX512VL-NEXT:    retq
2484  %rem = srem <32 x i8> %x, <i8 13, i8 5, i8 19, i8 34, i8 2, i8 8, i8 2, i8 88, i8 62, i8 62, i8 5, i8 7, i8 97, i8 2, i8 3, i8 60, i8 3, i8 87, i8 7, i8 6, i8 84, i8 -128, i8 127, i8 56, i8 114, i8 1, i8 50, i8 7, i8 2, i8 8, i8 97, i8 117>
2485  %cmp = icmp ne <32 x i8> %rem, zeroinitializer
2486  %cmp4 = icmp ne <32 x i8> %y, zeroinitializer
2487  %cmpres = and <32 x i1> %cmp4, %cmp
2488  ret <32 x i1> %cmpres
2489}
2490