xref: /llvm-project/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll (revision 69ffa7be3bda5547d7a41233f86b88539616e386)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
6; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL
7
8; Odd+Even divisors
; Lane-wise test of (X urem <5, 14, 25, 100>) == 0, with the <4 x i1> compare
; result zero-extended to <4 x i32>. The divisor vector mixes odd (5, 25) and
; even (14, 100) constants. The assertion groups below are autogenerated (see
; the NOTE at the top of this file), one group per check prefix.
9define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
10; CHECK-SSE2-LABEL: test_urem_odd_even:
11; CHECK-SSE2:       # %bb.0:
12; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
13; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
15; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
18; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
19; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
21; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
25; CHECK-SSE2-NEXT:    retq
26;
27; CHECK-SSE41-LABEL: test_urem_odd_even:
28; CHECK-SSE41:       # %bb.0:
29; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
31; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
33; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
34; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
35; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
36; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
37; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,171798691,42949672]
38; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
39; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
40; CHECK-SSE41-NEXT:    psrld $31, %xmm0
41; CHECK-SSE41-NEXT:    retq
42;
43; CHECK-AVX1-LABEL: test_urem_odd_even:
44; CHECK-AVX1:       # %bb.0:
45; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
46; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
47; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
48; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
49; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
50; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
51; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
52; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
53; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
54; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
55; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
56; CHECK-AVX1-NEXT:    retq
57;
58; CHECK-AVX2-LABEL: test_urem_odd_even:
59; CHECK-AVX2:       # %bb.0:
60; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
61; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
62; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
63; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
64; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
65; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
66; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
67; CHECK-AVX2-NEXT:    retq
68;
69; CHECK-AVX512VL-LABEL: test_urem_odd_even:
70; CHECK-AVX512VL:       # %bb.0:
71; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
72; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
73; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
74; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
75; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
76; CHECK-AVX512VL-NEXT:    retq
; IR under test: unsigned remainder by a non-splat constant vector, an
; equality compare of each lane against zero, then zext of the i1 lanes.
77  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
78  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
79  %ret = zext <4 x i1> %cmp to <4 x i32>
80  ret <4 x i32> %ret
81}
82
83;==============================================================================;
84
85; One all-ones divisor in odd divisor
; Lane-wise test of (X urem <5, 5, 4294967295, 5>) == 0, with the <4 x i1>
; compare result zero-extended to <4 x i32>. 4294967295 is the all-ones i32
; value (0xFFFFFFFF); the remaining lanes use the odd divisor 5. The assertion
; groups below are autogenerated (see the NOTE at the top of this file).
86define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
87; CHECK-SSE2-LABEL: test_urem_odd_allones_eq:
88; CHECK-SSE2:       # %bb.0:
89; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
90; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
91; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
92; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
93; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
94; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
95; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
96; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
97; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
98; CHECK-SSE2-NEXT:    retq
99;
100; CHECK-SSE41-LABEL: test_urem_odd_allones_eq:
101; CHECK-SSE41:       # %bb.0:
102; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
103; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459]
104; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
105; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
106; CHECK-SSE41-NEXT:    psrld $31, %xmm0
107; CHECK-SSE41-NEXT:    retq
108;
109; CHECK-AVX-LABEL: test_urem_odd_allones_eq:
110; CHECK-AVX:       # %bb.0:
111; CHECK-AVX-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
112; CHECK-AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
113; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
114; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
115; CHECK-AVX-NEXT:    retq
; IR under test: lane 2 divides by the all-ones constant, the other lanes by 5.
116  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
117  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
118  %ret = zext <4 x i1> %cmp to <4 x i32>
119  ret <4 x i32> %ret
120}
; Inequality counterpart of test_urem_odd_allones_eq: lane-wise test of
; (X urem <5, 5, 4294967295, 5>) != 0, with the <4 x i1> compare result
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value. The
; assertion groups below are autogenerated (see the NOTE at the top of this
; file).
121define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
122; CHECK-SSE2-LABEL: test_urem_odd_allones_ne:
123; CHECK-SSE2:       # %bb.0:
124; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
125; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
126; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
127; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
128; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
129; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
130; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
131; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
132; CHECK-SSE2-NEXT:    psrld $31, %xmm0
133; CHECK-SSE2-NEXT:    retq
134;
135; CHECK-SSE41-LABEL: test_urem_odd_allones_ne:
136; CHECK-SSE41:       # %bb.0:
137; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
138; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993460,858993460,2,858993460]
139; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
140; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
141; CHECK-SSE41-NEXT:    psrld $31, %xmm0
142; CHECK-SSE41-NEXT:    retq
143;
144; CHECK-AVX-LABEL: test_urem_odd_allones_ne:
145; CHECK-AVX:       # %bb.0:
146; CHECK-AVX-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
147; CHECK-AVX-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
148; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
149; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
150; CHECK-AVX-NEXT:    retq
; IR under test: same divisor vector as the eq variant, compared with 'ne'.
151  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
152  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
153  %ret = zext <4 x i1> %cmp to <4 x i32>
154  ret <4 x i32> %ret
155}
156
157; One all-ones divisor in even divisor
; Lane-wise test of (X urem <14, 14, 4294967295, 14>) == 0, with the <4 x i1>
; compare result zero-extended to <4 x i32>. 4294967295 is the all-ones i32
; value (0xFFFFFFFF); the remaining lanes use the even divisor 14. The
; assertion groups below are autogenerated (see the NOTE at the top of this
; file).
158define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
159; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
160; CHECK-SSE2:       # %bb.0:
161; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
162; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
163; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
164; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
165; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
166; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
167; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
168; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
169; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
170; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
171; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
172; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
173; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
174; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
175; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
176; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
177; CHECK-SSE2-NEXT:    retq
178;
179; CHECK-SSE41-LABEL: test_urem_even_allones_eq:
180; CHECK-SSE41:       # %bb.0:
181; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
182; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
183; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
184; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
185; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
186; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
187; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
188; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
189; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
190; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,1,306783378]
191; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
192; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
193; CHECK-SSE41-NEXT:    psrld $31, %xmm0
194; CHECK-SSE41-NEXT:    retq
195;
196; CHECK-AVX1-LABEL: test_urem_even_allones_eq:
197; CHECK-AVX1:       # %bb.0:
198; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
199; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
200; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
201; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
202; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
203; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
204; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
205; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
206; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
207; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
208; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
209; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
210; CHECK-AVX1-NEXT:    retq
211;
212; CHECK-AVX2-LABEL: test_urem_even_allones_eq:
213; CHECK-AVX2:       # %bb.0:
214; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
215; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
216; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
217; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
218; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
219; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
220; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
221; CHECK-AVX2-NEXT:    retq
222;
223; CHECK-AVX512VL-LABEL: test_urem_even_allones_eq:
224; CHECK-AVX512VL:       # %bb.0:
225; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
226; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
227; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
228; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
229; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
230; CHECK-AVX512VL-NEXT:    retq
; IR under test: lane 2 divides by the all-ones constant, the other lanes by 14.
231  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
232  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
233  %ret = zext <4 x i1> %cmp to <4 x i32>
234  ret <4 x i32> %ret
235}
; Inequality counterpart of test_urem_even_allones_eq: lane-wise test of
; (X urem <14, 14, 4294967295, 14>) != 0, with the <4 x i1> compare result
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value. The
; assertion groups below are autogenerated (see the NOTE at the top of this
; file).
236define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
237; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
238; CHECK-SSE2:       # %bb.0:
239; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
240; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
241; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
242; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
243; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
244; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
245; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
246; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
247; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
248; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
249; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
250; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
251; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
252; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
253; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
254; CHECK-SSE2-NEXT:    psrld $31, %xmm0
255; CHECK-SSE2-NEXT:    retq
256;
257; CHECK-SSE41-LABEL: test_urem_even_allones_ne:
258; CHECK-SSE41:       # %bb.0:
259; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
260; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
261; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
262; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
263; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
264; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
265; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
266; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
267; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
268; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783379,306783379,2,306783379]
269; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
270; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
271; CHECK-SSE41-NEXT:    psrld $31, %xmm0
272; CHECK-SSE41-NEXT:    retq
273;
274; CHECK-AVX1-LABEL: test_urem_even_allones_ne:
275; CHECK-AVX1:       # %bb.0:
276; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
277; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
278; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
279; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
280; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
281; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
282; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
283; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
284; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
285; CHECK-AVX1-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
286; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
287; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
288; CHECK-AVX1-NEXT:    retq
289;
290; CHECK-AVX2-LABEL: test_urem_even_allones_ne:
291; CHECK-AVX2:       # %bb.0:
292; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
293; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
294; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
295; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
296; CHECK-AVX2-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
297; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
298; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
299; CHECK-AVX2-NEXT:    retq
300;
301; CHECK-AVX512VL-LABEL: test_urem_even_allones_ne:
302; CHECK-AVX512VL:       # %bb.0:
303; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
304; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
305; CHECK-AVX512VL-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
306; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
307; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
308; CHECK-AVX512VL-NEXT:    retq
; IR under test: same divisor vector as the eq variant, compared with 'ne'.
309  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
310  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
311  %ret = zext <4 x i1> %cmp to <4 x i32>
312  ret <4 x i32> %ret
313}
314
315; One all-ones divisor in odd+even divisor
; Lane-wise test of (X urem <5, 14, 4294967295, 100>) == 0, with the <4 x i1>
; compare result zero-extended to <4 x i32>. The divisor vector combines an
; odd constant (5), even constants (14, 100) and the all-ones i32 value
; 4294967295 (0xFFFFFFFF). The assertion groups below are autogenerated (see
; the NOTE at the top of this file).
316define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
317; CHECK-SSE2-LABEL: test_urem_odd_even_allones_eq:
318; CHECK-SSE2:       # %bb.0:
319; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
320; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
321; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
322; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
323; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
324; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
325; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
326; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
327; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
328; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
329; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
330; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
331; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
332; CHECK-SSE2-NEXT:    retq
333;
334; CHECK-SSE41-LABEL: test_urem_odd_even_allones_eq:
335; CHECK-SSE41:       # %bb.0:
336; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
337; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
338; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
339; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
340; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
341; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
342; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
343; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
344; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,1,42949672]
345; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
346; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
347; CHECK-SSE41-NEXT:    psrld $31, %xmm0
348; CHECK-SSE41-NEXT:    retq
349;
350; CHECK-AVX1-LABEL: test_urem_odd_even_allones_eq:
351; CHECK-AVX1:       # %bb.0:
352; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
353; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
354; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
355; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
356; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
357; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
358; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
359; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
360; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
361; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
362; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
363; CHECK-AVX1-NEXT:    retq
364;
365; CHECK-AVX2-LABEL: test_urem_odd_even_allones_eq:
366; CHECK-AVX2:       # %bb.0:
367; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
368; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
369; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
370; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
371; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
372; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
373; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
374; CHECK-AVX2-NEXT:    retq
375;
376; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_eq:
377; CHECK-AVX512VL:       # %bb.0:
378; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
379; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
380; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
381; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
382; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
383; CHECK-AVX512VL-NEXT:    retq
; IR under test: lane 2 divides by the all-ones constant; lanes 0, 1 and 3
; divide by 5, 14 and 100 respectively.
384  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
385  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
386  %ret = zext <4 x i1> %cmp to <4 x i32>
387  ret <4 x i32> %ret
388}
; Inequality counterpart of test_urem_odd_even_allones_eq: lane-wise test of
; (X urem <5, 14, 4294967295, 100>) != 0, with the <4 x i1> compare result
; zero-extended to <4 x i32>. 4294967295 is the all-ones i32 value. The
; assertion groups below are autogenerated (see the NOTE at the top of this
; file).
389define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
390; CHECK-SSE2-LABEL: test_urem_odd_even_allones_ne:
391; CHECK-SSE2:       # %bb.0:
392; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
393; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
394; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
395; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
396; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
397; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
398; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
399; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
400; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
401; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
402; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
403; CHECK-SSE2-NEXT:    psrld $31, %xmm1
404; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
405; CHECK-SSE2-NEXT:    retq
406;
407; CHECK-SSE41-LABEL: test_urem_odd_even_allones_ne:
408; CHECK-SSE41:       # %bb.0:
409; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
410; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
411; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
412; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
413; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
414; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
415; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
416; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
417; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993460,306783379,2,42949673]
418; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
419; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
420; CHECK-SSE41-NEXT:    psrld $31, %xmm0
421; CHECK-SSE41-NEXT:    retq
422;
423; CHECK-AVX1-LABEL: test_urem_odd_even_allones_ne:
424; CHECK-AVX1:       # %bb.0:
425; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
426; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
427; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
428; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
429; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
430; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
431; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
432; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
433; CHECK-AVX1-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
434; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
435; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
436; CHECK-AVX1-NEXT:    retq
437;
438; CHECK-AVX2-LABEL: test_urem_odd_even_allones_ne:
439; CHECK-AVX2:       # %bb.0:
440; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
441; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
442; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
443; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
444; CHECK-AVX2-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
445; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
446; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
447; CHECK-AVX2-NEXT:    retq
448;
449; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_ne:
450; CHECK-AVX512VL:       # %bb.0:
451; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
452; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
453; CHECK-AVX512VL-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
454; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
455; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
456; CHECK-AVX512VL-NEXT:    retq
; IR under test: same divisor vector as the eq variant, compared with 'ne'.
457  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
458  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
459  %ret = zext <4 x i1> %cmp to <4 x i32>
460  ret <4 x i32> %ret
461}
462
463;------------------------------------------------------------------------------;
464
465; One power-of-two divisor in odd divisor
; Lane-wise test of (X urem <5, 5, 16, 5>) == 0, with the <4 x i1> compare
; result zero-extended to <4 x i32>. Lane 2 uses the power-of-two divisor 16
; (2^4); the remaining lanes use the odd divisor 5. The assertion groups below
; are autogenerated (see the NOTE at the top of this file).
466define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
467; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo:
468; CHECK-SSE2:       # %bb.0:
469; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
470; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
471; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
472; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
473; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
474; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
475; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
476; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
477; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
478; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
479; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
480; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
481; CHECK-SSE2-NEXT:    retq
482;
483; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo:
484; CHECK-SSE41:       # %bb.0:
485; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
486; CHECK-SSE41-NEXT:    pmovsxdq {{.*#+}} xmm1 = [1,268435456]
487; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
488; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
489; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
490; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
491; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,268435455,858993459]
492; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
493; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
494; CHECK-SSE41-NEXT:    psrld $31, %xmm0
495; CHECK-SSE41-NEXT:    retq
496;
497; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo:
498; CHECK-AVX1:       # %bb.0:
499; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
500; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
501; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
502; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
503; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
504; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
505; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
506; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
507; CHECK-AVX1-NEXT:    retq
508;
509; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo:
510; CHECK-AVX2:       # %bb.0:
511; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
512; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
513; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
514; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
515; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
516; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
517; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
518; CHECK-AVX2-NEXT:    retq
519;
520; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo:
521; CHECK-AVX512VL:       # %bb.0:
522; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
523; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
524; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
525; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
526; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
527; CHECK-AVX512VL-NEXT:    retq
; IR under test: lane 2 divides by the power of two 16, the other lanes by 5.
528  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
529  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
530  %ret = zext <4 x i1> %cmp to <4 x i32>
531  ret <4 x i32> %ret
532}
533
534; One power-of-two divisor in even divisor
; Lane-wise test of (X urem <14, 14, 16, 14>) == 0, with the <4 x i1> compare
; result zero-extended to <4 x i32>. Lane 2 uses the power-of-two divisor 16
; (2^4); the remaining lanes use the even divisor 14. The assertion groups
; below are autogenerated (see the NOTE at the top of this file).
535define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
536; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
537; CHECK-SSE2:       # %bb.0:
538; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
539; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
540; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
541; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
542; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
543; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
544; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
545; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
546; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
547; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
548; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
549; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
550; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
551; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
552; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
553; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
554; CHECK-SSE2-NEXT:    retq
555;
556; CHECK-SSE41-LABEL: test_urem_even_poweroftwo:
557; CHECK-SSE41:       # %bb.0:
558; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
559; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
560; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
561; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
562; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
563; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
564; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
565; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
566; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
567; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,268435455,306783378]
568; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
569; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
570; CHECK-SSE41-NEXT:    psrld $31, %xmm0
571; CHECK-SSE41-NEXT:    retq
572;
573; CHECK-AVX1-LABEL: test_urem_even_poweroftwo:
574; CHECK-AVX1:       # %bb.0:
575; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
576; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
577; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
578; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
579; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
580; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
581; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
582; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
583; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
584; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
585; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
586; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
587; CHECK-AVX1-NEXT:    retq
588;
589; CHECK-AVX2-LABEL: test_urem_even_poweroftwo:
590; CHECK-AVX2:       # %bb.0:
591; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
592; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
593; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
594; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
595; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
596; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
597; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
598; CHECK-AVX2-NEXT:    retq
599;
600; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo:
601; CHECK-AVX512VL:       # %bb.0:
602; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
603; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
604; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
605; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
606; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
607; CHECK-AVX512VL-NEXT:    retq
; IR under test: lane 2 divides by the power of two 16, the other lanes by 14.
608  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
609  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
610  %ret = zext <4 x i1> %cmp to <4 x i32>
611  ret <4 x i32> %ret
612}
613
614; One power-of-two divisor in odd+even divisor
615define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
616; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo:
617; CHECK-SSE2:       # %bb.0:
618; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
619; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
620; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
621; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
622; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
623; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
624; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
625; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
626; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
627; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
628; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
629; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
630; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
631; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
632; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
633; CHECK-SSE2-NEXT:    retq
634;
635; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo:
636; CHECK-SSE41:       # %bb.0:
637; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
638; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
639; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
640; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
641; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
642; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
643; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
644; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
645; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
646; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,268435455,42949672]
647; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
648; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
649; CHECK-SSE41-NEXT:    psrld $31, %xmm0
650; CHECK-SSE41-NEXT:    retq
651;
652; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo:
653; CHECK-AVX1:       # %bb.0:
654; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
655; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
656; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
657; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
658; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
659; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
660; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
661; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
662; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
663; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
664; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
665; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
666; CHECK-AVX1-NEXT:    retq
667;
668; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo:
669; CHECK-AVX2:       # %bb.0:
670; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
671; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
672; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
673; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
674; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
675; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
676; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
677; CHECK-AVX2-NEXT:    retq
678;
679; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo:
680; CHECK-AVX512VL:       # %bb.0:
681; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
682; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
683; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
684; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
685; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
686; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <5, 14, 16, 100>, so a
  ; lane is 1 when X is a multiple of its divisor and 0 otherwise.
  ; The constant fed to pminud in the SSE4.1 expectations above,
  ; [858993459,306783378,268435455,42949672], equals floor((2^32 - 1) / D)
  ; for each lane. The assertion blocks above are autogenerated; regenerate
  ; them with the update script instead of editing them by hand.
687  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
688  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
689  %ret = zext <4 x i1> %cmp to <4 x i32>
690  ret <4 x i32> %ret
691}
692
693;------------------------------------------------------------------------------;
694
695; One one divisor in odd divisor
696define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
697; CHECK-SSE2-LABEL: test_urem_odd_one:
698; CHECK-SSE2:       # %bb.0:
699; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
700; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
701; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
702; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
703; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
704; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
705; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
706; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
707; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
708; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
709; CHECK-SSE2-NEXT:    retq
710;
711; CHECK-SSE41-LABEL: test_urem_odd_one:
712; CHECK-SSE41:       # %bb.0:
713; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
714; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459]
715; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
716; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
717; CHECK-SSE41-NEXT:    psrld $31, %xmm0
718; CHECK-SSE41-NEXT:    retq
719;
720; CHECK-AVX1-LABEL: test_urem_odd_one:
721; CHECK-AVX1:       # %bb.0:
722; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
723; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
724; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
725; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
726; CHECK-AVX1-NEXT:    retq
727;
728; CHECK-AVX2-LABEL: test_urem_odd_one:
729; CHECK-AVX2:       # %bb.0:
730; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
731; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
732; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
733; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
734; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
735; CHECK-AVX2-NEXT:    retq
736;
737; CHECK-AVX512VL-LABEL: test_urem_odd_one:
738; CHECK-AVX512VL:       # %bb.0:
739; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
740; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
741; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
742; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
743; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <5, 5, 1, 5>.
  ; The splat multiplier 3435973837 seen in the SSE2 and AVX2 expectations is
  ; the inverse of 5 modulo 2^32 (5 * 3435973837 = 4 * 2^32 + 1).
  ; The pminud constant in the SSE4.1 expectations is floor((2^32 - 1) / D)
  ; per lane; for the divisor-1 lane that is 4294967295, which every 32-bit
  ; value satisfies.
744  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5>
745  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
746  %ret = zext <4 x i1> %cmp to <4 x i32>
747  ret <4 x i32> %ret
748}
749
750; One one divisor in even divisor
751define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
752; CHECK-SSE2-LABEL: test_urem_even_one:
753; CHECK-SSE2:       # %bb.0:
754; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
755; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
756; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
757; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
758; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
759; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
760; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
761; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
762; CHECK-SSE2-NEXT:    psrld $1, %xmm1
763; CHECK-SSE2-NEXT:    pslld $31, %xmm0
764; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
765; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
766; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
767; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
768; CHECK-SSE2-NEXT:    retq
769;
770; CHECK-SSE41-LABEL: test_urem_even_one:
771; CHECK-SSE41:       # %bb.0:
772; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
773; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
774; CHECK-SSE41-NEXT:    psrld $1, %xmm1
775; CHECK-SSE41-NEXT:    pslld $31, %xmm0
776; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
777; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378]
778; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
779; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
780; CHECK-SSE41-NEXT:    psrld $31, %xmm0
781; CHECK-SSE41-NEXT:    retq
782;
783; CHECK-AVX1-LABEL: test_urem_even_one:
784; CHECK-AVX1:       # %bb.0:
785; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
786; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
787; CHECK-AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
788; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
789; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
790; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
791; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
792; CHECK-AVX1-NEXT:    retq
793;
794; CHECK-AVX2-LABEL: test_urem_even_one:
795; CHECK-AVX2:       # %bb.0:
796; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
797; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
798; CHECK-AVX2-NEXT:    vpsrld $1, %xmm0, %xmm1
799; CHECK-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
800; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
801; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
802; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
803; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
804; CHECK-AVX2-NEXT:    retq
805;
806; CHECK-AVX512VL-LABEL: test_urem_even_one:
807; CHECK-AVX512VL:       # %bb.0:
808; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
809; CHECK-AVX512VL-NEXT:    vprord $1, %xmm0, %xmm0
810; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
811; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
812; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
813; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <14, 14, 1, 14>.
  ; The splat multiplier 3067833783 seen in the SSE2 and AVX2 expectations is
  ; the inverse of 7 modulo 2^32 (7 * 3067833783 = 5 * 2^32 + 1); the product
  ; is then rotated right by one bit (srl 1 | shl 31, or vprord $1).
  ; The pminud constant in the SSE4.1 expectations is floor((2^32 - 1) / D)
  ; per lane; the divisor-1 lane uses 4294967295.
814  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
815  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
816  %ret = zext <4 x i1> %cmp to <4 x i32>
817  ret <4 x i32> %ret
818}
819
820; One one divisor in odd+even divisor
821define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
822; CHECK-SSE2-LABEL: test_urem_odd_even_one:
823; CHECK-SSE2:       # %bb.0:
824; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
825; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
826; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
827; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
828; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
829; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
830; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
831; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
832; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
833; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
834; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
835; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
836; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
837; CHECK-SSE2-NEXT:    retq
838;
839; CHECK-SSE41-LABEL: test_urem_odd_even_one:
840; CHECK-SSE41:       # %bb.0:
841; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
842; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
843; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
844; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
845; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
846; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
847; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
848; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
849; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,4294967295,42949672]
850; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
851; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
852; CHECK-SSE41-NEXT:    psrld $31, %xmm0
853; CHECK-SSE41-NEXT:    retq
854;
855; CHECK-AVX1-LABEL: test_urem_odd_even_one:
856; CHECK-AVX1:       # %bb.0:
857; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
858; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
859; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
860; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
861; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
862; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
863; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
864; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
865; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
866; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
867; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
868; CHECK-AVX1-NEXT:    retq
869;
870; CHECK-AVX2-LABEL: test_urem_odd_even_one:
871; CHECK-AVX2:       # %bb.0:
872; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
873; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
874; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
875; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
876; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
877; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
878; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
879; CHECK-AVX2-NEXT:    retq
880;
881; CHECK-AVX512VL-LABEL: test_urem_odd_even_one:
882; CHECK-AVX512VL:       # %bb.0:
883; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
884; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
885; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
886; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
887; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
888; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <5, 14, 1, 100>, so a
  ; lane is 1 when X is a multiple of its divisor and 0 otherwise.
  ; The pminud constant in the SSE4.1 expectations,
  ; [858993459,306783378,4294967295,42949672], is floor((2^32 - 1) / D) per
  ; lane; the divisor-1 lane uses 4294967295.
889  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100>
890  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
891  %ret = zext <4 x i1> %cmp to <4 x i32>
892  ret <4 x i32> %ret
893}
894
895;------------------------------------------------------------------------------;
896
897; One INT_MIN divisor in odd divisor
898define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
899; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN:
900; CHECK-SSE2:       # %bb.0:
901; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
902; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
903; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
904; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
905; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
906; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
907; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
908; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
909; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
910; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
911; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
912; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
913; CHECK-SSE2-NEXT:    retq
914;
915; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN:
916; CHECK-SSE41:       # %bb.0:
917; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
918; CHECK-SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [1,2]
919; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
920; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
921; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
922; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
923; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459]
924; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
925; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
926; CHECK-SSE41-NEXT:    psrld $31, %xmm0
927; CHECK-SSE41-NEXT:    retq
928;
929; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN:
930; CHECK-AVX1:       # %bb.0:
931; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
932; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
933; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
934; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
935; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
936; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
937; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
938; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
939; CHECK-AVX1-NEXT:    retq
940;
941; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN:
942; CHECK-AVX2:       # %bb.0:
943; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
944; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
945; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
946; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
947; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
948; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
949; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
950; CHECK-AVX2-NEXT:    retq
951;
952; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN:
953; CHECK-AVX512VL:       # %bb.0:
954; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
955; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
956; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
957; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
958; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
959; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <5, 5, 2147483648, 5>;
  ; 2147483648 is 2^31, the bit pattern of the signed i32 minimum.
  ; The pminud constant in the SSE4.1 expectations,
  ; [858993459,858993459,1,858993459], is floor((2^32 - 1) / D) per lane,
  ; which is 1 for the 2^31 lane.
960  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
961  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
962  %ret = zext <4 x i1> %cmp to <4 x i32>
963  ret <4 x i32> %ret
964}
965
966; One INT_MIN divisor in even divisor
967define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
968; CHECK-SSE2-LABEL: test_urem_even_INT_MIN:
969; CHECK-SSE2:       # %bb.0:
970; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
971; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
972; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
973; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
974; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
975; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
976; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
977; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
978; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
979; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
980; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
981; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
982; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
983; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
984; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
985; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
986; CHECK-SSE2-NEXT:    retq
987;
988; CHECK-SSE41-LABEL: test_urem_even_INT_MIN:
989; CHECK-SSE41:       # %bb.0:
990; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
991; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
992; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
993; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
994; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
995; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
996; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
997; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
998; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
999; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,1,306783378]
1000; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1001; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1002; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1003; CHECK-SSE41-NEXT:    retq
1004;
1005; CHECK-AVX1-LABEL: test_urem_even_INT_MIN:
1006; CHECK-AVX1:       # %bb.0:
1007; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1008; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1009; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1010; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1011; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1012; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1013; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1014; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1015; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1016; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1017; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1018; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1019; CHECK-AVX1-NEXT:    retq
1020;
1021; CHECK-AVX2-LABEL: test_urem_even_INT_MIN:
1022; CHECK-AVX2:       # %bb.0:
1023; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1024; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1025; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1026; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1027; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1028; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1029; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1030; CHECK-AVX2-NEXT:    retq
1031;
1032; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN:
1033; CHECK-AVX512VL:       # %bb.0:
1034; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1035; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1036; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1037; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1038; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1039; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <14, 14, 2147483648, 14>;
  ; 2147483648 is 2^31, the bit pattern of the signed i32 minimum.
  ; The pminud constant in the SSE4.1 expectations,
  ; [306783378,306783378,1,306783378], is floor((2^32 - 1) / D) per lane,
  ; which is 1 for the 2^31 lane.
1040  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
1041  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1042  %ret = zext <4 x i1> %cmp to <4 x i32>
1043  ret <4 x i32> %ret
1044}
1045
1046; One INT_MIN divisor in odd+even divisor
1047define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
1048; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN:
1049; CHECK-SSE2:       # %bb.0:
1050; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1051; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1052; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1053; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
1054; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1055; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1056; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1057; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1058; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1059; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1060; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1061; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1062; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1063; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1064; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1065; CHECK-SSE2-NEXT:    retq
1066;
1067; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN:
1068; CHECK-SSE41:       # %bb.0:
1069; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1070; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1071; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1072; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1073; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1074; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1075; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1076; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1077; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1078; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,1,42949672]
1079; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1080; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1081; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1082; CHECK-SSE41-NEXT:    retq
1083;
1084; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN:
1085; CHECK-AVX1:       # %bb.0:
1086; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1087; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1088; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1089; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1090; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1091; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1092; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1093; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1094; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1095; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1096; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1097; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1098; CHECK-AVX1-NEXT:    retq
1099;
1100; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN:
1101; CHECK-AVX2:       # %bb.0:
1102; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1103; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1104; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1105; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1106; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1107; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1108; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1109; CHECK-AVX2-NEXT:    retq
1110;
1111; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN:
1112; CHECK-AVX512VL:       # %bb.0:
1113; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1114; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1115; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1116; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1117; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1118; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with
  ; D = <5, 14, 2147483648, 100>; 2147483648 is 2^31, the bit pattern of the
  ; signed i32 minimum.
  ; The pminud constant in the SSE4.1 expectations,
  ; [858993459,306783378,1,42949672], is floor((2^32 - 1) / D) per lane.
1119  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
1120  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1121  %ret = zext <4 x i1> %cmp to <4 x i32>
1122  ret <4 x i32> %ret
1123}
1124
1125;==============================================================================;
1126
1127; One all-ones divisor and power-of-two divisor in odd divisor
1128define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1129; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo:
1130; CHECK-SSE2:       # %bb.0:
1131; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1132; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1133; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1134; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1135; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1136; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1137; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1138; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
1139; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1140; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1141; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1142; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1143; CHECK-SSE2-NEXT:    retq
1144;
1145; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo:
1146; CHECK-SSE41:       # %bb.0:
1147; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1148; CHECK-SSE41-NEXT:    pmovsxdq {{.*#+}} xmm1 = [1,268435456]
1149; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
1150; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1151; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
1152; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
1153; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,1,268435455,858993459]
1154; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1155; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1156; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1157; CHECK-SSE41-NEXT:    retq
1158;
1159; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo:
1160; CHECK-AVX1:       # %bb.0:
1161; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1162; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1163; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1164; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
1165; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1166; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1167; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1168; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1169; CHECK-AVX1-NEXT:    retq
1170;
1171; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo:
1172; CHECK-AVX2:       # %bb.0:
1173; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1174; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1175; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1176; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1177; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1178; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1179; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1180; CHECK-AVX2-NEXT:    retq
1181;
1182; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo:
1183; CHECK-AVX512VL:       # %bb.0:
1184; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1185; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1186; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1187; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1188; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1189; CHECK-AVX512VL-NEXT:    retq
  ; Per lane: result = zext((X urem D) == 0) with D = <5, 4294967295, 16, 5>;
  ; 4294967295 is the all-ones i32 value and 16 is the power-of-two lane.
  ; The pminud constant in the SSE4.1 expectations,
  ; [858993459,1,268435455,858993459], is floor((2^32 - 1) / D) per lane,
  ; which is 1 for the all-ones lane.
1190  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
1191  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1192  %ret = zext <4 x i1> %cmp to <4 x i32>
1193  ret <4 x i32> %ret
1194}
1195
1196; One all-ones divisor and one power-of-two divisor in even divisor
; Lane divisors: 14 (even), 4294967295 (all-ones), 16 (power of two), 14 (even).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1197define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1198; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo:
1199; CHECK-SSE2:       # %bb.0:
1200; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1201; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1202; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1203; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
1204; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1205; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1206; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1207; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1208; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1209; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1210; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1211; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1212; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1213; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1214; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1215; CHECK-SSE2-NEXT:    retq
1216;
1217; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo:
1218; CHECK-SSE41:       # %bb.0:
1219; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1220; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1221; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1222; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1223; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1224; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1225; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1226; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1227; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1228; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,1,268435455,306783378]
1229; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1230; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1231; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1232; CHECK-SSE41-NEXT:    retq
1233;
1234; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo:
1235; CHECK-AVX1:       # %bb.0:
1236; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1237; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1238; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1239; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1240; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1241; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1242; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1243; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1244; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1245; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1246; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1247; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1248; CHECK-AVX1-NEXT:    retq
1249;
1250; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo:
1251; CHECK-AVX2:       # %bb.0:
1252; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1253; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1254; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1255; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1256; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1257; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1258; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1259; CHECK-AVX2-NEXT:    retq
1260;
1261; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo:
1262; CHECK-AVX512VL:       # %bb.0:
1263; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1264; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1265; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1266; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1267; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1268; CHECK-AVX512VL-NEXT:    retq
1269  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
1270  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1271  %ret = zext <4 x i1> %cmp to <4 x i32>
1272  ret <4 x i32> %ret
1273}
1274
1275; One all-ones divisor and one power-of-two divisor in odd+even divisor
; Lane divisors: 5 (odd), 4294967295 (all-ones), 16 (power of two), 100 (even).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1276define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1277; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1278; CHECK-SSE2:       # %bb.0:
1279; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1280; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1281; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1282; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
1283; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1284; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1285; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1286; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1287; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1288; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1289; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1290; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1291; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1292; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1293; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1294; CHECK-SSE2-NEXT:    retq
1295;
1296; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1297; CHECK-SSE41:       # %bb.0:
1298; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1299; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1300; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1301; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1302; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1303; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1304; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1305; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1306; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1307; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,1,268435455,42949672]
1308; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1309; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1310; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1311; CHECK-SSE41-NEXT:    retq
1312;
1313; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1314; CHECK-AVX1:       # %bb.0:
1315; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1316; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1317; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1318; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1319; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1320; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1321; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1322; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1323; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1324; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1325; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1326; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1327; CHECK-AVX1-NEXT:    retq
1328;
1329; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1330; CHECK-AVX2:       # %bb.0:
1331; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1332; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1333; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1334; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1335; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1336; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1337; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1338; CHECK-AVX2-NEXT:    retq
1339;
1340; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_poweroftwo:
1341; CHECK-AVX512VL:       # %bb.0:
1342; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1343; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1344; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1345; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1346; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1347; CHECK-AVX512VL-NEXT:    retq
1348  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
1349  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1350  %ret = zext <4 x i1> %cmp to <4 x i32>
1351  ret <4 x i32> %ret
1352}
1353
1354;------------------------------------------------------------------------------;
1355
1356; One all-ones divisor and one one divisor in odd divisor
; Lane divisors: 5 (odd), 4294967295 (all-ones), 1 (one), 5 (odd).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1357define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
1358; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one:
1359; CHECK-SSE2:       # %bb.0:
1360; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1361; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1362; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1363; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1364; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1365; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1366; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1367; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1368; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1369; CHECK-SSE2-NEXT:    retq
1370;
1371; CHECK-SSE41-LABEL: test_urem_odd_allones_and_one:
1372; CHECK-SSE41:       # %bb.0:
1373; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1374; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459]
1375; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1376; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1377; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1378; CHECK-SSE41-NEXT:    retq
1379;
1380; CHECK-AVX-LABEL: test_urem_odd_allones_and_one:
1381; CHECK-AVX:       # %bb.0:
1382; CHECK-AVX-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1383; CHECK-AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1384; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1385; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
1386; CHECK-AVX-NEXT:    retq
1387  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
1388  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1389  %ret = zext <4 x i1> %cmp to <4 x i32>
1390  ret <4 x i32> %ret
1391}
1392
1393; One all-ones divisor and one one divisor in even divisor
; Lane divisors: 14 (even), 4294967295 (all-ones), 1 (one), 14 (even).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1394define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
1395; CHECK-SSE2-LABEL: test_urem_even_allones_and_one:
1396; CHECK-SSE2:       # %bb.0:
1397; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1398; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1399; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1400; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
1401; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1402; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1403; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1404; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1405; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1406; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1407; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1408; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1409; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1410; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1411; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1412; CHECK-SSE2-NEXT:    retq
1413;
1414; CHECK-SSE41-LABEL: test_urem_even_allones_and_one:
1415; CHECK-SSE41:       # %bb.0:
1416; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1417; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1418; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1419; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1420; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1421; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1422; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1423; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1424; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1425; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,1,4294967295,306783378]
1426; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1427; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1428; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1429; CHECK-SSE41-NEXT:    retq
1430;
1431; CHECK-AVX1-LABEL: test_urem_even_allones_and_one:
1432; CHECK-AVX1:       # %bb.0:
1433; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1434; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1435; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1436; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1437; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1438; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1439; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1440; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1441; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1442; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1443; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1444; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1445; CHECK-AVX1-NEXT:    retq
1446;
1447; CHECK-AVX2-LABEL: test_urem_even_allones_and_one:
1448; CHECK-AVX2:       # %bb.0:
1449; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1450; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1451; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1452; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1453; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1454; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1455; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1456; CHECK-AVX2-NEXT:    retq
1457;
1458; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one:
1459; CHECK-AVX512VL:       # %bb.0:
1460; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1461; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1462; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1463; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1464; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1465; CHECK-AVX512VL-NEXT:    retq
1466  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
1467  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1468  %ret = zext <4 x i1> %cmp to <4 x i32>
1469  ret <4 x i32> %ret
1470}
1471
1472; One all-ones divisor and one one divisor in odd+even divisor
; Lane divisors: 5 (odd), 4294967295 (all-ones), 1 (one), 100 (even).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1473define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
1474; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_one:
1475; CHECK-SSE2:       # %bb.0:
1476; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1477; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1478; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1479; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1480; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1481; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
1482; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1483; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1484; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
1485; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1486; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1487; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1488; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
1489; CHECK-SSE2-NEXT:    retq
1490;
1491; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_one:
1492; CHECK-SSE41:       # %bb.0:
1493; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1494; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1495; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1496; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1497; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1498; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1499; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1500; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1501; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,42949672]
1502; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1503; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1504; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1505; CHECK-SSE41-NEXT:    retq
1506;
1507; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_one:
1508; CHECK-AVX1:       # %bb.0:
1509; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1510; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1511; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1512; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1513; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1514; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1515; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1516; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1517; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1518; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1519; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1520; CHECK-AVX1-NEXT:    retq
1521;
1522; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_one:
1523; CHECK-AVX2:       # %bb.0:
1524; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1525; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1526; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1527; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1528; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1529; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1530; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1531; CHECK-AVX2-NEXT:    retq
1532;
1533; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one:
1534; CHECK-AVX512VL:       # %bb.0:
1535; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1536; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1537; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1538; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1539; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1540; CHECK-AVX512VL-NEXT:    retq
1541  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
1542  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1543  %ret = zext <4 x i1> %cmp to <4 x i32>
1544  ret <4 x i32> %ret
1545}
1546
1547;------------------------------------------------------------------------------;
1548
1549; One power-of-two divisor and one one divisor in odd divisor
; Lane divisors: 5 (odd), 16 (power of two), 1 (one), 5 (odd).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1550define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
1551; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo_and_one:
1552; CHECK-SSE2:       # %bb.0:
1553; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1554; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1555; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1556; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1557; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1558; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
1559; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1560; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1561; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
1562; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1563; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1564; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1565; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
1566; CHECK-SSE2-NEXT:    retq
1567;
1568; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo_and_one:
1569; CHECK-SSE41:       # %bb.0:
1570; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1571; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1572; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1573; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1574; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1575; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1576; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1577; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1578; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,268435455,4294967295,858993459]
1579; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1580; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1581; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1582; CHECK-SSE41-NEXT:    retq
1583;
1584; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo_and_one:
1585; CHECK-AVX1:       # %bb.0:
1586; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1587; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1588; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1589; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1590; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1591; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1592; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1593; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1594; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1595; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1596; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1597; CHECK-AVX1-NEXT:    retq
1598;
1599; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo_and_one:
1600; CHECK-AVX2:       # %bb.0:
1601; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1602; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1603; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1604; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1605; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1606; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1607; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1608; CHECK-AVX2-NEXT:    retq
1609;
1610; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one:
1611; CHECK-AVX512VL:       # %bb.0:
1612; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1613; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1614; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1615; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1616; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1617; CHECK-AVX512VL-NEXT:    retq
1618  %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
1619  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1620  %ret = zext <4 x i1> %cmp to <4 x i32>
1621  ret <4 x i32> %ret
1622}
1623
1624; One power-of-two divisor and one one divisor in even divisor
; Lane divisors: 14 (even), 16 (power of two), 1 (one), 14 (even).
; Each result lane is 1 when that lane of %X leaves a zero remainder for its
; divisor (urem == 0), and 0 otherwise.
1625define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
1626; CHECK-SSE2-LABEL: test_urem_even_poweroftwo_and_one:
1627; CHECK-SSE2:       # %bb.0:
1628; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1629; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1630; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1631; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
1632; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1633; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1634; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
1635; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1636; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1637; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1638; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1639; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1640; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1641; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1642; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1643; CHECK-SSE2-NEXT:    retq
1644;
1645; CHECK-SSE41-LABEL: test_urem_even_poweroftwo_and_one:
1646; CHECK-SSE41:       # %bb.0:
1647; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1648; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1649; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1650; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1651; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1652; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1653; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1654; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1655; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1656; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,268435455,4294967295,306783378]
1657; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1658; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1659; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1660; CHECK-SSE41-NEXT:    retq
1661;
1662; CHECK-AVX1-LABEL: test_urem_even_poweroftwo_and_one:
1663; CHECK-AVX1:       # %bb.0:
1664; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1665; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1666; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1667; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1668; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1669; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1670; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1671; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1672; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1673; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1674; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1675; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1676; CHECK-AVX1-NEXT:    retq
1677;
1678; CHECK-AVX2-LABEL: test_urem_even_poweroftwo_and_one:
1679; CHECK-AVX2:       # %bb.0:
1680; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1681; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1682; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1683; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1684; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1685; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1686; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1687; CHECK-AVX2-NEXT:    retq
1688;
1689; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one:
1690; CHECK-AVX512VL:       # %bb.0:
1691; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1692; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1693; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1694; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1695; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1696; CHECK-AVX512VL-NEXT:    retq
1697  %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
1698  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1699  %ret = zext <4 x i1> %cmp to <4 x i32>
1700  ret <4 x i32> %ret
1701}
1702
1703; One power-of-two divisor and one one divisor in odd+even divisor
1704define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
1705; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo_and_one:
1706; CHECK-SSE2:       # %bb.0:
1707; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1708; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1709; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1710; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1711; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1712; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
1713; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1714; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1715; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
1716; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1717; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1718; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1719; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
1720; CHECK-SSE2-NEXT:    retq
1721;
1722; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo_and_one:
1723; CHECK-SSE41:       # %bb.0:
1724; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1725; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1726; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1727; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
1728; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1729; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1730; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1731; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
1732; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,268435455,4294967295,42949672]
1733; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1734; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1735; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1736; CHECK-SSE41-NEXT:    retq
1737;
1738; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo_and_one:
1739; CHECK-AVX1:       # %bb.0:
1740; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1741; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1742; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1743; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1744; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1745; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
1746; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1747; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1748; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1749; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1750; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1751; CHECK-AVX1-NEXT:    retq
1752;
1753; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo_and_one:
1754; CHECK-AVX2:       # %bb.0:
1755; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1756; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1757; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1758; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1759; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1760; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1761; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1762; CHECK-AVX2-NEXT:    retq
1763;
1764; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one:
1765; CHECK-AVX512VL:       # %bb.0:
1766; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1767; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1768; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1769; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1770; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1771; CHECK-AVX512VL-NEXT:    retq
1772  %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
1773  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1774  %ret = zext <4 x i1> %cmp to <4 x i32>
1775  ret <4 x i32> %ret
1776}
1777
1778;------------------------------------------------------------------------------;
1779
; Lowering of (X urem <5, 4294967295, 16, 1>) == 0, zero-extended to <4 x i32>.
; The divisor lanes are odd (5), all-ones (4294967295), power-of-two (16) and
; one (1). Each result lane is 1 when that lane of X is divisible by its
; divisor, else 0.
; In the SSE4.1 output the unsigned-min constant equals floor((2^32 - 1) / d)
; for each lane's divisor d.
1780define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
1781; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
1782; CHECK-SSE2:       # %bb.0:
1783; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1784; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1785; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1786; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1787; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1788; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1789; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1790; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
1791; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1792; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1793; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1794; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1795; CHECK-SSE2-NEXT:    retq
1796;
1797; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
1798; CHECK-SSE41:       # %bb.0:
1799; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1800; CHECK-SSE41-NEXT:    pmovsxdq {{.*#+}} xmm1 = [1,268435456]
1801; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
1802; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1803; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
1804; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
1805; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,1,268435455,4294967295]
1806; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1807; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1808; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1809; CHECK-SSE41-NEXT:    retq
1810;
1811; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
1812; CHECK-AVX1:       # %bb.0:
1813; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1814; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1815; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1816; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
1817; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1818; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1819; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1820; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1821; CHECK-AVX1-NEXT:    retq
1822;
1823; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
1824; CHECK-AVX2:       # %bb.0:
1825; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1826; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1827; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1828; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1829; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1830; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1831; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1832; CHECK-AVX2-NEXT:    retq
1833;
1834; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
1835; CHECK-AVX512VL:       # %bb.0:
1836; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1837; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1838; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1839; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1840; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1841; CHECK-AVX512VL-NEXT:    retq
; Divisibility test: per-lane remainder compared with zero, then each i1 is
; widened to an i32 holding 0 or 1.
1842  %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
1843  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1844  %ret = zext <4 x i1> %cmp to <4 x i32>
1845  ret <4 x i32> %ret
1846}
1847
; Lowering of (X urem <14, 4294967295, 16, 1>) == 0, zero-extended to <4 x i32>.
; The divisor lanes are even (14), all-ones (4294967295), power-of-two (16) and
; one (1). Each result lane is 1 when that lane of X is divisible by its
; divisor, else 0.
; In the SSE4.1 output the unsigned-min constant equals floor((2^32 - 1) / d)
; for each lane's divisor d.
1848define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
1849; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
1850; CHECK-SSE2:       # %bb.0:
1851; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1852; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1853; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1854; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1855; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1856; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
1857; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1858; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
1859; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
1860; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1861; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1862; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1863; CHECK-SSE2-NEXT:    retq
1864;
1865; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
1866; CHECK-SSE41:       # %bb.0:
1867; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1868; CHECK-SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = [2147483648,268435456]
1869; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
1870; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1871; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
1872; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
1873; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,1,268435455,4294967295]
1874; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
1875; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
1876; CHECK-SSE41-NEXT:    psrld $31, %xmm0
1877; CHECK-SSE41-NEXT:    retq
1878;
1879; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
1880; CHECK-AVX1:       # %bb.0:
1881; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1882; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1883; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
1884; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
1885; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1886; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1887; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1888; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
1889; CHECK-AVX1-NEXT:    retq
1890;
1891; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
1892; CHECK-AVX2:       # %bb.0:
1893; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1894; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1895; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1896; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1897; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1898; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1899; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
1900; CHECK-AVX2-NEXT:    retq
1901;
1902; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
1903; CHECK-AVX512VL:       # %bb.0:
1904; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1905; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1906; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1907; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1908; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
1909; CHECK-AVX512VL-NEXT:    retq
; Divisibility test: per-lane remainder compared with zero, then each i1 is
; widened to an i32 holding 0 or 1.
1910  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
1911  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
1912  %ret = zext <4 x i1> %cmp to <4 x i32>
1913  ret <4 x i32> %ret
1914}
1915