; xref: /llvm-project/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll (revision b5d35feacb7246573c6a4ab2bddc4919a4228ed5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL

; Odd divisor
; urem by splat 25, compared eq 0 and zext'd back to <4 x i32>. The CHECK
; lines expect the multiply-by-modular-inverse lowering (pmulld + pminud +
; pcmpeqd) rather than an actual division.
define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_25:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_25:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_25:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_25:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_25:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
; urem by splat 100 (even). The lowering needs an extra rotate-right by the
; number of trailing zero bits: SSE emulates it with psrld/pslld/por, while
; AVX512VL uses a single vprord.
define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_100:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT:    psrld $2, %xmm1
; CHECK-SSE2-NEXT:    pslld $30, %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_100:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $2, %xmm1
; CHECK-SSE41-NEXT:    pslld $30, %xmm0
; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_100:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpslld $30, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_100:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpslld $30, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_100:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Negative divisors should be negated, and thus this is still splat vectors.

; Odd divisor
; Mixed <25, -25, -25, 25> divisors; all lanes should still be handled by the
; same inverse-multiply trick (the pminud bound differs per lane).
define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_neg25:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_neg25:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_neg25:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
; Mixed <-100, 100, -100, 100> divisors; even case, so the rotate is still
; required (vprord on AVX512VL), with a per-lane pminud bound.
define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_neg100:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT:    psrld $2, %xmm1
; CHECK-SSE2-NEXT:    pslld $30, %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_neg100:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $2, %xmm1
; CHECK-SSE41-NEXT:    pslld $30, %xmm0
; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,42949672,1,42949672]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_neg100:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpslld $30, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_neg100:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpslld $30, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_neg100:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;

; An undef lane in the icmp constant blocks the inverse-multiply fold; codegen
; falls back to the full expansion: mulhi by magic, shift, multiply back by 25
; and subtract, then compare the remainder against zero.
define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_undef1:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $3, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [25,25,25,25]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_undef1:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $3, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_undef1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_undef1:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [25,25,25,25]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_undef1:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Same as test_urem_odd_undef1 but with an even divisor (100): the undef lane
; in the icmp constant again forces the full mulhi/shift/mul/sub expansion.
define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_undef1:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $5, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [100,100,100,100]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_undef1:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $5, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_undef1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_undef1:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_undef1:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;

; urem by 1 is always 0, so `eq 0` folds to a constant all-ones-as-i32 vector.
define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_eq:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_one_eq:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; urem by 1 is always 0, so `ne 0` folds to the zero vector.
define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_ne:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_one_ne:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We can lower remainder of division by powers of two much better elsewhere.
; urem by 16 becomes a mask with 15 (pand), then compare against zero.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_pow2:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT:    psrld $31, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_pow2:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_pow2:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; CHECK-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_pow2:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by INT_MIN much better elsewhere.
; urem by 2^31 is a mask with INT_MAX (0x7FFFFFFF), then compare against zero.
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_int_min:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT:    psrld $31, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_int_min:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_int_min:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_int_min:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by all-ones much better elsewhere.
; urem by UINT_MAX: X is negated and the result compared against 1 (pminud).
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_allones:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_allones:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pmovsxbd {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE41-NEXT:    pminud %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_allones:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_allones:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_allones:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
