; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2

; fold (srem x, 1) -> 0
define i32 @combine_srem_by_one(i32 %x) {
; CHECK-LABEL: combine_srem_by_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, 1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_one(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_one:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; fold (srem x, -1) -> 0
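; (for any dividend where the operation is defined, the remainder of a division by -1 is 0, so the whole computation disappears)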
define i32 @combine_srem_by_negone(i32 %x) {
; CHECK-LABEL: combine_srem_by_negone:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -1
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_negone(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; TODO fold (srem x, INT_MIN)
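; For reference: srem x, INT_MIN is x for every x except INT_MIN itself, where it is 0;
; the code below still goes through the generic power-of-two expansion.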
define i32 @combine_srem_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    leal 2147483647(%rdi), %eax
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnsl %edi, %eax
; CHECK-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT:    addl %edi, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -2147483648
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_minsigned:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_minsigned:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_minsigned:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ret <4 x i32> %1
}

; fold (srem 0, x) -> 0
define i32 @combine_srem_zero(i32 %x) {
; CHECK-LABEL: combine_srem_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 0, %x
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_zero(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; fold (srem x, x) -> 0
define i32 @combine_srem_dupe(i32 %x) {
; CHECK-LABEL: combine_srem_dupe:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, %x
  ret i32 %1
}

define <4 x i32> @combine_vec_srem_dupe(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_dupe:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_dupe:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = srem <4 x i32> %x, %x
  ret <4 x i32> %1
}

; fold (srem x, y) -> (urem x, y) iff x and y are positive
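; The mask with 255 makes the dividend known non-negative, so srem by a positive
; power of two can be treated as urem and reduced to an AND with (divisor - 1).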
define <4 x i32> @combine_vec_srem_by_pos0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pos0:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pos0:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_vec_srem_by_pos1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pos1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_srem_by_pos1:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = srem <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %2
}

; fold (srem x, (1 << c)) -> x - (x / (1 << c)) * (1 << c).
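; Worked example of the expansion below with divisor 4 and x = -7: the arithmetic
; shift smears the sign (psrad $31 -> -1), the logical shift extracts the bias
; (psrld $30 -> 3), x + 3 = -4 is truncated toward zero by the AND with -4, and
; x - (-4) = -3 is the remainder.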
define <4 x i32> @combine_vec_srem_by_pow2a(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %1
}

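; srem by -4 generates the same code as srem by 4: the sign of the divisor never
; affects the remainder, only its magnitude does.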
define <4 x i32> @combine_vec_srem_by_pow2a_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2a_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $30, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2a_neg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2a_neg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrld $30, %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
  ret <4 x i32> %1
}

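; Non-uniform power-of-two divisors (1, 2, 4, 8 below) need a different shift amount
; per lane, hence the blend sequences on SSE/AVX1 and the variable shifts
; (vpsrlvd/vpsravd/vpsllvd) on AVX2.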
define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrld $29, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE-NEXT:    psrld $30, %xmm2
; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    psrad $3, %xmm1
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psrad $1, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    psrad $2, %xmm2
; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; SSE-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $29, %xmm2, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpsrld $30, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [0,1,2,3]
; AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    vpsllvd %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  ret <4 x i32> %1
}

define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; SSE-LABEL: combine_vec_srem_by_pow2b_neg:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrld $28, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrld $30, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrld $31, %xmm2
; SSE-NEXT:    psrld $29, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $4, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psrad $2, %xmm3
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $3, %xmm2
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $28, %xmm1, %xmm2
; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm3
; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $4, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm3
; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16>
  ret <4 x i32> %1
}

; FIXME: PR55271 - srem(undef, 3) != undef
; Use PSLLI intrinsic to postpone the undef creation until after urem-by-constant expansion
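; (folding to undef would be wrong: a genuine remainder of a division by 3 can only
; take values in the range (-3, 3))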
define <4 x i32> @combine_vec_srem_undef_by_3(<4 x i32> %in) {
; CHECK-LABEL: combine_vec_srem_undef_by_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %x = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 0)
  %y = srem <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %y
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)

; OSS-Fuzz #6883
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=6883
define i32 @ossfuzz6883() {
; CHECK-LABEL: ossfuzz6883:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl (%rax), %ecx
; CHECK-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    idivl %ecx
; CHECK-NEXT:    movl %eax, %esi
; CHECK-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %ecx
; CHECK-NEXT:    movl %eax, %edi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %edi
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    movl %edx, %edi
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %esi
; CHECK-NEXT:    andl %edi, %eax
; CHECK-NEXT:    retq
  %B17 = or i32 0, 2147483647
  %L6 = load i32, ptr undef
  %B11 = sdiv i32 %B17, %L6
  %B13 = udiv i32 %B17, %L6
  %B14 = srem i32 %B11, %B13
  %B16 = srem i32 %L6, %B14
  %B10 = udiv i32 %L6, %B14
  %B6 = and i32 %B16, %B10
  ret i32 %B6
}

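; srem on i1 always folds to 0: the only well-defined divisor is 1, which is -1 when
; interpreted as a signed 1-bit value, and any remainder modulo -1 is 0.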
define i1 @bool_srem(i1 %x, i1 %y) {
; CHECK-LABEL: bool_srem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %r = srem i1 %x, %y
  ret i1 %r
}
define <4 x i1> @boolvec_srem(<4 x i1> %x, <4 x i1> %y) {
; SSE-LABEL: boolvec_srem:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: boolvec_srem:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = srem <4 x i1> %x, %y
  ret <4 x i1> %r
}

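; The scalar power-of-two cases below share the same sign-bias lowering: add
; (|divisor| - 1) when the dividend is negative, clear the low bits to get the
; nearest multiple rounded toward zero, then subtract it from the original value.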
define i32 @combine_srem_two(i32 %x) {
; CHECK-LABEL: combine_srem_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    shrl $31, %ecx
; CHECK-NEXT:    addl %edi, %ecx
; CHECK-NEXT:    andl $-2, %ecx
; CHECK-NEXT:    subl %ecx, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, 2
  ret i32 %1
}

define i32 @combine_srem_negtwo(i32 %x) {
; CHECK-LABEL: combine_srem_negtwo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    shrl $31, %ecx
; CHECK-NEXT:    addl %edi, %ecx
; CHECK-NEXT:    andl $-2, %ecx
; CHECK-NEXT:    subl %ecx, %eax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -2
  ret i32 %1
}

define i8 @combine_i8_srem_negpow2(i8 %x) {
; CHECK-LABEL: combine_i8_srem_negpow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    sarb $7, %cl
; CHECK-NEXT:    shrb $2, %cl
; CHECK-NEXT:    addb %al, %cl
; CHECK-NEXT:    andb $-64, %cl
; CHECK-NEXT:    subb %cl, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %1 = srem i8 %x, -64
  ret i8 %1
}

define i16 @combine_i16_srem_pow2(i16 %x) {
; CHECK-LABEL: combine_i16_srem_pow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    leal 15(%rax), %ecx
; CHECK-NEXT:    testw %ax, %ax
; CHECK-NEXT:    cmovnsl %edi, %ecx
; CHECK-NEXT:    andl $-16, %ecx
; CHECK-NEXT:    subl %ecx, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $rax
; CHECK-NEXT:    retq
  %1 = srem i16 %x, 16
  ret i16 %1
}

define i16 @combine_i16_srem_negpow2(i16 %x) {
; CHECK-LABEL: combine_i16_srem_negpow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    leal 255(%rax), %ecx
; CHECK-NEXT:    testw %ax, %ax
; CHECK-NEXT:    cmovnsl %edi, %ecx
; CHECK-NEXT:    andl $-256, %ecx
; CHECK-NEXT:    subl %ecx, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $rax
; CHECK-NEXT:    retq
  %1 = srem i16 %x, -256
  ret i16 %1
}

define i32 @combine_srem_pow2(i32 %x) {
; CHECK-LABEL: combine_srem_pow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    leal 15(%rax), %ecx
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnsl %edi, %ecx
; CHECK-NEXT:    andl $-16, %ecx
; CHECK-NEXT:    subl %ecx, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, 16
  ret i32 %1
}

define i32 @combine_srem_negpow2(i32 %x) {
; CHECK-LABEL: combine_srem_negpow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    leal 255(%rax), %ecx
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cmovnsl %edi, %ecx
; CHECK-NEXT:    andl $-256, %ecx
; CHECK-NEXT:    subl %ecx, %eax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %1 = srem i32 %x, -256
  ret i32 %1
}

define i64 @combine_i64_srem_pow2(i64 %x) {
; CHECK-LABEL: combine_i64_srem_pow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    leaq 15(%rdi), %rcx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cmovnsq %rdi, %rcx
; CHECK-NEXT:    andq $-16, %rcx
; CHECK-NEXT:    subq %rcx, %rax
; CHECK-NEXT:    retq
  %1 = srem i64 %x, 16
  ret i64 %1
}

define i64 @combine_i64_srem_negpow2(i64 %x) {
; CHECK-LABEL: combine_i64_srem_negpow2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    leaq 255(%rdi), %rcx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cmovnsq %rdi, %rcx
; CHECK-NEXT:    andq $-256, %rcx
; CHECK-NEXT:    subq %rcx, %rax
; CHECK-NEXT:    retq
  %1 = srem i64 %x, -256
  ret i64 %1
}
581