xref: /llvm-project/llvm/test/CodeGen/X86/urem-power-of-two.ll (revision f0dd12ec5c0169ba5b4363b62d59511181cf954a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
4
5; The easy case: a constant power-of-2 divisor.
6
7define i64 @const_pow_2(i64 %x) {
8; X86-LABEL: const_pow_2:
9; X86:       # %bb.0:
10; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
11; X86-NEXT:    andl $31, %eax
12; X86-NEXT:    xorl %edx, %edx
13; X86-NEXT:    retl
14;
15; X64-LABEL: const_pow_2:
16; X64:       # %bb.0:
17; X64-NEXT:    movq %rdi, %rax
18; X64-NEXT:    andl $31, %eax
19; X64-NEXT:    retq
; urem by the constant 32 is expected to become a mask with 31 (the 'andl $31'
; checks above) with no division. On the i686 run the i64 result is returned
; in edx:eax, and the checks expect the high half to be zeroed with
; 'xorl %edx, %edx'.
20  %urem = urem i64 %x, 32
21  ret i64 %urem
22}
23
24; A left-shifted power-of-2 divisor. Use a weird type for wider coverage.
25
26define i25 @shift_left_pow_2(i25 %x, i25 %y) {
27; X86-LABEL: shift_left_pow_2:
28; X86:       # %bb.0:
29; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
30; X86-NEXT:    movl $1, %eax
31; X86-NEXT:    shll %cl, %eax
32; X86-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
33; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
34; X86-NEXT:    retl
35;
36; X64-LABEL: shift_left_pow_2:
37; X64:       # %bb.0:
38; X64-NEXT:    movl %esi, %ecx
39; X64-NEXT:    movl $1, %eax
40; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
41; X64-NEXT:    shll %cl, %eax
42; X64-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
43; X64-NEXT:    andl %edi, %eax
44; X64-NEXT:    retq
; The divisor is (1 << %y). The checks above expect a mask instead of a
; division: 0x1FFFFFF is 2^25 - 1, i.e. -1 in 25-bit arithmetic, so the 'addl'
; forms (1 << %y) - 1 in the low 25 bits, which is then ANDed with %x.
45  %shl = shl i25 1, %y
46  %urem = urem i25 %x, %shl
47  ret i25 %urem
48}
49
50; A logically right-shifted sign bit is a power-of-2 or UB.
51
52define i16 @shift_right_pow_2(i16 %x, i16 %y) {
53; X86-LABEL: shift_right_pow_2:
54; X86:       # %bb.0:
55; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
56; X86-NEXT:    movl $32768, %eax # imm = 0x8000
57; X86-NEXT:    shrl %cl, %eax
58; X86-NEXT:    decl %eax
59; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
60; X86-NEXT:    # kill: def $ax killed $ax killed $eax
61; X86-NEXT:    retl
62;
63; X64-LABEL: shift_right_pow_2:
64; X64:       # %bb.0:
65; X64-NEXT:    movl %esi, %ecx
66; X64-NEXT:    movl $32768, %eax # imm = 0x8000
67; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
68; X64-NEXT:    shrl %cl, %eax
69; X64-NEXT:    decl %eax
70; X64-NEXT:    andl %edi, %eax
71; X64-NEXT:    # kill: def $ax killed $ax killed $eax
72; X64-NEXT:    retq
; The divisor is the i16 sign bit (-32768, i.e. 0x8000) logically shifted right
; by %y. The checks above expect the mask form with no division: 0x8000 is
; shifted, decremented ('decl') to build the mask, and ANDed with %x.
73  %shr = lshr i16 -32768, %y
74  %urem = urem i16 %x, %shr
75  ret i16 %urem
76}
77
78; FIXME: A zero divisor would be UB, so this could be reduced to an 'and' with 3.
79
80define i8 @and_pow_2(i8 %x, i8 %y) {
81; X86-LABEL: and_pow_2:
82; X86:       # %bb.0:
83; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
84; X86-NEXT:    andb $4, %cl
85; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
86; X86-NEXT:    divb %cl
87; X86-NEXT:    movzbl %ah, %eax
88; X86-NEXT:    # kill: def $al killed $al killed $eax
89; X86-NEXT:    retl
90;
91; X64-LABEL: and_pow_2:
92; X64:       # %bb.0:
93; X64-NEXT:    andb $4, %sil
94; X64-NEXT:    movzbl %dil, %eax
95; X64-NEXT:    divb %sil
96; X64-NEXT:    movzbl %ah, %eax
97; X64-NEXT:    # kill: def $al killed $al killed $eax
98; X64-NEXT:    retq
; (%y & 4) can only be 0 or 4. The checks above record the current,
; unoptimized lowering: a real 'divb' whose remainder is read back from %ah.
; They are a baseline for the FIXME and should change if the fold is added.
99  %and = and i8 %y, 4
100  %urem = urem i8 %x, %and
101  ret i8 %urem
102}
103
104; A vector constant divisor should get the same treatment as a scalar.
105
106define <4 x i32> @vec_const_uniform_pow_2(<4 x i32> %x) {
107; X86-LABEL: vec_const_uniform_pow_2:
108; X86:       # %bb.0:
109; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
110; X86-NEXT:    retl
111;
112; X64-LABEL: vec_const_uniform_pow_2:
113; X64:       # %bb.0:
114; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
115; X64-NEXT:    retq
; Every lane is divided by the same power of two (16). The checks above expect
; one 'andps' against a constant-pool operand and no per-lane division.
; NOTE(review): the constant-pool contents are not checked here; the mask is
; presumably 15 in every lane -- confirm against the generated .LCPI data.
116  %urem = urem <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
117  ret <4 x i32> %urem
118}
119
120define <4 x i32> @vec_const_nonuniform_pow_2(<4 x i32> %x) {
121; X86-LABEL: vec_const_nonuniform_pow_2:
122; X86:       # %bb.0:
123; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
124; X86-NEXT:    retl
125;
126; X64-LABEL: vec_const_nonuniform_pow_2:
127; X64:       # %bb.0:
128; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
129; X64-NEXT:    retq
; Each lane has a different power-of-two divisor (2, 4, 8, 16). The checks
; above expect the same single 'andps' against a constant-pool operand as the
; uniform case, with no per-lane division.
; NOTE(review): the constant-pool contents are not checked here; the mask is
; presumably <1, 3, 7, 15> -- confirm against the generated .LCPI data.
130  %urem = urem <4 x i32> %x, <i32 2, i32 4, i32 8, i32 16>
131  ret <4 x i32> %urem
132}
133