xref: /llvm-project/llvm/test/CodeGen/X86/ispow2.ll (revision 637e81f8adfe725c73aeafa4c2315d962be4770d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+bmi2,+bmi | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512vpopcntdq,+bmi2,+bmi | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX512
5
; Population-count intrinsics (scalar i32 and <4 x i64>) called by the test
; functions in this file.
6declare i32 @llvm.ctpop.i32(i32)
7declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
; Scalar i32, input known non-zero: %xin is OR'ed with 256, so bit 8 of %x is
; always set. The test asks whether ctpop(%x) == 1.
; Expected lowering avoids a popcount and checks (x & (x - 1)) == 0 instead:
;   - without BMI: orl, leal -1(x), testl, sete
;   - with BMI:    orl, blsrl (computes x & (x - 1) and sets ZF), sete
8define i1 @is_pow2_non_zero(i32 %xin) {
9; CHECK-NOBMI-LABEL: is_pow2_non_zero:
10; CHECK-NOBMI:       # %bb.0:
11; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
12; CHECK-NOBMI-NEXT:    orl $256, %edi # imm = 0x100
13; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
14; CHECK-NOBMI-NEXT:    testl %eax, %edi
15; CHECK-NOBMI-NEXT:    sete %al
16; CHECK-NOBMI-NEXT:    retq
17;
18; CHECK-BMI2-LABEL: is_pow2_non_zero:
19; CHECK-BMI2:       # %bb.0:
20; CHECK-BMI2-NEXT:    orl $256, %edi # imm = 0x100
21; CHECK-BMI2-NEXT:    blsrl %edi, %eax
22; CHECK-BMI2-NEXT:    sete %al
23; CHECK-BMI2-NEXT:    retq
24  %x = or i32 %xin, 256
25  %cnt = call i32 @llvm.ctpop.i32(i32 %x)
26  %r = icmp eq i32 %cnt, 1
27  ret i1 %r
28}
29
; Scalar i32, input NOT known non-zero: %x is passed straight to ctpop, so it
; may be zero. The (x & (x - 1)) == 0 shortcut would also accept x == 0, so the
; expected code uses the unsigned comparison (x ^ (x - 1)) u> (x - 1) instead:
; leal -1(x), xorl, cmpl, seta.
; All RUN configurations share the plain CHECK prefix here, i.e. the expected
; output is the same with and without BMI/AVX.
30define i1 @is_pow2_non_zero_x_maybe_z(i32 %x) {
31; CHECK-LABEL: is_pow2_non_zero_x_maybe_z:
32; CHECK:       # %bb.0:
33; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
34; CHECK-NEXT:    leal -1(%rdi), %eax
35; CHECK-NEXT:    xorl %eax, %edi
36; CHECK-NEXT:    cmpl %eax, %edi
37; CHECK-NEXT:    seta %al
38; CHECK-NEXT:    retq
39  %cnt = call i32 @llvm.ctpop.i32(i32 %x)
40  %r = icmp eq i32 %cnt, 1
41  ret i1 %r
42}
43
; Scalar i32, input known non-zero (OR with 256), inverted predicate:
; ctpop(%x) != 1. Expected lowering is the same (x & (x - 1)) test as the
; `eq` variant, with the final condition flipped to setne:
;   - without BMI: orl, leal -1(x), testl, setne
;   - with BMI:    orl, blsrl, setne
44define i1 @neither_pow2_non_zero(i32 %xin) {
45; CHECK-NOBMI-LABEL: neither_pow2_non_zero:
46; CHECK-NOBMI:       # %bb.0:
47; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
48; CHECK-NOBMI-NEXT:    orl $256, %edi # imm = 0x100
49; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
50; CHECK-NOBMI-NEXT:    testl %eax, %edi
51; CHECK-NOBMI-NEXT:    setne %al
52; CHECK-NOBMI-NEXT:    retq
53;
54; CHECK-BMI2-LABEL: neither_pow2_non_zero:
55; CHECK-BMI2:       # %bb.0:
56; CHECK-BMI2-NEXT:    orl $256, %edi # imm = 0x100
57; CHECK-BMI2-NEXT:    blsrl %edi, %eax
58; CHECK-BMI2-NEXT:    setne %al
59; CHECK-BMI2-NEXT:    retq
60  %x = or i32 %xin, 256
61  %cnt = call i32 @llvm.ctpop.i32(i32 %x)
62  %r = icmp ne i32 %cnt, 1
63  ret i1 %r
64}
65
; Vector <4 x i64>, every lane known non-zero (OR with splat 256):
; per-lane ctpop(%x) == 1, returned as <4 x i1>.
; Expected lowering per RUN configuration:
;   - default (CHECK-NOBMI, SSE registers): the vector is handled as two xmm
;     halves. pcmpeqd reg,reg materializes all-ones, so paddq yields x - 1;
;     pand gives x & (x - 1). The zero test uses 32-bit pcmpeqd, then the two
;     shufps gather the odd and even 32-bit halves of each 64-bit lane and
;     andps requires both halves to be zero.
;   - AVX2: same x & (x - 1) idea on one ymm register, compared against zero
;     with vpcmpeqq, then narrowed via vextracti128 + vpackssdw.
;   - AVX512 (+avx512vpopcntdq): a real vpopcntq followed by vpcmpltq against
;     a broadcast constant-pool operand (its value is not visible in the
;     checks), producing mask %k1 that is expanded into xmm0.
66define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
67; CHECK-NOBMI-LABEL: is_pow2_non_zero_4xv64:
68; CHECK-NOBMI:       # %bb.0:
69; CHECK-NOBMI-NEXT:    movdqa {{.*#+}} xmm2 = [256,256]
70; CHECK-NOBMI-NEXT:    por %xmm2, %xmm0
71; CHECK-NOBMI-NEXT:    por %xmm2, %xmm1
72; CHECK-NOBMI-NEXT:    pcmpeqd %xmm2, %xmm2
73; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
74; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
75; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
76; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
77; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm3
78; CHECK-NOBMI-NEXT:    paddq %xmm0, %xmm2
79; CHECK-NOBMI-NEXT:    pand %xmm2, %xmm0
80; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm0
81; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm1
82; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
83; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
84; CHECK-NOBMI-NEXT:    andps %xmm1, %xmm0
85; CHECK-NOBMI-NEXT:    retq
86;
87; CHECK-AVX2-LABEL: is_pow2_non_zero_4xv64:
88; CHECK-AVX2:       # %bb.0:
89; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [256,256,256,256]
90; CHECK-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
91; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
92; CHECK-AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
93; CHECK-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
94; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
95; CHECK-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
96; CHECK-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
97; CHECK-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
98; CHECK-AVX2-NEXT:    vzeroupper
99; CHECK-AVX2-NEXT:    retq
100;
101; CHECK-AVX512-LABEL: is_pow2_non_zero_4xv64:
102; CHECK-AVX512:       # %bb.0:
103; CHECK-AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
104; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
105; CHECK-AVX512-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
106; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
107; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
108; CHECK-AVX512-NEXT:    vzeroupper
109; CHECK-AVX512-NEXT:    retq
110  %x = or <4 x i64> %xin, <i64 256, i64 256, i64 256, i64 256>
111  %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
112  %r = icmp eq <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
113  ret <4 x i1> %r
114}
115
; Vector <4 x i64>, every lane known non-zero (OR with splat 256), inverted
; predicate: per-lane ctpop(%x) != 1, returned as <4 x i1>.
; Expected lowering mirrors the `eq` vector test, plus an inversion:
;   - default (CHECK-NOBMI): x & (x - 1) compared against zero per xmm half,
;     narrowed with shufps/andps, then xorps with the all-ones register (xmm2,
;     produced by pcmpeqd reg,reg) flips the result.
;   - AVX2: vpcmpeqq against zero followed by vpxor with all-ones (ymm1) before
;     the vextracti128 + vpackssdw narrowing.
;   - AVX512 (+avx512vpopcntdq): vpopcntq followed by vpcmpgtq against a
;     broadcast constant-pool operand (its value is not visible in the checks).
116define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
117; CHECK-NOBMI-LABEL: neither_pow2_non_zero_4xv64:
118; CHECK-NOBMI:       # %bb.0:
119; CHECK-NOBMI-NEXT:    movdqa {{.*#+}} xmm2 = [256,256]
120; CHECK-NOBMI-NEXT:    por %xmm2, %xmm0
121; CHECK-NOBMI-NEXT:    por %xmm2, %xmm1
122; CHECK-NOBMI-NEXT:    pcmpeqd %xmm2, %xmm2
123; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
124; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
125; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
126; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
127; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm3
128; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm4
129; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm4
130; CHECK-NOBMI-NEXT:    pand %xmm4, %xmm0
131; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm0
132; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm1
133; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
134; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
135; CHECK-NOBMI-NEXT:    andps %xmm1, %xmm0
136; CHECK-NOBMI-NEXT:    xorps %xmm2, %xmm0
137; CHECK-NOBMI-NEXT:    retq
138;
139; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64:
140; CHECK-AVX2:       # %bb.0:
141; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [256,256,256,256]
142; CHECK-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
143; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
144; CHECK-AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm2
145; CHECK-AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
146; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
147; CHECK-AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
148; CHECK-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
149; CHECK-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
150; CHECK-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
151; CHECK-AVX2-NEXT:    vzeroupper
152; CHECK-AVX2-NEXT:    retq
153;
154; CHECK-AVX512-LABEL: neither_pow2_non_zero_4xv64:
155; CHECK-AVX512:       # %bb.0:
156; CHECK-AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
157; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
158; CHECK-AVX512-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
159; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
160; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
161; CHECK-AVX512-NEXT:    vzeroupper
162; CHECK-AVX512-NEXT:    retq
163  %x = or <4 x i64> %xin, <i64 256, i64 256, i64 256, i64 256>
164  %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
165  %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
166  ret <4 x i1> %r
167}
168
; Vector <4 x i64>, lanes NOT known non-zero (%x goes straight to ctpop):
; per-lane ctpop(%x) != 1, returned as <4 x i1>.
; Because a lane may be zero, the expected code uses the comparison form
; (x ^ (x - 1)) > (x - 1) and then inverts it, rather than x & (x - 1):
;   - default (CHECK-NOBMI): x - 1 is XOR'ed with the splat constant
;     9223372039002259456 (0x8000000080000000) before the pcmpgtd/pcmpeqd
;     pair; this looks like the usual sign-bias trick for building a 64-bit
;     unsigned compare out of 32-bit signed compares (NOTE(review): confirm).
;     The final xorps with all-ones (xmm2) performs the inversion.
;   - AVX2: x - 1 is XOR'ed with splat 9223372036854775808 (the i64 sign bit),
;     compared with signed vpcmpgtq, then inverted via vpxor with all-ones.
;   - AVX512 (+avx512vpopcntdq): vpopcntq followed by vpcmpneqq against a
;     broadcast constant-pool operand (its value is not visible in the checks).
169define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
170; CHECK-NOBMI-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
171; CHECK-NOBMI:       # %bb.0:
172; CHECK-NOBMI-NEXT:    pcmpeqd %xmm2, %xmm2
173; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
174; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
175; CHECK-NOBMI-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
176; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm3
177; CHECK-NOBMI-NEXT:    pxor %xmm3, %xmm1
178; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm5
179; CHECK-NOBMI-NEXT:    pcmpgtd %xmm3, %xmm5
180; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm6
181; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm6
182; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm6
183; CHECK-NOBMI-NEXT:    pxor %xmm6, %xmm0
184; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm4
185; CHECK-NOBMI-NEXT:    pcmpgtd %xmm6, %xmm4
186; CHECK-NOBMI-NEXT:    movdqa %xmm4, %xmm7
187; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2],xmm5[0,2]
188; CHECK-NOBMI-NEXT:    pcmpeqd %xmm3, %xmm1
189; CHECK-NOBMI-NEXT:    pcmpeqd %xmm6, %xmm0
190; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
191; CHECK-NOBMI-NEXT:    andps %xmm7, %xmm0
192; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
193; CHECK-NOBMI-NEXT:    orps %xmm4, %xmm0
194; CHECK-NOBMI-NEXT:    xorps %xmm2, %xmm0
195; CHECK-NOBMI-NEXT:    retq
196;
197; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
198; CHECK-AVX2:       # %bb.0:
199; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
200; CHECK-AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm2
201; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
202; CHECK-AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
203; CHECK-AVX2-NEXT:    vpxor %ymm0, %ymm2, %ymm0
204; CHECK-AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
205; CHECK-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
206; CHECK-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
207; CHECK-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
208; CHECK-AVX2-NEXT:    vzeroupper
209; CHECK-AVX2-NEXT:    retq
210;
211; CHECK-AVX512-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
212; CHECK-AVX512:       # %bb.0:
213; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
214; CHECK-AVX512-NEXT:    vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
215; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
216; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
217; CHECK-AVX512-NEXT:    vzeroupper
218; CHECK-AVX512-NEXT:    retq
219  %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
220  %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
221  ret <4 x i1> %r
222}
223
224
; Scalar i32 where non-zero-ness comes from a return attribute rather than an
; explicit OR: the ctpop call is annotated `range(i32 1, 33)`, i.e. its result
; is stated to lie in [1, 33), so it is never 0.
; The test asks whether ctpop(%x) == 1, and the expected code is the same
; (x & (x - 1)) == 0 sequence as the known-non-zero tests (leal/testl/sete
; without BMI, blsrl/sete with BMI), with no OR instruction.
; The IR block is explicitly labelled `entry`, which is why the checks match
; "# %bb.0: # %entry".
225define i1 @ctpop32_eq_one_nonzero(i32 %x) {
226; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
227; CHECK-NOBMI:       # %bb.0: # %entry
228; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
229; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
230; CHECK-NOBMI-NEXT:    testl %eax, %edi
231; CHECK-NOBMI-NEXT:    sete %al
232; CHECK-NOBMI-NEXT:    retq
233;
234; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
235; CHECK-BMI2:       # %bb.0: # %entry
236; CHECK-BMI2-NEXT:    blsrl %edi, %eax
237; CHECK-BMI2-NEXT:    sete %al
238; CHECK-BMI2-NEXT:    retq
239entry:
240  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
241  %cmp = icmp eq i32 %popcnt, 1
242  ret i1 %cmp
243}
244
; Inverted counterpart of the range-annotated `eq` test: the ctpop call (here a
; `tail call`) carries `range(i32 1, 33)`, so its result is stated to be
; non-zero, and the test asks whether ctpop(%x) != 1.
; Expected code is the (x & (x - 1)) test with setne: leal/testl/setne without
; BMI, blsrl/setne with BMI.
; The IR block is explicitly labelled `entry`, which is why the checks match
; "# %bb.0: # %entry".
245define i1 @ctpop32_ne_one_nonzero(i32 %x) {
246; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
247; CHECK-NOBMI:       # %bb.0: # %entry
248; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
249; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
250; CHECK-NOBMI-NEXT:    testl %eax, %edi
251; CHECK-NOBMI-NEXT:    setne %al
252; CHECK-NOBMI-NEXT:    retq
253;
254; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
255; CHECK-BMI2:       # %bb.0: # %entry
256; CHECK-BMI2-NEXT:    blsrl %edi, %eax
257; CHECK-BMI2-NEXT:    setne %al
258; CHECK-BMI2-NEXT:    retq
259entry:
260  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
261  %cmp = icmp ne i32 %popcnt, 1
262  ret i1 %cmp
263}
264