xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-popcnt.ll (revision 4c853be6673fd95b4b900a6c0e1804bf33a0629c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
3; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
4; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
5
; Scalar i32 population count through @llvm.ctpop.i32, with no attribute that
; restricts floating-point/SIMD register use. Expected lowering per llc
; invocation at the top of this file:
;   * default:      move the value into s0, count bits per byte with cnt.8b,
;                   sum the byte counts with addv.8b, move the result back to w0.
;   * -mattr -neon: integer-only expansion (0x55555555 / 0x33333333 /
;                   0x0f0f0f0f masks, multiply by 0x01010101, shift right by 24).
;   * -mattr +cssc: a single scalar "cnt w0, w0".
6define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
7; CHECK-LABEL: cnt32_advsimd:
8; CHECK:       // %bb.0:
9; CHECK-NEXT:    fmov s0, w0
10; CHECK-NEXT:    cnt.8b v0, v0
11; CHECK-NEXT:    addv.8b b0, v0
12; CHECK-NEXT:    fmov w0, s0
13; CHECK-NEXT:    ret
14;
15; CHECK-NONEON-LABEL: cnt32_advsimd:
16; CHECK-NONEON:       // %bb.0:
17; CHECK-NONEON-NEXT:    lsr w9, w0, #1
18; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
19; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
20; CHECK-NONEON-NEXT:    sub w9, w0, w9
21; CHECK-NONEON-NEXT:    lsr w10, w9, #2
22; CHECK-NONEON-NEXT:    and w9, w9, #0x33333333
23; CHECK-NONEON-NEXT:    and w10, w10, #0x33333333
24; CHECK-NONEON-NEXT:    add w9, w9, w10
25; CHECK-NONEON-NEXT:    add w9, w9, w9, lsr #4
26; CHECK-NONEON-NEXT:    and w9, w9, #0xf0f0f0f
27; CHECK-NONEON-NEXT:    mul w8, w9, w8
28; CHECK-NONEON-NEXT:    lsr w0, w8, #24
29; CHECK-NONEON-NEXT:    ret
30;
31; CHECK-CSSC-LABEL: cnt32_advsimd:
32; CHECK-CSSC:       // %bb.0:
33; CHECK-CSSC-NEXT:    cnt w0, w0
34; CHECK-CSSC-NEXT:    ret
35  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
36  ret i32 %cnt
37}
38
; Population count of lane 0 extracted from a <2 x i32> argument.
; Expected lowering per llc invocation at the top of this file:
;   * default:      the recorded output moves the lane from s0 to w8 and back
;                   into s0 before running the cnt.8b / addv.8b sequence.
;   * -mattr -neon: the integer-only mask/multiply expansion, reading the
;                   element from w0.
;   * -mattr +cssc: the lane is moved to w8 and counted with scalar "cnt w0, w8".
39define i32 @cnt32_advsimd_2(<2 x i32> %x) {
40; CHECK-LABEL: cnt32_advsimd_2:
41; CHECK:       // %bb.0:
42; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
43; CHECK-NEXT:    fmov w8, s0
44; CHECK-NEXT:    fmov s0, w8
45; CHECK-NEXT:    cnt.8b v0, v0
46; CHECK-NEXT:    addv.8b b0, v0
47; CHECK-NEXT:    fmov w0, s0
48; CHECK-NEXT:    ret
49;
50; CHECK-NONEON-LABEL: cnt32_advsimd_2:
51; CHECK-NONEON:       // %bb.0:
52; CHECK-NONEON-NEXT:    lsr w9, w0, #1
53; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
54; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
55; CHECK-NONEON-NEXT:    sub w9, w0, w9
56; CHECK-NONEON-NEXT:    lsr w10, w9, #2
57; CHECK-NONEON-NEXT:    and w9, w9, #0x33333333
58; CHECK-NONEON-NEXT:    and w10, w10, #0x33333333
59; CHECK-NONEON-NEXT:    add w9, w9, w10
60; CHECK-NONEON-NEXT:    add w9, w9, w9, lsr #4
61; CHECK-NONEON-NEXT:    and w9, w9, #0xf0f0f0f
62; CHECK-NONEON-NEXT:    mul w8, w9, w8
63; CHECK-NONEON-NEXT:    lsr w0, w8, #24
64; CHECK-NONEON-NEXT:    ret
65;
66; CHECK-CSSC-LABEL: cnt32_advsimd_2:
67; CHECK-CSSC:       // %bb.0:
68; CHECK-CSSC-NEXT:    // kill: def $d0 killed $d0 def $q0
69; CHECK-CSSC-NEXT:    fmov w8, s0
70; CHECK-CSSC-NEXT:    cnt w0, w8
71; CHECK-CSSC-NEXT:    ret
72  %1 = extractelement <2 x i32> %x, i64 0
73  %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
74  ret i32 %2
75}
76
; Scalar i64 population count through @llvm.ctpop.i64, with no attribute that
; restricts floating-point/SIMD register use. Expected lowering per llc
; invocation at the top of this file:
;   * default:      move the value into d0, cnt.8b + addv.8b, move back to x0.
;   * -mattr -neon: 64-bit integer-only expansion (0x5555… / 0x3333… / 0x0f0f…
;                   masks, multiply by 0x0101010101010101, shift right by 56).
;   * -mattr +cssc: a single scalar "cnt x0, x0".
77define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
78; CHECK-LABEL: cnt64_advsimd:
79; CHECK:       // %bb.0:
80; CHECK-NEXT:    fmov d0, x0
81; CHECK-NEXT:    cnt.8b v0, v0
82; CHECK-NEXT:    addv.8b b0, v0
83; CHECK-NEXT:    fmov x0, d0
84; CHECK-NEXT:    ret
85;
86; CHECK-NONEON-LABEL: cnt64_advsimd:
87; CHECK-NONEON:       // %bb.0:
88; CHECK-NONEON-NEXT:    lsr x9, x0, #1
89; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
90; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
91; CHECK-NONEON-NEXT:    sub x9, x0, x9
92; CHECK-NONEON-NEXT:    lsr x10, x9, #2
93; CHECK-NONEON-NEXT:    and x9, x9, #0x3333333333333333
94; CHECK-NONEON-NEXT:    and x10, x10, #0x3333333333333333
95; CHECK-NONEON-NEXT:    add x9, x9, x10
96; CHECK-NONEON-NEXT:    add x9, x9, x9, lsr #4
97; CHECK-NONEON-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
98; CHECK-NONEON-NEXT:    mul x8, x9, x8
99; CHECK-NONEON-NEXT:    lsr x0, x8, #56
100; CHECK-NONEON-NEXT:    ret
101;
102; CHECK-CSSC-LABEL: cnt64_advsimd:
103; CHECK-CSSC:       // %bb.0:
104; CHECK-CSSC-NEXT:    cnt x0, x0
105; CHECK-CSSC-NEXT:    ret
106  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
107  ret i64 %cnt
108}
109
110; Do not use AdvSIMD when -mno-implicit-float is specified.
111; rdar://9473858
112
; i32 population count in a function marked noimplicitfloat. The default
; invocation is expected to emit the integer-only mask/multiply expansion
; instead of the cnt.8b/addv.8b vector sequence, matching the -mattr -neon
; output line for line. With -mattr +cssc the scalar "cnt w0, w0" is still
; expected, as it uses only general-purpose registers.
113define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
114; CHECK-LABEL: cnt32:
115; CHECK:       // %bb.0:
116; CHECK-NEXT:    lsr w9, w0, #1
117; CHECK-NEXT:    mov w8, #16843009 // =0x1010101
118; CHECK-NEXT:    and w9, w9, #0x55555555
119; CHECK-NEXT:    sub w9, w0, w9
120; CHECK-NEXT:    lsr w10, w9, #2
121; CHECK-NEXT:    and w9, w9, #0x33333333
122; CHECK-NEXT:    and w10, w10, #0x33333333
123; CHECK-NEXT:    add w9, w9, w10
124; CHECK-NEXT:    add w9, w9, w9, lsr #4
125; CHECK-NEXT:    and w9, w9, #0xf0f0f0f
126; CHECK-NEXT:    mul w8, w9, w8
127; CHECK-NEXT:    lsr w0, w8, #24
128; CHECK-NEXT:    ret
129;
130; CHECK-NONEON-LABEL: cnt32:
131; CHECK-NONEON:       // %bb.0:
132; CHECK-NONEON-NEXT:    lsr w9, w0, #1
133; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
134; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
135; CHECK-NONEON-NEXT:    sub w9, w0, w9
136; CHECK-NONEON-NEXT:    lsr w10, w9, #2
137; CHECK-NONEON-NEXT:    and w9, w9, #0x33333333
138; CHECK-NONEON-NEXT:    and w10, w10, #0x33333333
139; CHECK-NONEON-NEXT:    add w9, w9, w10
140; CHECK-NONEON-NEXT:    add w9, w9, w9, lsr #4
141; CHECK-NONEON-NEXT:    and w9, w9, #0xf0f0f0f
142; CHECK-NONEON-NEXT:    mul w8, w9, w8
143; CHECK-NONEON-NEXT:    lsr w0, w8, #24
144; CHECK-NONEON-NEXT:    ret
145;
146; CHECK-CSSC-LABEL: cnt32:
147; CHECK-CSSC:       // %bb.0:
148; CHECK-CSSC-NEXT:    cnt w0, w0
149; CHECK-CSSC-NEXT:    ret
150  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
151  ret i32 %cnt
152}
153
; i64 population count in a function marked noimplicitfloat. The default
; invocation is expected to emit the 64-bit integer-only mask/multiply
; expansion instead of the cnt.8b/addv.8b vector sequence, matching the
; -mattr -neon output line for line. With -mattr +cssc the scalar
; "cnt x0, x0" is still expected.
154define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
155; CHECK-LABEL: cnt64:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    lsr x9, x0, #1
158; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
159; CHECK-NEXT:    and x9, x9, #0x5555555555555555
160; CHECK-NEXT:    sub x9, x0, x9
161; CHECK-NEXT:    lsr x10, x9, #2
162; CHECK-NEXT:    and x9, x9, #0x3333333333333333
163; CHECK-NEXT:    and x10, x10, #0x3333333333333333
164; CHECK-NEXT:    add x9, x9, x10
165; CHECK-NEXT:    add x9, x9, x9, lsr #4
166; CHECK-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
167; CHECK-NEXT:    mul x8, x9, x8
168; CHECK-NEXT:    lsr x0, x8, #56
169; CHECK-NEXT:    ret
170;
171; CHECK-NONEON-LABEL: cnt64:
172; CHECK-NONEON:       // %bb.0:
173; CHECK-NONEON-NEXT:    lsr x9, x0, #1
174; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
175; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
176; CHECK-NONEON-NEXT:    sub x9, x0, x9
177; CHECK-NONEON-NEXT:    lsr x10, x9, #2
178; CHECK-NONEON-NEXT:    and x9, x9, #0x3333333333333333
179; CHECK-NONEON-NEXT:    and x10, x10, #0x3333333333333333
180; CHECK-NONEON-NEXT:    add x9, x9, x10
181; CHECK-NONEON-NEXT:    add x9, x9, x9, lsr #4
182; CHECK-NONEON-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
183; CHECK-NONEON-NEXT:    mul x8, x9, x8
184; CHECK-NONEON-NEXT:    lsr x0, x8, #56
185; CHECK-NONEON-NEXT:    ret
186;
187; CHECK-CSSC-LABEL: cnt64:
188; CHECK-CSSC:       // %bb.0:
189; CHECK-CSSC-NEXT:    cnt x0, x0
190; CHECK-CSSC-NEXT:    ret
191  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
192  ret i64 %cnt
193}
194
; Tests "ctpop(x) == 1" on an i64, zero-extended to i32.
; Without CSSC (default and -mattr -neon) no population count is expected at
; all: the recorded output computes t = x - 1 and tests (x ^ t) > t as an
; unsigned comparison (cset hi). With -mattr +cssc the expected output is the
; direct form: scalar cnt, compare against 1, cset eq.
195define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
196; CHECK-LABEL: ctpop_eq_one:
197; CHECK:       // %bb.0:
198; CHECK-NEXT:    sub x8, x0, #1
199; CHECK-NEXT:    eor x9, x0, x8
200; CHECK-NEXT:    cmp x9, x8
201; CHECK-NEXT:    cset w0, hi
202; CHECK-NEXT:    ret
203;
204; CHECK-NONEON-LABEL: ctpop_eq_one:
205; CHECK-NONEON:       // %bb.0:
206; CHECK-NONEON-NEXT:    sub x8, x0, #1
207; CHECK-NONEON-NEXT:    eor x9, x0, x8
208; CHECK-NONEON-NEXT:    cmp x9, x8
209; CHECK-NONEON-NEXT:    cset w0, hi
210; CHECK-NONEON-NEXT:    ret
211;
212; CHECK-CSSC-LABEL: ctpop_eq_one:
213; CHECK-CSSC:       // %bb.0:
214; CHECK-CSSC-NEXT:    cnt x8, x0
215; CHECK-CSSC-NEXT:    cmp x8, #1
216; CHECK-CSSC-NEXT:    cset w0, eq
217; CHECK-CSSC-NEXT:    ret
218  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
219  %cmp = icmp eq i64 %count, 1
220  %conv = zext i1 %cmp to i32
221  ret i32 %conv
222}
223
; Tests "ctpop(x) != 1" on an i64, zero-extended to i32.
; Without CSSC (default and -mattr -neon) the recorded output computes
; t = x - 1 and tests (x ^ t) <= t as an unsigned comparison (cset ls), so no
; population count is emitted. With -mattr +cssc the expected output is
; scalar cnt, compare against 1, cset ne.
224define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
225; CHECK-LABEL: ctpop_ne_one:
226; CHECK:       // %bb.0:
227; CHECK-NEXT:    sub x8, x0, #1
228; CHECK-NEXT:    eor x9, x0, x8
229; CHECK-NEXT:    cmp x9, x8
230; CHECK-NEXT:    cset w0, ls
231; CHECK-NEXT:    ret
232;
233; CHECK-NONEON-LABEL: ctpop_ne_one:
234; CHECK-NONEON:       // %bb.0:
235; CHECK-NONEON-NEXT:    sub x8, x0, #1
236; CHECK-NONEON-NEXT:    eor x9, x0, x8
237; CHECK-NONEON-NEXT:    cmp x9, x8
238; CHECK-NONEON-NEXT:    cset w0, ls
239; CHECK-NONEON-NEXT:    ret
240;
241; CHECK-CSSC-LABEL: ctpop_ne_one:
242; CHECK-CSSC:       // %bb.0:
243; CHECK-CSSC-NEXT:    cnt x8, x0
244; CHECK-CSSC-NEXT:    cmp x8, #1
245; CHECK-CSSC-NEXT:    cset w0, ne
246; CHECK-CSSC-NEXT:    ret
247  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
248  %cmp = icmp ne i64 %count, 1
249  %conv = zext i1 %cmp to i32
250  ret i32 %conv
251}
252
; Tests "ctpop(x) != 1" on an i32, returning the i1 directly.
; Without CSSC (default and -mattr -neon) the recorded output computes
; t = x - 1 and tests (x ^ t) <= t as an unsigned comparison (cset ls) on the
; 32-bit registers. With -mattr +cssc the expected output is scalar cnt,
; compare against 1, cset ne.
253define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
254; CHECK-LABEL: ctpop32_ne_one:
255; CHECK:       // %bb.0:
256; CHECK-NEXT:    sub w8, w0, #1
257; CHECK-NEXT:    eor w9, w0, w8
258; CHECK-NEXT:    cmp w9, w8
259; CHECK-NEXT:    cset w0, ls
260; CHECK-NEXT:    ret
261;
262; CHECK-NONEON-LABEL: ctpop32_ne_one:
263; CHECK-NONEON:       // %bb.0:
264; CHECK-NONEON-NEXT:    sub w8, w0, #1
265; CHECK-NONEON-NEXT:    eor w9, w0, w8
266; CHECK-NONEON-NEXT:    cmp w9, w8
267; CHECK-NONEON-NEXT:    cset w0, ls
268; CHECK-NONEON-NEXT:    ret
269;
270; CHECK-CSSC-LABEL: ctpop32_ne_one:
271; CHECK-CSSC:       // %bb.0:
272; CHECK-CSSC-NEXT:    cnt w8, w0
273; CHECK-CSSC-NEXT:    cmp w8, #1
274; CHECK-CSSC-NEXT:    cset w0, ne
275; CHECK-CSSC-NEXT:    ret
276  %count = tail call i32 @llvm.ctpop.i32(i32 %x)
277  %cmp = icmp ne i32 %count, 1
278  ret i1 %cmp
279}
280
; Tests "ctpop(x) == 1" on an i32 when the call result carries
; range(i32 1, 33), i.e. the population count is declared to be at least 1.
; All three llc invocations are expected to emit the same three-instruction
; form: t = x - 1, tst x, t, cset eq (true when (x & t) == 0). Even with
; -mattr +cssc no cnt instruction is expected here.
281define i1 @ctpop32_eq_one_nonzero(i32 %x) {
282; CHECK-LABEL: ctpop32_eq_one_nonzero:
283; CHECK:       // %bb.0: // %entry
284; CHECK-NEXT:    sub w8, w0, #1
285; CHECK-NEXT:    tst w0, w8
286; CHECK-NEXT:    cset w0, eq
287; CHECK-NEXT:    ret
288;
289; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
290; CHECK-NONEON:       // %bb.0: // %entry
291; CHECK-NONEON-NEXT:    sub w8, w0, #1
292; CHECK-NONEON-NEXT:    tst w0, w8
293; CHECK-NONEON-NEXT:    cset w0, eq
294; CHECK-NONEON-NEXT:    ret
295;
296; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
297; CHECK-CSSC:       // %bb.0: // %entry
298; CHECK-CSSC-NEXT:    sub w8, w0, #1
299; CHECK-CSSC-NEXT:    tst w0, w8
300; CHECK-CSSC-NEXT:    cset w0, eq
301; CHECK-CSSC-NEXT:    ret
302entry:
303  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
304  %cmp = icmp eq i32 %popcnt, 1
305  ret i1 %cmp
306}
307
; Tests "ctpop(x) != 1" on an i32 when the call result carries
; range(i32 1, 33), i.e. the population count is declared to be at least 1.
; All three llc invocations are expected to emit the same three-instruction
; form: t = x - 1, tst x, t, cset ne (true when (x & t) != 0). Even with
; -mattr +cssc no cnt instruction is expected here.
308define i1 @ctpop32_ne_one_nonzero(i32 %x) {
309; CHECK-LABEL: ctpop32_ne_one_nonzero:
310; CHECK:       // %bb.0: // %entry
311; CHECK-NEXT:    sub w8, w0, #1
312; CHECK-NEXT:    tst w0, w8
313; CHECK-NEXT:    cset w0, ne
314; CHECK-NEXT:    ret
315;
316; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
317; CHECK-NONEON:       // %bb.0: // %entry
318; CHECK-NONEON-NEXT:    sub w8, w0, #1
319; CHECK-NONEON-NEXT:    tst w0, w8
320; CHECK-NONEON-NEXT:    cset w0, ne
321; CHECK-NONEON-NEXT:    ret
322;
323; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
324; CHECK-CSSC:       // %bb.0: // %entry
325; CHECK-CSSC-NEXT:    sub w8, w0, #1
326; CHECK-CSSC-NEXT:    tst w0, w8
327; CHECK-CSSC-NEXT:    cset w0, ne
328; CHECK-CSSC-NEXT:    ret
329entry:
330  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
331  %cmp = icmp ne i32 %popcnt, 1
332  ret i1 %cmp
333}
334
; Declarations of the population-count intrinsics called by the test
; functions in this file, in their 32-bit and 64-bit integer forms.
335declare i32 @llvm.ctpop.i32(i32) nounwind readnone
336declare i64 @llvm.ctpop.i64(i64) nounwind readnone
337