xref: /llvm-project/llvm/test/CodeGen/PowerPC/popcnt-zext.ll (revision a51712751c184ebe056718c938d2526693a31564)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-- -mattr=+popcntd < %s      | FileCheck %s --check-prefix=FAST
3; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
4; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix-xcoff -mattr=+popcntd < %s      | FileCheck %s --check-prefix=FAST
5; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix-xcoff -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
6
; zext-then-popcount, i8 -> i16: the i8 argument is zero-extended to i16 and
; llvm.ctpop.i16 is applied to the widened value.
; FAST (+popcntd) expects a clrldi mask of the low 8 bits followed by a single
; popcntd. SLOW (+slow-popcntd) expects the shift/mask/add expansion below,
; which contains no popcnt instruction.
7define i16 @zpop_i8_i16(i8 %x) {
8; FAST-LABEL: zpop_i8_i16:
9; FAST:       # %bb.0:
10; FAST-NEXT:    clrldi 3, 3, 56
11; FAST-NEXT:    popcntd 3, 3
12; FAST-NEXT:    blr
13;
14; SLOW-LABEL: zpop_i8_i16:
15; SLOW:       # %bb.0:
16; SLOW-NEXT:    clrlwi 4, 3, 24
17; SLOW-NEXT:    rotlwi 3, 3, 31
18; SLOW-NEXT:    andi. 3, 3, 85
19; SLOW-NEXT:    sub 3, 4, 3
20; SLOW-NEXT:    andi. 4, 3, 13107
21; SLOW-NEXT:    rotlwi 3, 3, 30
22; SLOW-NEXT:    andi. 3, 3, 13107
23; SLOW-NEXT:    add 3, 4, 3
24; SLOW-NEXT:    srwi 4, 3, 4
25; SLOW-NEXT:    add 3, 3, 4
26; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
27; SLOW-NEXT:    clrlwi 3, 3, 28
28; SLOW-NEXT:    add 3, 3, 4
29; SLOW-NEXT:    blr
30  %z = zext i8 %x to i16
31  %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
32  ret i16 %pop
33}
34
; popcount-then-zext, i8 -> i16: llvm.ctpop.i8 is applied to the argument and
; the i8 result is zero-extended to i16.
; FAST (+popcntd) expects clrldi 3, 3, 56 followed by popcntd, with no
; separate extension instruction after the count. SLOW (+slow-popcntd)
; expects the expansion below, ending in a clrlwi mask of the low 4 bits.
35define i16 @popz_i8_i16(i8 %x) {
36; FAST-LABEL: popz_i8_i16:
37; FAST:       # %bb.0:
38; FAST-NEXT:    clrldi 3, 3, 56
39; FAST-NEXT:    popcntd 3, 3
40; FAST-NEXT:    blr
41;
42; SLOW-LABEL: popz_i8_i16:
43; SLOW:       # %bb.0:
44; SLOW-NEXT:    rotlwi 4, 3, 31
45; SLOW-NEXT:    andi. 4, 4, 85
46; SLOW-NEXT:    sub 3, 3, 4
47; SLOW-NEXT:    rlwinm 4, 3, 30, 30, 31
48; SLOW-NEXT:    rlwimi 4, 3, 30, 26, 27
49; SLOW-NEXT:    andi. 3, 3, 51
50; SLOW-NEXT:    add 3, 3, 4
51; SLOW-NEXT:    srwi 4, 3, 4
52; SLOW-NEXT:    add 3, 3, 4
53; SLOW-NEXT:    clrlwi 3, 3, 28
54; SLOW-NEXT:    blr
55  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
56  %z = zext i8 %pop to i16
57  ret i16 %z
58}
59
; zext-then-popcount, i8 -> i32: the i8 argument is zero-extended to i32 and
; llvm.ctpop.i32 is applied to the widened value.
; FAST (+popcntd) expects clrlwi 3, 3, 24 followed by popcntw. SLOW
; (+slow-popcntd) expects the 32-bit expansion below, which finishes with a
; mullw and a right shift by 24.
60define i32 @zpop_i8_i32(i8 %x) {
61; FAST-LABEL: zpop_i8_i32:
62; FAST:       # %bb.0:
63; FAST-NEXT:    clrlwi 3, 3, 24
64; FAST-NEXT:    popcntw 3, 3
65; FAST-NEXT:    blr
66;
67; SLOW-LABEL: zpop_i8_i32:
68; SLOW:       # %bb.0:
69; SLOW-NEXT:    clrlwi 5, 3, 24
70; SLOW-NEXT:    rotlwi 3, 3, 31
71; SLOW-NEXT:    andi. 3, 3, 85
72; SLOW-NEXT:    lis 4, 13107
73; SLOW-NEXT:    sub 3, 5, 3
74; SLOW-NEXT:    ori 4, 4, 13107
75; SLOW-NEXT:    rotlwi 5, 3, 30
76; SLOW-NEXT:    and 3, 3, 4
77; SLOW-NEXT:    andis. 4, 5, 13107
78; SLOW-NEXT:    andi. 5, 5, 13107
79; SLOW-NEXT:    or 4, 5, 4
80; SLOW-NEXT:    add 3, 3, 4
81; SLOW-NEXT:    lis 5, 3855
82; SLOW-NEXT:    srwi 4, 3, 4
83; SLOW-NEXT:    add 3, 3, 4
84; SLOW-NEXT:    lis 4, 257
85; SLOW-NEXT:    ori 5, 5, 3855
86; SLOW-NEXT:    and 3, 3, 5
87; SLOW-NEXT:    ori 4, 4, 257
88; SLOW-NEXT:    mullw 3, 3, 4
89; SLOW-NEXT:    srwi 3, 3, 24
90; SLOW-NEXT:    blr
91  %z = zext i8 %x to i32
92  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
93  ret i32 %pop
94}
95
; popcount-then-zext, i8 -> i32: llvm.ctpop.i8 is applied to the argument and
; the i8 result is zero-extended to i32.
; FAST (+popcntd) expects clrldi 3, 3, 56 followed by popcntd, with no
; separate extension instruction after the count. SLOW (+slow-popcntd)
; expects the expansion below, ending in a clrlwi mask of the low 4 bits.
96define i32 @popz_i8_32(i8 %x) {
97; FAST-LABEL: popz_i8_32:
98; FAST:       # %bb.0:
99; FAST-NEXT:    clrldi 3, 3, 56
100; FAST-NEXT:    popcntd 3, 3
101; FAST-NEXT:    blr
102;
103; SLOW-LABEL: popz_i8_32:
104; SLOW:       # %bb.0:
105; SLOW-NEXT:    rotlwi 4, 3, 31
106; SLOW-NEXT:    andi. 4, 4, 85
107; SLOW-NEXT:    sub 3, 3, 4
108; SLOW-NEXT:    rlwinm 4, 3, 30, 30, 31
109; SLOW-NEXT:    rlwimi 4, 3, 30, 26, 27
110; SLOW-NEXT:    andi. 3, 3, 51
111; SLOW-NEXT:    add 3, 3, 4
112; SLOW-NEXT:    srwi 4, 3, 4
113; SLOW-NEXT:    add 3, 3, 4
114; SLOW-NEXT:    clrlwi 3, 3, 28
115; SLOW-NEXT:    blr
116  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
117  %z = zext i8 %pop to i32
118  ret i32 %z
119}
120
; zext-then-popcount, i16 -> i32: the i16 argument is zero-extended to i32 and
; llvm.ctpop.i32 is applied to the widened value.
; FAST (+popcntd) expects clrlwi 3, 3, 16 followed by popcntw. SLOW
; (+slow-popcntd) expects the 32-bit expansion below, which finishes with a
; mullw and a right shift by 24.
121define i32 @zpop_i16_i32(i16 %x) {
122; FAST-LABEL: zpop_i16_i32:
123; FAST:       # %bb.0:
124; FAST-NEXT:    clrlwi 3, 3, 16
125; FAST-NEXT:    popcntw 3, 3
126; FAST-NEXT:    blr
127;
128; SLOW-LABEL: zpop_i16_i32:
129; SLOW:       # %bb.0:
130; SLOW-NEXT:    clrlwi 5, 3, 16
131; SLOW-NEXT:    rotlwi 3, 3, 31
132; SLOW-NEXT:    andi. 3, 3, 21845
133; SLOW-NEXT:    lis 4, 13107
134; SLOW-NEXT:    sub 3, 5, 3
135; SLOW-NEXT:    ori 4, 4, 13107
136; SLOW-NEXT:    rotlwi 5, 3, 30
137; SLOW-NEXT:    and 3, 3, 4
138; SLOW-NEXT:    andis. 4, 5, 13107
139; SLOW-NEXT:    andi. 5, 5, 13107
140; SLOW-NEXT:    or 4, 5, 4
141; SLOW-NEXT:    add 3, 3, 4
142; SLOW-NEXT:    lis 5, 3855
143; SLOW-NEXT:    srwi 4, 3, 4
144; SLOW-NEXT:    add 3, 3, 4
145; SLOW-NEXT:    lis 4, 257
146; SLOW-NEXT:    ori 5, 5, 3855
147; SLOW-NEXT:    and 3, 3, 5
148; SLOW-NEXT:    ori 4, 4, 257
149; SLOW-NEXT:    mullw 3, 3, 4
150; SLOW-NEXT:    srwi 3, 3, 24
151; SLOW-NEXT:    blr
152  %z = zext i16 %x to i32
153  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
154  ret i32 %pop
155}
156
; popcount-then-zext, i16 -> i32: llvm.ctpop.i16 is applied to the argument
; and the i16 result is zero-extended to i32.
; FAST (+popcntd) expects clrldi 3, 3, 48 followed by popcntd, with no
; separate extension instruction after the count. SLOW (+slow-popcntd)
; expects the expansion below, whose last instruction before blr is
; clrldi 3, 3, 32.
157define i32 @popz_i16_32(i16 %x) {
158; FAST-LABEL: popz_i16_32:
159; FAST:       # %bb.0:
160; FAST-NEXT:    clrldi 3, 3, 48
161; FAST-NEXT:    popcntd 3, 3
162; FAST-NEXT:    blr
163;
164; SLOW-LABEL: popz_i16_32:
165; SLOW:       # %bb.0:
166; SLOW-NEXT:    rotlwi 4, 3, 31
167; SLOW-NEXT:    andi. 4, 4, 21845
168; SLOW-NEXT:    sub 3, 3, 4
169; SLOW-NEXT:    andi. 4, 3, 13107
170; SLOW-NEXT:    rotlwi 3, 3, 30
171; SLOW-NEXT:    andi. 3, 3, 13107
172; SLOW-NEXT:    add 3, 4, 3
173; SLOW-NEXT:    srwi 4, 3, 4
174; SLOW-NEXT:    add 3, 3, 4
175; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
176; SLOW-NEXT:    clrlwi 3, 3, 28
177; SLOW-NEXT:    add 3, 3, 4
178; SLOW-NEXT:    clrldi 3, 3, 32
179; SLOW-NEXT:    blr
180  %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
181  %z = zext i16 %pop to i32
182  ret i32 %z
183}
184
; zext-then-popcount, i32 -> i64: the i32 argument is zero-extended to i64 and
; llvm.ctpop.i64 is applied to the widened value.
; FAST (+popcntd) expects clrldi 3, 3, 32 followed by popcntd. SLOW
; (+slow-popcntd) expects the 64-bit expansion below, which finishes with a
; mulld and an rldicl extracting the result.
185define i64 @zpop_i32_i64(i32 %x) {
186; FAST-LABEL: zpop_i32_i64:
187; FAST:       # %bb.0:
188; FAST-NEXT:    clrldi 3, 3, 32
189; FAST-NEXT:    popcntd 3, 3
190; FAST-NEXT:    blr
191;
192; SLOW-LABEL: zpop_i32_i64:
193; SLOW:       # %bb.0:
194; SLOW-NEXT:    rlwinm 5, 3, 31, 1, 0
195; SLOW-NEXT:    lis 4, 13107
196; SLOW-NEXT:    andis. 6, 5, 21845
197; SLOW-NEXT:    andi. 5, 5, 21845
198; SLOW-NEXT:    ori 4, 4, 13107
199; SLOW-NEXT:    or 5, 5, 6
200; SLOW-NEXT:    clrldi 3, 3, 32
201; SLOW-NEXT:    rldimi 4, 4, 32, 0
202; SLOW-NEXT:    sub 3, 3, 5
203; SLOW-NEXT:    and 5, 3, 4
204; SLOW-NEXT:    rotldi 3, 3, 62
205; SLOW-NEXT:    and 3, 3, 4
206; SLOW-NEXT:    add 3, 5, 3
207; SLOW-NEXT:    lis 4, 3855
208; SLOW-NEXT:    rldicl 5, 3, 60, 4
209; SLOW-NEXT:    ori 4, 4, 3855
210; SLOW-NEXT:    add 3, 3, 5
211; SLOW-NEXT:    lis 5, 257
212; SLOW-NEXT:    rldimi 4, 4, 32, 0
213; SLOW-NEXT:    ori 5, 5, 257
214; SLOW-NEXT:    and 3, 3, 4
215; SLOW-NEXT:    rldimi 5, 5, 32, 0
216; SLOW-NEXT:    mulld 3, 3, 5
217; SLOW-NEXT:    rldicl 3, 3, 8, 56
218; SLOW-NEXT:    blr
219  %z = zext i32 %x to i64
220  %pop = tail call i64 @llvm.ctpop.i64(i64 %z)
221  ret i64 %pop
222}
223
; popcount-then-zext, i32 -> i64: llvm.ctpop.i32 is applied to the argument
; and the i32 result is zero-extended to i64.
; FAST (+popcntd) expects popcntw first and then clrldi 3, 3, 32, i.e. the
; mask comes after the count here. SLOW (+slow-popcntd) expects the 32-bit
; expansion below, ending in mullw and an rlwinm that extracts the result.
224define i64 @popz_i32_i64(i32 %x) {
225; FAST-LABEL: popz_i32_i64:
226; FAST:       # %bb.0:
227; FAST-NEXT:    popcntw 3, 3
228; FAST-NEXT:    clrldi 3, 3, 32
229; FAST-NEXT:    blr
230;
231; SLOW-LABEL: popz_i32_i64:
232; SLOW:       # %bb.0:
233; SLOW-NEXT:    rotlwi 5, 3, 31
234; SLOW-NEXT:    andis. 6, 5, 21845
235; SLOW-NEXT:    andi. 5, 5, 21845
236; SLOW-NEXT:    or 5, 5, 6
237; SLOW-NEXT:    lis 4, 13107
238; SLOW-NEXT:    sub 3, 3, 5
239; SLOW-NEXT:    ori 4, 4, 13107
240; SLOW-NEXT:    rotlwi 5, 3, 30
241; SLOW-NEXT:    and 3, 3, 4
242; SLOW-NEXT:    andis. 4, 5, 13107
243; SLOW-NEXT:    andi. 5, 5, 13107
244; SLOW-NEXT:    or 4, 5, 4
245; SLOW-NEXT:    add 3, 3, 4
246; SLOW-NEXT:    lis 5, 3855
247; SLOW-NEXT:    srwi 4, 3, 4
248; SLOW-NEXT:    add 3, 3, 4
249; SLOW-NEXT:    lis 4, 257
250; SLOW-NEXT:    ori 5, 5, 3855
251; SLOW-NEXT:    and 3, 3, 5
252; SLOW-NEXT:    ori 4, 4, 257
253; SLOW-NEXT:    mullw 3, 3, 4
254; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
255; SLOW-NEXT:    blr
256  %pop = tail call i32 @llvm.ctpop.i32(i32 %x)
257  %z = zext i32 %pop to i64
258  ret i64 %z
259}
260
; popcount, zext, then mask: llvm.ctpop.i16 is applied to the argument, the
; result is zero-extended to i64, and the value is ANDed with 16 so only that
; one bit of the count is returned.
; FAST (+popcntd) expects clrldi 3, 3, 48, popcntd, and then the single-bit
; rlwinm mask. SLOW (+slow-popcntd) expects the i16 expansion below followed
; by the same rlwinm mask.
261define i64 @popa_i16_i64(i16 %x) {
262; FAST-LABEL: popa_i16_i64:
263; FAST:       # %bb.0:
264; FAST-NEXT:    clrldi 3, 3, 48
265; FAST-NEXT:    popcntd 3, 3
266; FAST-NEXT:    rlwinm 3, 3, 0, 27, 27
267; FAST-NEXT:    blr
268;
269; SLOW-LABEL: popa_i16_i64:
270; SLOW:       # %bb.0:
271; SLOW-NEXT:    rotlwi 4, 3, 31
272; SLOW-NEXT:    andi. 4, 4, 21845
273; SLOW-NEXT:    sub 3, 3, 4
274; SLOW-NEXT:    andi. 4, 3, 13107
275; SLOW-NEXT:    rotlwi 3, 3, 30
276; SLOW-NEXT:    andi. 3, 3, 13107
277; SLOW-NEXT:    add 3, 4, 3
278; SLOW-NEXT:    srwi 4, 3, 4
279; SLOW-NEXT:    add 3, 3, 4
280; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
281; SLOW-NEXT:    clrlwi 3, 3, 28
282; SLOW-NEXT:    add 3, 3, 4
283; SLOW-NEXT:    rlwinm 3, 3, 0, 27, 27
284; SLOW-NEXT:    blr
285  %pop = call i16 @llvm.ctpop.i16(i16 %x)
286  %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
287  %a = and i64 %z, 16
288  ret i64 %a
289}
290
; Declarations of the llvm.ctpop intrinsic at each integer width (i8, i16,
; i32, i64) called by the functions in this file.
291declare i8 @llvm.ctpop.i8(i8) nounwind readnone
292declare i16 @llvm.ctpop.i16(i16) nounwind readnone
293declare i32 @llvm.ctpop.i32(i32) nounwind readnone
294declare i64 @llvm.ctpop.i64(i64) nounwind readnone
295