xref: /llvm-project/llvm/test/CodeGen/AArch64/popcount.ll (revision 4c853be6673fd95b4b900a6c0e1804bf33a0629c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=CHECKO0
3; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON
4; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=CHECK,DOT
5; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
6; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISEL
7; RUN: llc < %s -O0 -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISELO0
8; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=GISEL,NEON-GISEL
9; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=GISEL,DOT-GISEL
10; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=GISEL,SVE-GISEL
11
12
; popcount128: loads an i128 through %0 (align 16), applies llvm.ctpop.i128 and
; returns the result truncated to i8. A 128-bit population count is at most 128,
; so the count still fits in the 8 returned bits.
; The autogenerated assertions inside the function record the expected assembly
; for each llc invocation listed at the top of the file.
13; Function Attrs: nobuiltin nounwind readonly
14define i8 @popcount128(ptr nocapture nonnull readonly %0) {
15; CHECKO0-LABEL: popcount128:
16; CHECKO0:       // %bb.0: // %Entry
17; CHECKO0-NEXT:    ldr q0, [x0]
18; CHECKO0-NEXT:    cnt v0.16b, v0.16b
19; CHECKO0-NEXT:    uaddlv h0, v0.16b
20; CHECKO0-NEXT:    // kill: def $q0 killed $h0
21; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
22; CHECKO0-NEXT:    fmov w0, s0
23; CHECKO0-NEXT:    ret
24;
25; CHECK-LABEL: popcount128:
26; CHECK:       // %bb.0: // %Entry
27; CHECK-NEXT:    ldr d0, [x0]
28; CHECK-NEXT:    add x8, x0, #8
29; CHECK-NEXT:    ld1 { v0.d }[1], [x8]
30; CHECK-NEXT:    cnt v0.16b, v0.16b
31; CHECK-NEXT:    addv b0, v0.16b
32; CHECK-NEXT:    fmov w0, s0
33; CHECK-NEXT:    ret
34;
35; GISEL-LABEL: popcount128:
36; GISEL:       // %bb.0: // %Entry
37; GISEL-NEXT:    ldr q0, [x0]
38; GISEL-NEXT:    cnt v0.16b, v0.16b
39; GISEL-NEXT:    uaddlv h0, v0.16b
40; GISEL-NEXT:    fmov w0, s0
41; GISEL-NEXT:    ret
42;
43; GISELO0-LABEL: popcount128:
44; GISELO0:       // %bb.0: // %Entry
45; GISELO0-NEXT:    ldr q0, [x0]
46; GISELO0-NEXT:    cnt v0.16b, v0.16b
47; GISELO0-NEXT:    uaddlv h0, v0.16b
48; GISELO0-NEXT:    // kill: def $q0 killed $h0
49; GISELO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
50; GISELO0-NEXT:    fmov w0, s0
51; GISELO0-NEXT:    ret
52Entry:
53  %1 = load i128, ptr %0, align 16
54  %2 = tail call i128 @llvm.ctpop.i128(i128 %1)
55  %3 = trunc i128 %2 to i8
56  ret i8 %3
57}
58
59; Function Attrs: nounwind readnone speculatable willreturn
60declare i128 @llvm.ctpop.i128(i128)
61
; popcount256: loads an i256 through %0 (align 16), applies llvm.ctpop.i256 and
; returns the result truncated to i16. A 256-bit population count is at most
; 256, which fits in the 16 returned bits.
; In every expected sequence below the value is handled as two 128-bit halves
; whose per-half counts are added together.
62; Function Attrs: nobuiltin nounwind readonly
63define i16 @popcount256(ptr nocapture nonnull readonly %0) {
64; CHECKO0-LABEL: popcount256:
65; CHECKO0:       // %bb.0: // %Entry
66; CHECKO0-NEXT:    ldr x11, [x0]
67; CHECKO0-NEXT:    ldr x10, [x0, #8]
68; CHECKO0-NEXT:    ldr x9, [x0, #16]
69; CHECKO0-NEXT:    ldr x8, [x0, #24]
70; CHECKO0-NEXT:    // implicit-def: $q1
71; CHECKO0-NEXT:    mov v1.d[0], x11
72; CHECKO0-NEXT:    mov v1.d[1], x10
73; CHECKO0-NEXT:    // implicit-def: $q0
74; CHECKO0-NEXT:    mov v0.d[0], x9
75; CHECKO0-NEXT:    mov v0.d[1], x8
76; CHECKO0-NEXT:    cnt v1.16b, v1.16b
77; CHECKO0-NEXT:    uaddlv h1, v1.16b
78; CHECKO0-NEXT:    // kill: def $q1 killed $h1
79; CHECKO0-NEXT:    // kill: def $s1 killed $s1 killed $q1
80; CHECKO0-NEXT:    fmov w0, s1
81; CHECKO0-NEXT:    mov w10, wzr
82; CHECKO0-NEXT:    mov w9, w0
83; CHECKO0-NEXT:    mov w8, w10
84; CHECKO0-NEXT:    bfi x9, x8, #32, #32
85; CHECKO0-NEXT:    cnt v0.16b, v0.16b
86; CHECKO0-NEXT:    uaddlv h0, v0.16b
87; CHECKO0-NEXT:    // kill: def $q0 killed $h0
88; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
89; CHECKO0-NEXT:    fmov w0, s0
90; CHECKO0-NEXT:    mov w8, w0
91; CHECKO0-NEXT:    // kill: def $x10 killed $w10
92; CHECKO0-NEXT:    bfi x8, x10, #32, #32
93; CHECKO0-NEXT:    adds x8, x8, x9
94; CHECKO0-NEXT:    mov w0, w8
95; CHECKO0-NEXT:    ret
96;
97; CHECK-LABEL: popcount256:
98; CHECK:       // %bb.0: // %Entry
99; CHECK-NEXT:    ldr d0, [x0, #16]
100; CHECK-NEXT:    ldr d1, [x0]
101; CHECK-NEXT:    add x8, x0, #8
102; CHECK-NEXT:    add x9, x0, #24
103; CHECK-NEXT:    ld1 { v0.d }[1], [x9]
104; CHECK-NEXT:    ld1 { v1.d }[1], [x8]
105; CHECK-NEXT:    cnt v0.16b, v0.16b
106; CHECK-NEXT:    cnt v1.16b, v1.16b
107; CHECK-NEXT:    addv b0, v0.16b
108; CHECK-NEXT:    addv b1, v1.16b
109; CHECK-NEXT:    fmov w8, s0
110; CHECK-NEXT:    fmov w9, s1
111; CHECK-NEXT:    add w0, w9, w8
112; CHECK-NEXT:    ret
113;
114; GISEL-LABEL: popcount256:
115; GISEL:       // %bb.0: // %Entry
116; GISEL-NEXT:    ldp x8, x9, [x0]
117; GISEL-NEXT:    mov v0.d[0], x8
118; GISEL-NEXT:    ldp x8, x10, [x0, #16]
119; GISEL-NEXT:    mov v1.d[0], x8
120; GISEL-NEXT:    mov v0.d[1], x9
121; GISEL-NEXT:    mov v1.d[1], x10
122; GISEL-NEXT:    cnt v0.16b, v0.16b
123; GISEL-NEXT:    cnt v1.16b, v1.16b
124; GISEL-NEXT:    uaddlv h0, v0.16b
125; GISEL-NEXT:    uaddlv h1, v1.16b
126; GISEL-NEXT:    mov w8, v0.s[0]
127; GISEL-NEXT:    fmov w9, s1
128; GISEL-NEXT:    add x0, x8, w9, uxtw
129; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
130; GISEL-NEXT:    ret
131;
132; GISELO0-LABEL: popcount256:
133; GISELO0:       // %bb.0: // %Entry
134; GISELO0-NEXT:    ldr x11, [x0]
135; GISELO0-NEXT:    ldr x10, [x0, #8]
136; GISELO0-NEXT:    ldr x9, [x0, #16]
137; GISELO0-NEXT:    ldr x8, [x0, #24]
138; GISELO0-NEXT:    // implicit-def: $q1
139; GISELO0-NEXT:    mov v1.d[0], x11
140; GISELO0-NEXT:    mov v1.d[1], x10
141; GISELO0-NEXT:    // implicit-def: $q0
142; GISELO0-NEXT:    mov v0.d[0], x9
143; GISELO0-NEXT:    mov v0.d[1], x8
144; GISELO0-NEXT:    cnt v1.16b, v1.16b
145; GISELO0-NEXT:    uaddlv h1, v1.16b
146; GISELO0-NEXT:    // kill: def $q1 killed $h1
147; GISELO0-NEXT:    // kill: def $s1 killed $s1 killed $q1
148; GISELO0-NEXT:    fmov w0, s1
149; GISELO0-NEXT:    mov w10, wzr
150; GISELO0-NEXT:    mov w9, w0
151; GISELO0-NEXT:    mov w8, w10
152; GISELO0-NEXT:    bfi x9, x8, #32, #32
153; GISELO0-NEXT:    cnt v0.16b, v0.16b
154; GISELO0-NEXT:    uaddlv h0, v0.16b
155; GISELO0-NEXT:    // kill: def $q0 killed $h0
156; GISELO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
157; GISELO0-NEXT:    fmov w0, s0
158; GISELO0-NEXT:    mov w8, w0
159; GISELO0-NEXT:    // kill: def $x10 killed $w10
160; GISELO0-NEXT:    bfi x8, x10, #32, #32
161; GISELO0-NEXT:    adds x8, x8, x9
162; GISELO0-NEXT:    mov w0, w8
163; GISELO0-NEXT:    ret
164Entry:
165  %1 = load i256, ptr %0, align 16
166  %2 = tail call i256 @llvm.ctpop.i256(i256 %1)
167  %3 = trunc i256 %2 to i16
168  ret i16 %3
169}
170
171; Function Attrs: nounwind readnone speculatable willreturn
172declare i256 @llvm.ctpop.i256(i256)
173
; popcount1x128: applies llvm.ctpop.v1i128 to a single-element <1 x i128>
; vector argument and returns the resulting vector unchanged.
; In the expected sequences below the argument arrives in x0/x1 and the result
; is returned in x0/x1.
174define <1 x i128> @popcount1x128(<1 x i128> %0) {
175; CHECKO0-LABEL: popcount1x128:
176; CHECKO0:       // %bb.0: // %Entry
177; CHECKO0-NEXT:    // implicit-def: $q0
178; CHECKO0-NEXT:    mov v0.d[0], x0
179; CHECKO0-NEXT:    mov v0.d[1], x1
180; CHECKO0-NEXT:    cnt v0.16b, v0.16b
181; CHECKO0-NEXT:    uaddlv h0, v0.16b
182; CHECKO0-NEXT:    // kill: def $q0 killed $h0
183; CHECKO0-NEXT:    mov x1, xzr
184; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
185; CHECKO0-NEXT:    fmov w0, s0
186; CHECKO0-NEXT:    mov w8, wzr
187; CHECKO0-NEXT:    // kill: def $x0 killed $w0
188; CHECKO0-NEXT:    // kill: def $x8 killed $w8
189; CHECKO0-NEXT:    bfi x0, x8, #32, #32
190; CHECKO0-NEXT:    ret
191;
192; CHECK-LABEL: popcount1x128:
193; CHECK:       // %bb.0: // %Entry
194; CHECK-NEXT:    fmov d0, x0
195; CHECK-NEXT:    mov v0.d[1], x1
196; CHECK-NEXT:    cnt v0.16b, v0.16b
197; CHECK-NEXT:    addv b0, v0.16b
198; CHECK-NEXT:    mov x1, v0.d[1]
199; CHECK-NEXT:    fmov x0, d0
200; CHECK-NEXT:    ret
201;
202; GISEL-LABEL: popcount1x128:
203; GISEL:       // %bb.0: // %Entry
204; GISEL-NEXT:    mov v0.d[0], x0
205; GISEL-NEXT:    mov v0.d[1], x1
206; GISEL-NEXT:    mov x1, xzr
207; GISEL-NEXT:    cnt v0.16b, v0.16b
208; GISEL-NEXT:    uaddlv h0, v0.16b
209; GISEL-NEXT:    mov w0, v0.s[0]
210; GISEL-NEXT:    ret
211;
212; GISELO0-LABEL: popcount1x128:
213; GISELO0:       // %bb.0: // %Entry
214; GISELO0-NEXT:    // implicit-def: $q0
215; GISELO0-NEXT:    mov v0.d[0], x0
216; GISELO0-NEXT:    mov v0.d[1], x1
217; GISELO0-NEXT:    cnt v0.16b, v0.16b
218; GISELO0-NEXT:    uaddlv h0, v0.16b
219; GISELO0-NEXT:    // kill: def $q0 killed $h0
220; GISELO0-NEXT:    mov x1, xzr
221; GISELO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
222; GISELO0-NEXT:    fmov w0, s0
223; GISELO0-NEXT:    mov w8, wzr
224; GISELO0-NEXT:    // kill: def $x0 killed $w0
225; GISELO0-NEXT:    // kill: def $x8 killed $w8
226; GISELO0-NEXT:    bfi x0, x8, #32, #32
227; GISELO0-NEXT:    ret
228Entry:
229  %1 = tail call <1 x i128> @llvm.ctpop.v1i128(<1 x i128> %0)
230  ret <1 x i128> %1
231}
232
; Declaration of the ctpop intrinsic called by popcount1x128.
233declare <1 x i128> @llvm.ctpop.v1i128(<1 x i128>)
234
; popcount2x64: per-lane population count of a <2 x i64> vector via
; llvm.ctpop.v2i64.
; The expected output differs by feature set: the +dotprod configurations use
; udot against an all-ones byte vector followed by a single uaddlp, while the
; remaining configurations use cnt followed by a three-step uaddlp widening
; chain (bytes -> halfwords -> words -> doublewords).
235define <2 x i64> @popcount2x64(<2 x i64> %0) {
236; CHECKO0-LABEL: popcount2x64:
237; CHECKO0:       // %bb.0: // %Entry
238; CHECKO0-NEXT:    cnt v0.16b, v0.16b
239; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
240; CHECKO0-NEXT:    uaddlp v0.4s, v0.8h
241; CHECKO0-NEXT:    uaddlp v0.2d, v0.4s
242; CHECKO0-NEXT:    ret
243;
244; NEON-LABEL: popcount2x64:
245; NEON:       // %bb.0: // %Entry
246; NEON-NEXT:    cnt v0.16b, v0.16b
247; NEON-NEXT:    uaddlp v0.8h, v0.16b
248; NEON-NEXT:    uaddlp v0.4s, v0.8h
249; NEON-NEXT:    uaddlp v0.2d, v0.4s
250; NEON-NEXT:    ret
251;
252; DOT-LABEL: popcount2x64:
253; DOT:       // %bb.0: // %Entry
254; DOT-NEXT:    movi v1.16b, #1
255; DOT-NEXT:    cnt v0.16b, v0.16b
256; DOT-NEXT:    movi v2.2d, #0000000000000000
257; DOT-NEXT:    udot v2.4s, v1.16b, v0.16b
258; DOT-NEXT:    uaddlp v0.2d, v2.4s
259; DOT-NEXT:    ret
260;
261; SVE-LABEL: popcount2x64:
262; SVE:       // %bb.0: // %Entry
263; SVE-NEXT:    cnt v0.16b, v0.16b
264; SVE-NEXT:    uaddlp v0.8h, v0.16b
265; SVE-NEXT:    uaddlp v0.4s, v0.8h
266; SVE-NEXT:    uaddlp v0.2d, v0.4s
267; SVE-NEXT:    ret
268;
269; GISELO0-LABEL: popcount2x64:
270; GISELO0:       // %bb.0: // %Entry
271; GISELO0-NEXT:    cnt v0.16b, v0.16b
272; GISELO0-NEXT:    uaddlp v0.8h, v0.16b
273; GISELO0-NEXT:    uaddlp v0.4s, v0.8h
274; GISELO0-NEXT:    uaddlp v0.2d, v0.4s
275; GISELO0-NEXT:    ret
276;
277; NEON-GISEL-LABEL: popcount2x64:
278; NEON-GISEL:       // %bb.0: // %Entry
279; NEON-GISEL-NEXT:    cnt v0.16b, v0.16b
280; NEON-GISEL-NEXT:    uaddlp v0.8h, v0.16b
281; NEON-GISEL-NEXT:    uaddlp v0.4s, v0.8h
282; NEON-GISEL-NEXT:    uaddlp v0.2d, v0.4s
283; NEON-GISEL-NEXT:    ret
284;
285; DOT-GISEL-LABEL: popcount2x64:
286; DOT-GISEL:       // %bb.0: // %Entry
287; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
288; DOT-GISEL-NEXT:    cnt v0.16b, v0.16b
289; DOT-GISEL-NEXT:    movi v2.16b, #1
290; DOT-GISEL-NEXT:    udot v1.4s, v2.16b, v0.16b
291; DOT-GISEL-NEXT:    uaddlp v0.2d, v1.4s
292; DOT-GISEL-NEXT:    ret
293;
294; SVE-GISEL-LABEL: popcount2x64:
295; SVE-GISEL:       // %bb.0: // %Entry
296; SVE-GISEL-NEXT:    cnt v0.16b, v0.16b
297; SVE-GISEL-NEXT:    uaddlp v0.8h, v0.16b
298; SVE-GISEL-NEXT:    uaddlp v0.4s, v0.8h
299; SVE-GISEL-NEXT:    uaddlp v0.2d, v0.4s
300; SVE-GISEL-NEXT:    ret
301Entry:
302  %1 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
303  ret <2 x i64> %1
304}
305
; Declaration of the ctpop intrinsic called by popcount2x64.
306declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
307
; popcount1x64: population count of a single-element <1 x i64> vector via
; llvm.ctpop.v1i64.
; The expected sequences below either widen the per-byte counts with a uaddlp
; chain or sum them across the vector with uaddlv, depending on the llc
; configuration.
308define <1 x i64> @popcount1x64(<1 x i64> %0) {
309; CHECKO0-LABEL: popcount1x64:
310; CHECKO0:       // %bb.0: // %Entry
311; CHECKO0-NEXT:    fmov x0, d0
312; CHECKO0-NEXT:    fmov d0, x0
313; CHECKO0-NEXT:    cnt v0.8b, v0.8b
314; CHECKO0-NEXT:    uaddlv h0, v0.8b
315; CHECKO0-NEXT:    // kill: def $q0 killed $h0
316; CHECKO0-NEXT:    mov w8, v0.s[0]
317; CHECKO0-NEXT:    // kill: def $x8 killed $w8
318; CHECKO0-NEXT:    fmov d0, x8
319; CHECKO0-NEXT:    ret
320;
321; CHECK-LABEL: popcount1x64:
322; CHECK:       // %bb.0: // %Entry
323; CHECK-NEXT:    cnt v0.8b, v0.8b
324; CHECK-NEXT:    uaddlp v0.4h, v0.8b
325; CHECK-NEXT:    uaddlp v0.2s, v0.4h
326; CHECK-NEXT:    uaddlp v0.1d, v0.2s
327; CHECK-NEXT:    ret
328;
329; GISEL-LABEL: popcount1x64:
330; GISEL:       // %bb.0: // %Entry
331; GISEL-NEXT:    cnt v0.8b, v0.8b
332; GISEL-NEXT:    uaddlv h0, v0.8b
333; GISEL-NEXT:    mov w8, v0.s[0]
334; GISEL-NEXT:    fmov d0, x8
335; GISEL-NEXT:    ret
336;
337; GISELO0-LABEL: popcount1x64:
338; GISELO0:       // %bb.0: // %Entry
339; GISELO0-NEXT:    fmov x0, d0
340; GISELO0-NEXT:    fmov d0, x0
341; GISELO0-NEXT:    cnt v0.8b, v0.8b
342; GISELO0-NEXT:    uaddlv h0, v0.8b
343; GISELO0-NEXT:    // kill: def $q0 killed $h0
344; GISELO0-NEXT:    mov w8, v0.s[0]
345; GISELO0-NEXT:    // kill: def $x8 killed $w8
346; GISELO0-NEXT:    fmov d0, x8
347; GISELO0-NEXT:    ret
348Entry:
349  %1 = tail call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %0)
350  ret <1 x i64> %1
351}
352
; Declaration of the ctpop intrinsic called by popcount1x64.
353declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)
354
; popcount4x32: per-lane population count of a <4 x i32> vector via
; llvm.ctpop.v4i32.
; With +dotprod the expected sequence accumulates the per-byte counts into the
; 32-bit lanes with a single udot against an all-ones byte vector; the other
; configurations use cnt followed by two uaddlp widening steps.
355define <4 x i32> @popcount4x32(<4 x i32> %0) {
356; CHECKO0-LABEL: popcount4x32:
357; CHECKO0:       // %bb.0: // %Entry
358; CHECKO0-NEXT:    cnt v0.16b, v0.16b
359; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
360; CHECKO0-NEXT:    uaddlp v0.4s, v0.8h
361; CHECKO0-NEXT:    ret
362;
363; NEON-LABEL: popcount4x32:
364; NEON:       // %bb.0: // %Entry
365; NEON-NEXT:    cnt v0.16b, v0.16b
366; NEON-NEXT:    uaddlp v0.8h, v0.16b
367; NEON-NEXT:    uaddlp v0.4s, v0.8h
368; NEON-NEXT:    ret
369;
370; DOT-LABEL: popcount4x32:
371; DOT:       // %bb.0: // %Entry
372; DOT-NEXT:    movi v1.16b, #1
373; DOT-NEXT:    cnt v2.16b, v0.16b
374; DOT-NEXT:    movi v0.2d, #0000000000000000
375; DOT-NEXT:    udot v0.4s, v1.16b, v2.16b
376; DOT-NEXT:    ret
377;
378; SVE-LABEL: popcount4x32:
379; SVE:       // %bb.0: // %Entry
380; SVE-NEXT:    cnt v0.16b, v0.16b
381; SVE-NEXT:    uaddlp v0.8h, v0.16b
382; SVE-NEXT:    uaddlp v0.4s, v0.8h
383; SVE-NEXT:    ret
384;
385; GISELO0-LABEL: popcount4x32:
386; GISELO0:       // %bb.0: // %Entry
387; GISELO0-NEXT:    cnt v0.16b, v0.16b
388; GISELO0-NEXT:    uaddlp v0.8h, v0.16b
389; GISELO0-NEXT:    uaddlp v0.4s, v0.8h
390; GISELO0-NEXT:    ret
391;
392; NEON-GISEL-LABEL: popcount4x32:
393; NEON-GISEL:       // %bb.0: // %Entry
394; NEON-GISEL-NEXT:    cnt v0.16b, v0.16b
395; NEON-GISEL-NEXT:    uaddlp v0.8h, v0.16b
396; NEON-GISEL-NEXT:    uaddlp v0.4s, v0.8h
397; NEON-GISEL-NEXT:    ret
398;
399; DOT-GISEL-LABEL: popcount4x32:
400; DOT-GISEL:       // %bb.0: // %Entry
401; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
402; DOT-GISEL-NEXT:    cnt v0.16b, v0.16b
403; DOT-GISEL-NEXT:    movi v2.16b, #1
404; DOT-GISEL-NEXT:    udot v1.4s, v2.16b, v0.16b
405; DOT-GISEL-NEXT:    mov v0.16b, v1.16b
406; DOT-GISEL-NEXT:    ret
407;
408; SVE-GISEL-LABEL: popcount4x32:
409; SVE-GISEL:       // %bb.0: // %Entry
410; SVE-GISEL-NEXT:    cnt v0.16b, v0.16b
411; SVE-GISEL-NEXT:    uaddlp v0.8h, v0.16b
412; SVE-GISEL-NEXT:    uaddlp v0.4s, v0.8h
413; SVE-GISEL-NEXT:    ret
414Entry:
415  %1 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
416  ret <4 x i32> %1
417}
418
; Declaration of the ctpop intrinsic called by popcount4x32.
419declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
420
; popcount2x32: per-lane population count of a 64-bit <2 x i32> vector via
; llvm.ctpop.v2i32.
; With +dotprod the expected sequence uses the 64-bit form of udot
; (v.2s / v.8b) against an all-ones byte vector; the other configurations use
; cnt followed by two uaddlp widening steps.
421define <2 x i32> @popcount2x32(<2 x i32> %0) {
422; CHECKO0-LABEL: popcount2x32:
423; CHECKO0:       // %bb.0: // %Entry
424; CHECKO0-NEXT:    cnt v0.8b, v0.8b
425; CHECKO0-NEXT:    uaddlp v0.4h, v0.8b
426; CHECKO0-NEXT:    uaddlp v0.2s, v0.4h
427; CHECKO0-NEXT:    ret
428;
429; NEON-LABEL: popcount2x32:
430; NEON:       // %bb.0: // %Entry
431; NEON-NEXT:    cnt v0.8b, v0.8b
432; NEON-NEXT:    uaddlp v0.4h, v0.8b
433; NEON-NEXT:    uaddlp v0.2s, v0.4h
434; NEON-NEXT:    ret
435;
436; DOT-LABEL: popcount2x32:
437; DOT:       // %bb.0: // %Entry
438; DOT-NEXT:    movi v1.2d, #0000000000000000
439; DOT-NEXT:    cnt v0.8b, v0.8b
440; DOT-NEXT:    movi v2.8b, #1
441; DOT-NEXT:    udot v1.2s, v2.8b, v0.8b
442; DOT-NEXT:    fmov d0, d1
443; DOT-NEXT:    ret
444;
445; SVE-LABEL: popcount2x32:
446; SVE:       // %bb.0: // %Entry
447; SVE-NEXT:    cnt v0.8b, v0.8b
448; SVE-NEXT:    uaddlp v0.4h, v0.8b
449; SVE-NEXT:    uaddlp v0.2s, v0.4h
450; SVE-NEXT:    ret
451;
452; GISELO0-LABEL: popcount2x32:
453; GISELO0:       // %bb.0: // %Entry
454; GISELO0-NEXT:    cnt v0.8b, v0.8b
455; GISELO0-NEXT:    uaddlp v0.4h, v0.8b
456; GISELO0-NEXT:    uaddlp v0.2s, v0.4h
457; GISELO0-NEXT:    ret
458;
459; NEON-GISEL-LABEL: popcount2x32:
460; NEON-GISEL:       // %bb.0: // %Entry
461; NEON-GISEL-NEXT:    cnt v0.8b, v0.8b
462; NEON-GISEL-NEXT:    uaddlp v0.4h, v0.8b
463; NEON-GISEL-NEXT:    uaddlp v0.2s, v0.4h
464; NEON-GISEL-NEXT:    ret
465;
466; DOT-GISEL-LABEL: popcount2x32:
467; DOT-GISEL:       // %bb.0: // %Entry
468; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
469; DOT-GISEL-NEXT:    cnt v0.8b, v0.8b
470; DOT-GISEL-NEXT:    movi v2.8b, #1
471; DOT-GISEL-NEXT:    udot v1.2s, v2.8b, v0.8b
472; DOT-GISEL-NEXT:    fmov d0, d1
473; DOT-GISEL-NEXT:    ret
474;
475; SVE-GISEL-LABEL: popcount2x32:
476; SVE-GISEL:       // %bb.0: // %Entry
477; SVE-GISEL-NEXT:    cnt v0.8b, v0.8b
478; SVE-GISEL-NEXT:    uaddlp v0.4h, v0.8b
479; SVE-GISEL-NEXT:    uaddlp v0.2s, v0.4h
480; SVE-GISEL-NEXT:    ret
481Entry:
482  %1 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %0)
483  ret <2 x i32> %1
484}
485
; Declaration of the ctpop intrinsic called by popcount2x32.
486declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
487
; popcount8x16: per-lane population count of an <8 x i16> vector via
; llvm.ctpop.v8i16.
; Every configuration recorded below expects the same two-instruction
; sequence: a per-byte cnt followed by one uaddlp pairing bytes into halfwords.
488define <8 x i16> @popcount8x16(<8 x i16> %0) {
489; CHECKO0-LABEL: popcount8x16:
490; CHECKO0:       // %bb.0: // %Entry
491; CHECKO0-NEXT:    cnt v0.16b, v0.16b
492; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
493; CHECKO0-NEXT:    ret
494;
495; CHECK-LABEL: popcount8x16:
496; CHECK:       // %bb.0: // %Entry
497; CHECK-NEXT:    cnt v0.16b, v0.16b
498; CHECK-NEXT:    uaddlp v0.8h, v0.16b
499; CHECK-NEXT:    ret
500;
501; GISEL-LABEL: popcount8x16:
502; GISEL:       // %bb.0: // %Entry
503; GISEL-NEXT:    cnt v0.16b, v0.16b
504; GISEL-NEXT:    uaddlp v0.8h, v0.16b
505; GISEL-NEXT:    ret
506;
507; GISELO0-LABEL: popcount8x16:
508; GISELO0:       // %bb.0: // %Entry
509; GISELO0-NEXT:    cnt v0.16b, v0.16b
510; GISELO0-NEXT:    uaddlp v0.8h, v0.16b
511; GISELO0-NEXT:    ret
512Entry:
513  %1 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
514  ret <8 x i16> %1
515}
516
; Declaration of the ctpop intrinsic called by popcount8x16.
517declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
518
; popcount4x16: per-lane population count of a 64-bit <4 x i16> vector via
; llvm.ctpop.v4i16.
; Every configuration recorded below expects the same two-instruction
; sequence: a per-byte cnt followed by one uaddlp pairing bytes into halfwords.
519define <4 x i16> @popcount4x16(<4 x i16> %0) {
520; CHECKO0-LABEL: popcount4x16:
521; CHECKO0:       // %bb.0: // %Entry
522; CHECKO0-NEXT:    cnt v0.8b, v0.8b
523; CHECKO0-NEXT:    uaddlp v0.4h, v0.8b
524; CHECKO0-NEXT:    ret
525;
526; CHECK-LABEL: popcount4x16:
527; CHECK:       // %bb.0: // %Entry
528; CHECK-NEXT:    cnt v0.8b, v0.8b
529; CHECK-NEXT:    uaddlp v0.4h, v0.8b
530; CHECK-NEXT:    ret
531;
532; GISEL-LABEL: popcount4x16:
533; GISEL:       // %bb.0: // %Entry
534; GISEL-NEXT:    cnt v0.8b, v0.8b
535; GISEL-NEXT:    uaddlp v0.4h, v0.8b
536; GISEL-NEXT:    ret
537;
538; GISELO0-LABEL: popcount4x16:
539; GISELO0:       // %bb.0: // %Entry
540; GISELO0-NEXT:    cnt v0.8b, v0.8b
541; GISELO0-NEXT:    uaddlp v0.4h, v0.8b
542; GISELO0-NEXT:    ret
543Entry:
544  %1 = tail call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %0)
545  ret <4 x i16> %1
546}
547
; Declaration of the ctpop intrinsic called by popcount4x16.
548declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
549