; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB

define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: ctpop_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv1i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @ctpop_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: ctpop_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv2i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @ctpop_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: ctpop_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @ctpop_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: ctpop_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @ctpop_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: ctpop_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @ctpop_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: ctpop_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv32i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: ctpop_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv64i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>)

define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: ctpop_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv1i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: ctpop_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv2i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: ctpop_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: ctpop_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: ctpop_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: ctpop_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: ctpop_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: ctpop_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: ctpop_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: ctpop_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: ctpop_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}

; We always emit vcpop.v for the scalable vector
define <vscale x 16 x i1> @ctpop_nxv16i32_ult_two(<vscale x 16 x i32> %va) {
; CHECK-LABEL: ctpop_nxv16i32_ult_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ult_two:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
  %cmp = icmp ult <vscale x 16 x i32> %a, splat (i32 2)
  ret <vscale x 16 x i1> %cmp
}

define <vscale x 16 x i1> @ctpop_nxv16i32_ugt_one(<vscale x 16 x i32> %va) {
; CHECK-LABEL: ctpop_nxv16i32_ugt_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ugt_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
  %cmp = icmp ugt <vscale x 16 x i32> %a, splat (i32 1)
  ret <vscale x 16 x i1> %cmp
}

define <vscale x 16 x i1> @ctpop_nxv16i32_eq_one(<vscale x 16 x i32> %va) {
; CHECK-LABEL: ctpop_nxv16i32_eq_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v16
; CHECK-NEXT:    vmsltu.vv v0, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i32_eq_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmseq.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
  %cmp = icmp eq <vscale x 16 x i32> %a, splat (i32 1)
  ret <vscale x 16 x i1> %cmp
}

define <vscale x 16 x i1> @ctpop_nxv16i32_ne_one(<vscale x 16 x i32> %va) {
; CHECK-LABEL: ctpop_nxv16i32_ne_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v16
; CHECK-NEXT:    vmsleu.vv v0, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ne_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsne.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
  %cmp = icmp ne <vscale x 16 x i32> %a, splat (i32 1)
  ret <vscale x 16 x i1> %cmp
}

declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: ctpop_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv1i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: ctpop_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a0
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv2i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)

; Without Zvbb, ctpop is expanded to the classic bit-manipulation ("SWAR")
; popcount over i64 lanes: pairwise bit sums via the 0x5555.../0x3333...
; masks (lui 349525/addi 1365, lui 209715/addi 819), nibble merge with
; 0x0F0F... (lui 61681/addi -241), then a 0x0101... multiply (lui 4112/
; addi 257) and a shift right by 56 to collect the count.
; On rv32 the 64-bit mask constants cannot live in a scalar register, so
; each one is splatted through an e32 vmv.v.x and reinterpreted at e64;
; on rv64 they are built with lui/addiw/slli/add and used via .vx ops.
; With +zvbb the whole expansion collapses to a single vcpop.v.
define <vscale x 4 x i64> @ctpop_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: ctpop_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
922declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
923
; Same bit-manipulation popcount expansion as ctpop_nxv4i64, at LMUL=8.
; Note the register pressure at m8: only v8/v16/v24 groups are available,
; so the rv32 lowering re-splats masks into v16/v24 as they are consumed.
; With +zvbb this is again a single vcpop.v.
define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctpop_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v24
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v24
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
1005
; With +zvbb we always emit vcpop.v for scalable vectors, even when the
; popcount only feeds a comparison (the following tests).
; ctpop(x) u< 2 means "at most one bit set". Without Zvbb this folds to
; (x & (x-1)) == 0 (clearing the lowest set bit yields zero), avoiding
; the full popcount expansion. With Zvbb the compare is done directly on
; the vcpop.v result.
define <vscale x 8 x i1> @ctpop_nxv8i64_ult_two(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ult_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ult_two:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp ult <vscale x 8 x i64> %a, splat (i64 2)
  ret <vscale x 8 x i1> %cmp
}
1026
; ctpop(x) u> 1 means "more than one bit set" — the negation of the
; ult_two case above it in llc's fold, so this becomes (x & (x-1)) != 0
; without Zvbb, and vcpop.v followed by vmsgtu with Zvbb.
define <vscale x 8 x i1> @ctpop_nxv8i64_ugt_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ugt_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ugt_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp ugt <vscale x 8 x i64> %a, splat (i64 1)
  ret <vscale x 8 x i1> %cmp
}
1046
; ctpop(x) == 1 is the power-of-two test. Without Zvbb it is lowered as
; (x-1) u< (x ^ (x-1)): when exactly one bit is set, the xor produces a
; mask strictly greater than x-1; otherwise (zero or multiple bits) it
; does not. With Zvbb the vcpop.v result is compared to 1 directly.
define <vscale x 8 x i1> @ctpop_nxv8i64_eq_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_eq_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v16
; CHECK-NEXT:    vmsltu.vv v0, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_eq_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmseq.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp eq <vscale x 8 x i64> %a, splat (i64 1)
  ret <vscale x 8 x i1> %cmp
}
1066
; ctpop(x) != 1 — the complement of the eq_one case: lowered without
; Zvbb as (x ^ (x-1)) u<= (x-1), i.e. the inverted power-of-two test.
; With Zvbb the vcpop.v result is compared against 1 with vmsne.
define <vscale x 8 x i1> @ctpop_nxv8i64_ne_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ne_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v16
; CHECK-NEXT:    vmsleu.vv v0, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ne_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsne.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp ne <vscale x 8 x i64> %a, splat (i64 1)
  ret <vscale x 8 x i1> %cmp
}
1086
1087declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)
1088