; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
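
; Without Zvbb, llvm.vp.ctpop.* is lowered to the classic SWAR bit-counting
; sequence checked below; with +zvbb it selects directly to vcpop.v. As a
; worked example on one byte, x = 0b01101101 (popcount 5):
;   a = x - ((x >> 1) & 0x55)          ; 2-bit pair sums: 0b01011001
;   b = (a & 0x33) + ((a >> 2) & 0x33) ; nibble sums:     0b00100011
;   c = (b + (b >> 4)) & 0x0F          ; byte sum:        0b00000101 = 5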

declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vp_ctpop_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

define <vscale x 1 x i8> @vp_ctpop_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %v
}
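
; At SEW=8 the three masks fit scalar materialization directly: 85 = 0x55 and
; 51 = 0x33 come from a single li, and the final mask 15 = 0x0F fits the 5-bit
; immediate of vand.vi. No widening multiply is needed, since after the nibble
; step each byte already holds its own popcount.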

declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vp_ctpop_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i8> @vp_ctpop_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i8> %v
}

declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vp_ctpop_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @vp_ctpop_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vp_ctpop_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @vp_ctpop_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i8> @vp_ctpop_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i8> %v
}

define <vscale x 16 x i8> @vp_ctpop_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i8> @vp_ctpop_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

define <vscale x 32 x i8> @vp_ctpop_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv32i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32)

define <vscale x 64 x i8> @vp_ctpop_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i8> %v
}

define <vscale x 64 x i8> @vp_ctpop_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv64i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 64 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i16> %v
}
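
; At SEW=16 the masks are built with lui/addi pairs (e.g. lui a0, 5 then
; addi a0, a0, 1365 yields 0x5555; likewise 0x3333 and 0x0f0f), and the
; per-byte counts are summed horizontally by multiplying with 257 = 0x0101,
; which accumulates both byte counts into the high byte; vsrl.vi by 8 then
; extracts it.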

declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i16> %v
}

declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i16> %v
}

define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i16> %v
}

declare <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v16, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i16> %v
}

define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x i16> %v
}

declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i32> %v
}
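
; The SEW=32 masks use the same lui/addi recipe on 32-bit patterns:
; lui 349525 + addi 1365 = 0x55555555, lui 209715 + addi 819 = 0x33333333,
; lui 61681 + addi -241 = 0x0f0f0f0f. Multiplying by 0x01010101 (lui 4112 +
; addi 257) folds all four byte counts into the top byte, extracted with
; vsrl.vi by 24.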

declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i32> %v
}

declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v16, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i32> %v
}

define <vscale x 16 x i32> @vp_ctpop_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i32> %v
}

declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %v
}

define <vscale x 1 x i64> @vp_ctpop_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv1i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv1i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %v
}
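
; For SEW=64 the two targets diverge. RV64 materializes the 64-bit masks in
; scalar registers (addiw, then slli 32/add doubles the 32-bit pattern) and
; keeps the vand.vx/vmul.vx forms; the final shift amount 56 exceeds the
; 5-bit immediate of vsrl.vi, so it goes through a register via vsrl.vx.
; RV32 has no 64-bit scalars, so each mask is splatted at SEW=32 with
; vmv.v.x (the repeating 32-bit pattern fills both halves of every e64
; element) and the ops use the .vv forms, with vsetvli toggling between
; e32 for the splats and e64 for the arithmetic.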

declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %v
}
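
; On RV32 the 64-bit SWAR constants cannot be materialized in a scalar
; register, so each mask is built by splatting its repeating 32-bit pattern
; with vmv.v.x at SEW=32 and then reusing the same register group at SEW=64;
; that is what the vsetvli toggling between e32 and e64 in the RV32 blocks
; is doing.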

define <vscale x 2 x i64> @vp_ctpop_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i64> %v
}

declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v16, v12, v16, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @vp_ctpop_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i64> %v
}

declare <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32)

define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv7i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v8, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v24, v16, v0.t
; RV32-NEXT:    vsrl.vi v24, v16, 4, v0.t
; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv7i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x i64> %v
}

define <vscale x 7 x i64> @vp_ctpop_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv7i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v24
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v24
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv7i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x i64> %v
}
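
; <vscale x 7 x i64> exercises a non-power-of-two element count; it appears
; to be handled as the containing power-of-two type and gets the same m8
; register-group lowering as <vscale x 8 x i64> below.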

declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v8, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v24, v16, v0.t
; RV32-NEXT:    vsrl.vi v24, v16, 4, v0.t
; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vp_ctpop_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v24
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v24
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i64> %v
}

declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 48
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmv1r.v v7, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    srli a3, a1, 3
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    sub a3, a0, a1
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 5
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 16
; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
; RV32-NEXT:    lui a3, 209715
; RV32-NEXT:    addi a3, a3, 819
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v8, a3
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    lui a3, 61681
; RV32-NEXT:    addi a3, a3, -241
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a3
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    lui a3, 4112
; RV32-NEXT:    addi a3, a3, 257
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a3
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    bltu a0, a1, .LBB46_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:  .LBB46_2:
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vmv8r.v v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a2, a1, 3
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v24, v0, a2
; RV64-NEXT:    mv a2, a0
; RV64-NEXT:    bltu a0, a1, .LBB46_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:  .LBB46_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    addiw a3, a3, 819
; RV64-NEXT:    addiw a4, a4, -241
; RV64-NEXT:    addiw a5, a5, 257
; RV64-NEXT:    slli a6, a2, 32
; RV64-NEXT:    add a6, a2, a6
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    add a7, a3, a2
; RV64-NEXT:    slli a2, a4, 32
; RV64-NEXT:    add a2, a4, a2
; RV64-NEXT:    slli a3, a5, 32
; RV64-NEXT:    add a3, a5, a3
; RV64-NEXT:    li a4, 56
; RV64-NEXT:    sub a1, a0, a1
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a1
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a7, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a7, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a4, v0.t
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v16, a7, v0.t
; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
; RV64-NEXT:    vand.vx v16, v16, a7, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
; CHECK-ZVBB-NEXT:    csrr a1, vlenb
; CHECK-ZVBB-NEXT:    srli a2, a1, 3
; CHECK-ZVBB-NEXT:    sub a3, a0, a1
; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-ZVBB-NEXT:    sltu a2, a0, a3
; CHECK-ZVBB-NEXT:    addi a2, a2, -1
; CHECK-ZVBB-NEXT:    and a2, a2, a3
; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v16, v16, v0.t
; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
; CHECK-ZVBB-NEXT:  # %bb.1:
; CHECK-ZVBB-NEXT:    mv a0, a1
; CHECK-ZVBB-NEXT:  .LBB46_2:
; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i64> %v
}
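
; <vscale x 16 x i64> occupies two full m8 register groups (v8 and v16), so
; the lowering splits the operation in two. The EVL for the high half is a
; saturating subtraction, computed without a branch by the sub/sltu/addi/and
; sequence above; roughly, in scalar terms:
;   hi_evl = (evl < evl - vlmax) ? 0 : evl - vlmax   // usub.sat(evl, vlmax)
; where vlmax is the element capacity of one m8 group (equal to vlenb at
; SEW=64), and the low-half EVL is clamped to vlmax by the bltu/mv pair.
; With every vector register live, the RV32 masked version must also spill
; the splatted mask constants and intermediates, which is what the
; vlenb-scaled vs8r.v/vl8r.v "Folded Spill/Reload" traffic implements.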

define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_nxv16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 5
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    lui a3, 209715
; RV32-NEXT:    sub a4, a0, a1
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    addi a3, a3, 819
; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v0, a2
; RV32-NEXT:    sltu a2, a0, a4
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a2, a2, a4
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v16, 1
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    li a5, 24
; RV32-NEXT:    mul a4, a4, a5
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v0, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vsub.vv v16, v16, v24
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v0, a3
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v0
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vv v16, v16, v0
; RV32-NEXT:    vadd.vv v16, v24, v16
; RV32-NEXT:    vsrl.vi v24, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v24
; RV32-NEXT:    lui a3, 61681
; RV32-NEXT:    lui a4, 4112
; RV32-NEXT:    addi a3, a3, -241
; RV32-NEXT:    addi a4, a4, 257
; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a3
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a4
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v16, v16, v24
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsrl.vx v16, v16, a2
; RV32-NEXT:    bltu a0, a1, .LBB47_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:  .LBB47_2:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 1
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v0
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v24
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    vsrl.vx v8, v8, a2
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_nxv16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB47_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB47_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    lui a3, 349525
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a3, a3, 1365
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a5, a5, -241
; RV64-NEXT:    addiw a6, a6, 257
; RV64-NEXT:    slli a7, a3, 32
; RV64-NEXT:    add a3, a3, a7
; RV64-NEXT:    slli a7, a4, 32
; RV64-NEXT:    add a4, a4, a7
; RV64-NEXT:    slli a7, a5, 32
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    slli a7, a6, 32
; RV64-NEXT:    add a6, a6, a7
; RV64-NEXT:    li a7, 56
; RV64-NEXT:    sub a2, a0, a2
; RV64-NEXT:    sltu a0, a0, a2
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsub.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsub.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v8, a4
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a4
; RV64-NEXT:    vadd.vv v8, v24, v8
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v16, a4
; RV64-NEXT:    vsrl.vi v16, v16, 2
; RV64-NEXT:    vand.vx v16, v16, a4
; RV64-NEXT:    vadd.vv v16, v24, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v8, v8, a6
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v8, v8, a7
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v16, v16, a6
; RV64-NEXT:    vsrl.vx v16, v16, a7
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    csrr a1, vlenb
; CHECK-ZVBB-NEXT:    sub a2, a0, a1
; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
; CHECK-ZVBB-NEXT:    addi a3, a3, -1
; CHECK-ZVBB-NEXT:    and a2, a3, a2
; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v16, v16
; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
; CHECK-ZVBB-NEXT:  # %bb.1:
; CHECK-ZVBB-NEXT:    mv a0, a1
; CHECK-ZVBB-NEXT:  .LBB47_2:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i64> %v
}
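
; In the unmasked RV64 split above, the two halves are instead interleaved
; by toggling vsetvli between the low-half (a1) and high-half (a0) EVLs,
; presumably because enough vector registers remain to keep both halves live
; without spilling to the stack.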

; Test promotion: i9 is not a legal element type, so the operation is
; promoted to i16, and the input is first masked to 9 bits (511) so that
; only the original i9 payload is counted.
declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_nxv1i9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 511
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    li a1, 511
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vand.vx v8, v8, a1, v0.t
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i9> %v
}
