; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

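; These tests cover the lowering of llvm.vp.ctpop.* for fixed-length vectors
; when no vector popcount instruction is available. Per the CHECK lines below,
; the expansion is the classic bit-twiddling population count:
;   v = v - ((v >> 1) & 0x55...)
;   v = (v & 0x33...) + ((v >> 2) & 0x33...)
;   v = (v + (v >> 4)) & 0x0f...
; For elements wider than i8 this is followed by a multiply with 0x0101... and
; a shift that moves the per-byte sums into the low byte; for i8 a final
; vand.vi with 15 suffices.
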
declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)

define <2 x i8> @vp_ctpop_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctpop_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)

define <4 x i8> @vp_ctpop_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctpop_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)

define <8 x i8> @vp_ctpop_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctpop_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)

define <16 x i8> @vp_ctpop_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)

define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)

define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)

define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)

define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)

define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)

define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)

define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)

define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctpop_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

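; For i64 elements the same expansion is used, but the 64-bit masks cannot be
; materialized as simple scalar immediates on RV32: as the RV32 CHECK lines
; below show, each constant is splatted from its identical 32-bit halves with
; vmv.v.x at e32 (or, at LMUL=8, loaded from the stack), while RV64 builds it
; in a scalar register with lui/addiw followed by slli/add.
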
declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)

define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)

define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)

define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v16, v12, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

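; At LMUL=8 (v15i64 and wider), the RV32 lowering no longer splats the masks
; with vmv.v.x; per the CHECK lines below it stores each 32-bit pattern to the
; stack and reloads it as a splat with a zero-stride vlse64, and in the masked
; case it also spills/reloads the source vector group, presumably because of
; register pressure at this register-group size. The RV64 code is unchanged
; apart from using m8.
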
declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32)

define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v24, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v0
; RV32-NEXT:    vand.vv v0, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v0, v8
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)

define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v24, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v0
; RV32-NEXT:    vand.vv v0, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v0, v8
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctpop_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

1515declare <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64>, <32 x i1>, i32)
1516
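; The <32 x i64> cases are lowered as two <16 x i64> halves at e64/m8: the
; first half runs with min(evl, 16) elements (the bltu/li 16 sequence) and the
; second with the saturated remainder evl - 16, clamped to 0 by the
; addi/sltu/addi/and sequence.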
1517define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
1518; RV32-LABEL: vp_ctpop_v32i64:
1519; RV32:       # %bb.0:
1520; RV32-NEXT:    addi sp, sp, -48
1521; RV32-NEXT:    .cfi_def_cfa_offset 48
1522; RV32-NEXT:    csrr a1, vlenb
1523; RV32-NEXT:    li a2, 48
1524; RV32-NEXT:    mul a1, a1, a2
1525; RV32-NEXT:    sub sp, sp, a1
1526; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
1527; RV32-NEXT:    csrr a1, vlenb
1528; RV32-NEXT:    slli a1, a1, 4
1529; RV32-NEXT:    add a1, sp, a1
1530; RV32-NEXT:    addi a1, a1, 48
1531; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
1532; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1533; RV32-NEXT:    vslidedown.vi v7, v0, 2
1534; RV32-NEXT:    lui a1, 349525
1535; RV32-NEXT:    lui a2, 209715
1536; RV32-NEXT:    addi a1, a1, 1365
1537; RV32-NEXT:    sw a1, 40(sp)
1538; RV32-NEXT:    sw a1, 44(sp)
1539; RV32-NEXT:    lui a1, 61681
1540; RV32-NEXT:    addi a2, a2, 819
1541; RV32-NEXT:    sw a2, 32(sp)
1542; RV32-NEXT:    sw a2, 36(sp)
1543; RV32-NEXT:    lui a2, 4112
1544; RV32-NEXT:    addi a1, a1, -241
1545; RV32-NEXT:    sw a1, 24(sp)
1546; RV32-NEXT:    sw a1, 28(sp)
1547; RV32-NEXT:    li a3, 16
1548; RV32-NEXT:    addi a1, a2, 257
1549; RV32-NEXT:    sw a1, 16(sp)
1550; RV32-NEXT:    sw a1, 20(sp)
1551; RV32-NEXT:    mv a1, a0
1552; RV32-NEXT:    bltu a0, a3, .LBB34_2
1553; RV32-NEXT:  # %bb.1:
1554; RV32-NEXT:    li a1, 16
1555; RV32-NEXT:  .LBB34_2:
1556; RV32-NEXT:    addi a2, sp, 40
1557; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1558; RV32-NEXT:    vlse64.v v16, (a2), zero
1559; RV32-NEXT:    csrr a2, vlenb
1560; RV32-NEXT:    li a3, 40
1561; RV32-NEXT:    mul a2, a2, a3
1562; RV32-NEXT:    add a2, sp, a2
1563; RV32-NEXT:    addi a2, a2, 48
1564; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
1565; RV32-NEXT:    addi a2, sp, 32
1566; RV32-NEXT:    vlse64.v v16, (a2), zero
1567; RV32-NEXT:    csrr a2, vlenb
1568; RV32-NEXT:    slli a2, a2, 5
1569; RV32-NEXT:    add a2, sp, a2
1570; RV32-NEXT:    addi a2, a2, 48
1571; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
1572; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1573; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
1574; RV32-NEXT:    csrr a2, vlenb
1575; RV32-NEXT:    li a3, 40
1576; RV32-NEXT:    mul a2, a2, a3
1577; RV32-NEXT:    add a2, sp, a2
1578; RV32-NEXT:    addi a2, a2, 48
1579; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1580; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
1581; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
1582; RV32-NEXT:    csrr a2, vlenb
1583; RV32-NEXT:    slli a2, a2, 5
1584; RV32-NEXT:    add a2, sp, a2
1585; RV32-NEXT:    addi a2, a2, 48
1586; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1587; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
1588; RV32-NEXT:    csrr a2, vlenb
1589; RV32-NEXT:    li a3, 24
1590; RV32-NEXT:    mul a2, a2, a3
1591; RV32-NEXT:    add a2, sp, a2
1592; RV32-NEXT:    addi a2, a2, 48
1593; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
1594; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
1595; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
1596; RV32-NEXT:    csrr a2, vlenb
1597; RV32-NEXT:    li a3, 24
1598; RV32-NEXT:    mul a2, a2, a3
1599; RV32-NEXT:    add a2, sp, a2
1600; RV32-NEXT:    addi a2, a2, 48
1601; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1602; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
1603; RV32-NEXT:    csrr a2, vlenb
1604; RV32-NEXT:    slli a2, a2, 3
1605; RV32-NEXT:    add a2, sp, a2
1606; RV32-NEXT:    addi a2, a2, 48
1607; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
1608; RV32-NEXT:    addi a2, sp, 24
1609; RV32-NEXT:    addi a3, sp, 16
1610; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1611; RV32-NEXT:    vlse64.v v16, (a2), zero
1612; RV32-NEXT:    addi a2, sp, 48
1613; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
1614; RV32-NEXT:    vlse64.v v8, (a3), zero
1615; RV32-NEXT:    csrr a2, vlenb
1616; RV32-NEXT:    li a3, 24
1617; RV32-NEXT:    mul a2, a2, a3
1618; RV32-NEXT:    add a2, sp, a2
1619; RV32-NEXT:    addi a2, a2, 48
1620; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
1621; RV32-NEXT:    csrr a2, vlenb
1622; RV32-NEXT:    slli a2, a2, 3
1623; RV32-NEXT:    add a2, sp, a2
1624; RV32-NEXT:    addi a2, a2, 48
1625; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
1626; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1627; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
1628; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
1629; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
1630; RV32-NEXT:    csrr a1, vlenb
1631; RV32-NEXT:    li a2, 24
1632; RV32-NEXT:    mul a1, a1, a2
1633; RV32-NEXT:    add a1, sp, a1
1634; RV32-NEXT:    addi a1, a1, 48
1635; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
1636; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
1637; RV32-NEXT:    li a1, 56
1638; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
1639; RV32-NEXT:    csrr a2, vlenb
1640; RV32-NEXT:    slli a2, a2, 3
1641; RV32-NEXT:    add a2, sp, a2
1642; RV32-NEXT:    addi a2, a2, 48
1643; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
1644; RV32-NEXT:    addi a2, a0, -16
1645; RV32-NEXT:    sltu a0, a0, a2
1646; RV32-NEXT:    addi a0, a0, -1
1647; RV32-NEXT:    and a0, a0, a2
1648; RV32-NEXT:    vmv1r.v v0, v7
1649; RV32-NEXT:    csrr a2, vlenb
1650; RV32-NEXT:    slli a2, a2, 4
1651; RV32-NEXT:    add a2, sp, a2
1652; RV32-NEXT:    addi a2, a2, 48
1653; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1654; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1655; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
1656; RV32-NEXT:    csrr a0, vlenb
1657; RV32-NEXT:    li a2, 40
1658; RV32-NEXT:    mul a0, a0, a2
1659; RV32-NEXT:    add a0, sp, a0
1660; RV32-NEXT:    addi a0, a0, 48
1661; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
1662; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
1663; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
1664; RV32-NEXT:    csrr a0, vlenb
1665; RV32-NEXT:    slli a0, a0, 5
1666; RV32-NEXT:    add a0, sp, a0
1667; RV32-NEXT:    addi a0, a0, 48
1668; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1669; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
1670; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
1671; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
1672; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
1673; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
1674; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
1675; RV32-NEXT:    addi a0, sp, 48
1676; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1677; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
1678; RV32-NEXT:    csrr a0, vlenb
1679; RV32-NEXT:    li a2, 24
1680; RV32-NEXT:    mul a0, a0, a2
1681; RV32-NEXT:    add a0, sp, a0
1682; RV32-NEXT:    addi a0, a0, 48
1683; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
1684; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
1685; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
1686; RV32-NEXT:    csrr a0, vlenb
1687; RV32-NEXT:    slli a0, a0, 3
1688; RV32-NEXT:    add a0, sp, a0
1689; RV32-NEXT:    addi a0, a0, 48
1690; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
1691; RV32-NEXT:    csrr a0, vlenb
1692; RV32-NEXT:    li a1, 48
1693; RV32-NEXT:    mul a0, a0, a1
1694; RV32-NEXT:    add sp, sp, a0
1695; RV32-NEXT:    .cfi_def_cfa sp, 48
1696; RV32-NEXT:    addi sp, sp, 48
1697; RV32-NEXT:    .cfi_def_cfa_offset 0
1698; RV32-NEXT:    ret
1699;
1700; RV64-LABEL: vp_ctpop_v32i64:
1701; RV64:       # %bb.0:
1702; RV64-NEXT:    addi sp, sp, -16
1703; RV64-NEXT:    .cfi_def_cfa_offset 16
1704; RV64-NEXT:    csrr a1, vlenb
1705; RV64-NEXT:    slli a1, a1, 4
1706; RV64-NEXT:    sub sp, sp, a1
1707; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1708; RV64-NEXT:    csrr a1, vlenb
1709; RV64-NEXT:    slli a1, a1, 3
1710; RV64-NEXT:    add a1, sp, a1
1711; RV64-NEXT:    addi a1, a1, 16
1712; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
1713; RV64-NEXT:    li a2, 16
1714; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1715; RV64-NEXT:    vslidedown.vi v24, v0, 2
1716; RV64-NEXT:    mv a1, a0
1717; RV64-NEXT:    bltu a0, a2, .LBB34_2
1718; RV64-NEXT:  # %bb.1:
1719; RV64-NEXT:    li a1, 16
1720; RV64-NEXT:  .LBB34_2:
1721; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1722; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
1723; RV64-NEXT:    lui a1, 349525
1724; RV64-NEXT:    lui a2, 209715
1725; RV64-NEXT:    lui a3, 61681
1726; RV64-NEXT:    lui a4, 4112
1727; RV64-NEXT:    addiw a1, a1, 1365
1728; RV64-NEXT:    addiw a2, a2, 819
1729; RV64-NEXT:    addiw a3, a3, -241
1730; RV64-NEXT:    addiw a4, a4, 257
1731; RV64-NEXT:    slli a5, a1, 32
1732; RV64-NEXT:    add a5, a1, a5
1733; RV64-NEXT:    slli a1, a2, 32
1734; RV64-NEXT:    add a6, a2, a1
1735; RV64-NEXT:    slli a1, a3, 32
1736; RV64-NEXT:    add a1, a3, a1
1737; RV64-NEXT:    slli a2, a4, 32
1738; RV64-NEXT:    add a2, a4, a2
1739; RV64-NEXT:    addi a3, a0, -16
1740; RV64-NEXT:    sltu a0, a0, a3
1741; RV64-NEXT:    addi a0, a0, -1
1742; RV64-NEXT:    and a0, a0, a3
1743; RV64-NEXT:    li a3, 56
1744; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
1745; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
1746; RV64-NEXT:    vand.vx v16, v8, a6, v0.t
1747; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
1748; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
1749; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
1750; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
1751; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
1752; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1753; RV64-NEXT:    vmul.vx v8, v8, a2, v0.t
1754; RV64-NEXT:    vsrl.vx v8, v8, a3, v0.t
1755; RV64-NEXT:    addi a4, sp, 16
1756; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
1757; RV64-NEXT:    vmv1r.v v0, v24
1758; RV64-NEXT:    csrr a4, vlenb
1759; RV64-NEXT:    slli a4, a4, 3
1760; RV64-NEXT:    add a4, sp, a4
1761; RV64-NEXT:    addi a4, a4, 16
1762; RV64-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
1763; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1764; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
1765; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
1766; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
1767; RV64-NEXT:    vand.vx v8, v16, a6, v0.t
1768; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
1769; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
1770; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
1771; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
1772; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
1773; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1774; RV64-NEXT:    vmul.vx v8, v8, a2, v0.t
1775; RV64-NEXT:    vsrl.vx v16, v8, a3, v0.t
1776; RV64-NEXT:    addi a0, sp, 16
1777; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
1778; RV64-NEXT:    csrr a0, vlenb
1779; RV64-NEXT:    slli a0, a0, 4
1780; RV64-NEXT:    add sp, sp, a0
1781; RV64-NEXT:    .cfi_def_cfa sp, 16
1782; RV64-NEXT:    addi sp, sp, 16
1783; RV64-NEXT:    .cfi_def_cfa_offset 0
1784; RV64-NEXT:    ret
1785  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
1786  ret <32 x i64> %v
1787}
1788
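; In the unmasked <32 x i64> lowering below, the RV64 path shares the scalar
; SWAR constants between the two halves and interleaves their popcount
; sequences by toggling vsetvli between the low-half AVL (a1) and the
; high-half AVL (a0); the RV32 path additionally spills whole m8 groups to the
; stack (the "Unknown-size Folded Spill/Reload" sequences) while juggling the
; broadcast constants.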
1789define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
1790; RV32-LABEL: vp_ctpop_v32i64_unmasked:
1791; RV32:       # %bb.0:
1792; RV32-NEXT:    addi sp, sp, -48
1793; RV32-NEXT:    .cfi_def_cfa_offset 48
1794; RV32-NEXT:    csrr a1, vlenb
1795; RV32-NEXT:    li a2, 24
1796; RV32-NEXT:    mul a1, a1, a2
1797; RV32-NEXT:    sub sp, sp, a1
1798; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
1799; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1800; RV32-NEXT:    vmv8r.v v24, v16
1801; RV32-NEXT:    lui a1, 349525
1802; RV32-NEXT:    lui a2, 209715
1803; RV32-NEXT:    addi a1, a1, 1365
1804; RV32-NEXT:    sw a1, 40(sp)
1805; RV32-NEXT:    sw a1, 44(sp)
1806; RV32-NEXT:    lui a1, 61681
1807; RV32-NEXT:    addi a2, a2, 819
1808; RV32-NEXT:    sw a2, 32(sp)
1809; RV32-NEXT:    sw a2, 36(sp)
1810; RV32-NEXT:    lui a2, 4112
1811; RV32-NEXT:    addi a1, a1, -241
1812; RV32-NEXT:    sw a1, 24(sp)
1813; RV32-NEXT:    sw a1, 28(sp)
1814; RV32-NEXT:    li a3, 16
1815; RV32-NEXT:    addi a1, a2, 257
1816; RV32-NEXT:    sw a1, 16(sp)
1817; RV32-NEXT:    sw a1, 20(sp)
1818; RV32-NEXT:    mv a1, a0
1819; RV32-NEXT:    bltu a0, a3, .LBB35_2
1820; RV32-NEXT:  # %bb.1:
1821; RV32-NEXT:    li a1, 16
1822; RV32-NEXT:  .LBB35_2:
1823; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1824; RV32-NEXT:    vsrl.vi v0, v8, 1
1825; RV32-NEXT:    addi a2, sp, 40
1826; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1827; RV32-NEXT:    vlse64.v v16, (a2), zero
1828; RV32-NEXT:    addi a2, a0, -16
1829; RV32-NEXT:    sltu a0, a0, a2
1830; RV32-NEXT:    addi a0, a0, -1
1831; RV32-NEXT:    and a0, a0, a2
1832; RV32-NEXT:    addi a2, sp, 32
1833; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1834; RV32-NEXT:    vand.vv v0, v0, v16
1835; RV32-NEXT:    csrr a3, vlenb
1836; RV32-NEXT:    slli a3, a3, 4
1837; RV32-NEXT:    add a3, sp, a3
1838; RV32-NEXT:    addi a3, a3, 48
1839; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
1840; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1841; RV32-NEXT:    vlse64.v v0, (a2), zero
1842; RV32-NEXT:    csrr a2, vlenb
1843; RV32-NEXT:    slli a2, a2, 3
1844; RV32-NEXT:    add a2, sp, a2
1845; RV32-NEXT:    addi a2, a2, 48
1846; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
1847; RV32-NEXT:    vmv8r.v v8, v24
1848; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1849; RV32-NEXT:    vsrl.vi v24, v24, 1
1850; RV32-NEXT:    vand.vv v16, v24, v16
1851; RV32-NEXT:    addi a2, sp, 48
1852; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
1853; RV32-NEXT:    csrr a2, vlenb
1854; RV32-NEXT:    slli a2, a2, 4
1855; RV32-NEXT:    add a2, sp, a2
1856; RV32-NEXT:    addi a2, a2, 48
1857; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
1858; RV32-NEXT:    csrr a2, vlenb
1859; RV32-NEXT:    slli a2, a2, 3
1860; RV32-NEXT:    add a2, sp, a2
1861; RV32-NEXT:    addi a2, a2, 48
1862; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
1863; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1864; RV32-NEXT:    vsub.vv v16, v24, v16
1865; RV32-NEXT:    addi a2, sp, 48
1866; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
1867; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1868; RV32-NEXT:    vsub.vv v8, v8, v24
1869; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1870; RV32-NEXT:    vand.vv v24, v16, v0
1871; RV32-NEXT:    csrr a2, vlenb
1872; RV32-NEXT:    slli a2, a2, 3
1873; RV32-NEXT:    add a2, sp, a2
1874; RV32-NEXT:    addi a2, a2, 48
1875; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
1876; RV32-NEXT:    vsrl.vi v16, v16, 2
1877; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1878; RV32-NEXT:    vand.vv v24, v8, v0
1879; RV32-NEXT:    csrr a2, vlenb
1880; RV32-NEXT:    slli a2, a2, 4
1881; RV32-NEXT:    add a2, sp, a2
1882; RV32-NEXT:    addi a2, a2, 48
1883; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
1884; RV32-NEXT:    vsrl.vi v8, v8, 2
1885; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1886; RV32-NEXT:    vand.vv v16, v16, v0
1887; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1888; RV32-NEXT:    vand.vv v0, v8, v0
1889; RV32-NEXT:    addi a2, sp, 24
1890; RV32-NEXT:    csrr a3, vlenb
1891; RV32-NEXT:    slli a3, a3, 3
1892; RV32-NEXT:    add a3, sp, a3
1893; RV32-NEXT:    addi a3, a3, 48
1894; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
1895; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1896; RV32-NEXT:    vadd.vv v16, v8, v16
1897; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1898; RV32-NEXT:    vlse64.v v8, (a2), zero
1899; RV32-NEXT:    addi a2, sp, 16
1900; RV32-NEXT:    csrr a3, vlenb
1901; RV32-NEXT:    slli a3, a3, 4
1902; RV32-NEXT:    add a3, sp, a3
1903; RV32-NEXT:    addi a3, a3, 48
1904; RV32-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
1905; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1906; RV32-NEXT:    vadd.vv v24, v24, v0
1907; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1908; RV32-NEXT:    vsrl.vi v0, v16, 4
1909; RV32-NEXT:    vadd.vv v16, v16, v0
1910; RV32-NEXT:    csrr a3, vlenb
1911; RV32-NEXT:    slli a3, a3, 4
1912; RV32-NEXT:    add a3, sp, a3
1913; RV32-NEXT:    addi a3, a3, 48
1914; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
1915; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1916; RV32-NEXT:    vlse64.v v0, (a2), zero
1917; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1918; RV32-NEXT:    vsrl.vi v16, v24, 4
1919; RV32-NEXT:    vadd.vv v16, v24, v16
1920; RV32-NEXT:    csrr a2, vlenb
1921; RV32-NEXT:    slli a2, a2, 4
1922; RV32-NEXT:    add a2, sp, a2
1923; RV32-NEXT:    addi a2, a2, 48
1924; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
1925; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1926; RV32-NEXT:    vand.vv v24, v24, v8
1927; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1928; RV32-NEXT:    vand.vv v8, v16, v8
1929; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1930; RV32-NEXT:    vmul.vv v16, v24, v0
1931; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1932; RV32-NEXT:    vmul.vv v24, v8, v0
1933; RV32-NEXT:    li a2, 56
1934; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1935; RV32-NEXT:    vsrl.vx v8, v16, a2
1936; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1937; RV32-NEXT:    vsrl.vx v16, v24, a2
1938; RV32-NEXT:    csrr a0, vlenb
1939; RV32-NEXT:    li a1, 24
1940; RV32-NEXT:    mul a0, a0, a1
1941; RV32-NEXT:    add sp, sp, a0
1942; RV32-NEXT:    .cfi_def_cfa sp, 48
1943; RV32-NEXT:    addi sp, sp, 48
1944; RV32-NEXT:    .cfi_def_cfa_offset 0
1945; RV32-NEXT:    ret
1946;
1947; RV64-LABEL: vp_ctpop_v32i64_unmasked:
1948; RV64:       # %bb.0:
1949; RV64-NEXT:    li a2, 16
1950; RV64-NEXT:    mv a1, a0
1951; RV64-NEXT:    bltu a0, a2, .LBB35_2
1952; RV64-NEXT:  # %bb.1:
1953; RV64-NEXT:    li a1, 16
1954; RV64-NEXT:  .LBB35_2:
1955; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1956; RV64-NEXT:    vsrl.vi v24, v8, 1
1957; RV64-NEXT:    lui a2, 349525
1958; RV64-NEXT:    lui a3, 209715
1959; RV64-NEXT:    lui a4, 61681
1960; RV64-NEXT:    lui a5, 4112
1961; RV64-NEXT:    addiw a2, a2, 1365
1962; RV64-NEXT:    addiw a3, a3, 819
1963; RV64-NEXT:    addiw a4, a4, -241
1964; RV64-NEXT:    addiw a5, a5, 257
1965; RV64-NEXT:    slli a6, a2, 32
1966; RV64-NEXT:    add a2, a2, a6
1967; RV64-NEXT:    slli a6, a3, 32
1968; RV64-NEXT:    add a3, a3, a6
1969; RV64-NEXT:    slli a6, a4, 32
1970; RV64-NEXT:    add a4, a4, a6
1971; RV64-NEXT:    slli a6, a5, 32
1972; RV64-NEXT:    add a5, a5, a6
1973; RV64-NEXT:    addi a6, a0, -16
1974; RV64-NEXT:    sltu a0, a0, a6
1975; RV64-NEXT:    addi a0, a0, -1
1976; RV64-NEXT:    and a0, a0, a6
1977; RV64-NEXT:    li a6, 56
1978; RV64-NEXT:    vand.vx v24, v24, a2
1979; RV64-NEXT:    vsub.vv v8, v8, v24
1980; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1981; RV64-NEXT:    vsrl.vi v24, v16, 1
1982; RV64-NEXT:    vand.vx v24, v24, a2
1983; RV64-NEXT:    vsub.vv v16, v16, v24
1984; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1985; RV64-NEXT:    vand.vx v24, v8, a3
1986; RV64-NEXT:    vsrl.vi v8, v8, 2
1987; RV64-NEXT:    vand.vx v8, v8, a3
1988; RV64-NEXT:    vadd.vv v8, v24, v8
1989; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1990; RV64-NEXT:    vand.vx v24, v16, a3
1991; RV64-NEXT:    vsrl.vi v16, v16, 2
1992; RV64-NEXT:    vand.vx v16, v16, a3
1993; RV64-NEXT:    vadd.vv v16, v24, v16
1994; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1995; RV64-NEXT:    vsrl.vi v24, v8, 4
1996; RV64-NEXT:    vadd.vv v8, v8, v24
1997; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1998; RV64-NEXT:    vsrl.vi v24, v16, 4
1999; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2000; RV64-NEXT:    vand.vx v8, v8, a4
2001; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2002; RV64-NEXT:    vadd.vv v16, v16, v24
2003; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2004; RV64-NEXT:    vmul.vx v8, v8, a5
2005; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2006; RV64-NEXT:    vand.vx v16, v16, a4
2007; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2008; RV64-NEXT:    vsrl.vx v8, v8, a6
2009; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2010; RV64-NEXT:    vmul.vx v16, v16, a5
2011; RV64-NEXT:    vsrl.vx v16, v16, a6
2012; RV64-NEXT:    ret
2013  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl)
2014  ret <32 x i64> %v
2015}
2016