; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

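; These tests check the generic VP CTLZ expansion: the leading set bit is
; first smeared across every lower bit position (x |= x >> 1, x >> 2, ... up
; to half the element width), the result is inverted, and the surviving set
; bits are counted with the classic SWAR popcount sequence. At e8 the SWAR
; masks fit in plain immediates: 0x55 (li a0, 85), 0x33 (li a0, 51), and
; 0x0F (vand.vi ... 15); no byte-accumulating multiply is needed at this
; width. Masked variants thread the mask through every step as v0.t, while
; the unmasked variants pass an all-true splat mask to the intrinsic.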
declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)

define <2 x i8> @vp_ctlz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctlz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)

define <4 x i8> @vp_ctlz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctlz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)

define <8 x i8> @vp_ctlz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctlz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)

define <16 x i8> @vp_ctlz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_ctlz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

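; At e16 the SWAR masks no longer fit in an li immediate, so each one is
; built with a lui/addi pair: lui 5 + 1365 = 0x5555, lui 3 + 819 = 0x3333,
; lui 1 - 241 = 0x0F0F. The per-byte counts are then accumulated by a
; multiply with 0x0101 (li a0, 257) and extracted with a final shift right
; by 8.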
declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)

define <2 x i16> @vp_ctlz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)

define <4 x i16> @vp_ctlz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)

define <8 x i16> @vp_ctlz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)

define <16 x i16> @vp_ctlz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

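; The e32 tests use the 32-bit SWAR constants 0x55555555 (lui 349525 + 1365),
; 0x33333333 (lui 209715 + 819), and 0x0F0F0F0F (lui 61681 - 241); the byte
; counts are gathered by a multiply with 0x01010101 (lui 4112 + 257) and a
; final shift right by 24.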
declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)

define <2 x i32> @vp_ctlz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)

define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)

define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)

define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

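; At e64 the two targets diverge. RV64 materializes each 64-bit SWAR constant
; in a scalar register (addiw forms the 32-bit pattern, then slli/add
; replicates it into the upper half) and uses the .vx instruction forms. RV32
; has no 64-bit scalars, so each constant is instead splatted as a vector of
; 32-bit halves (vmv.v.x under a temporary e32 vsetivli) and applied with the
; .vv forms. On both targets the bit smear needs an extra vsrl.vx by 32, and
; the count is extracted with a multiply by 0x0101010101010101 followed by a
; shift right by 56.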
871declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
872
873define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
874; RV32-LABEL: vp_ctlz_v2i64:
875; RV32:       # %bb.0:
876; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
877; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
878; RV32-NEXT:    lui a1, 349525
879; RV32-NEXT:    addi a1, a1, 1365
880; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
881; RV32-NEXT:    vmv.v.x v10, a1
882; RV32-NEXT:    li a1, 32
883; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
884; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
885; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
886; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
887; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
888; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
889; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
890; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
891; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
892; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
893; RV32-NEXT:    vsrl.vx v9, v8, a1, v0.t
894; RV32-NEXT:    lui a1, 209715
895; RV32-NEXT:    addi a1, a1, 819
896; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
897; RV32-NEXT:    vnot.v v8, v8, v0.t
898; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
899; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
900; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
901; RV32-NEXT:    vmv.v.x v10, a1
902; RV32-NEXT:    lui a1, 61681
903; RV32-NEXT:    addi a1, a1, -241
904; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
905; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
906; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
907; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
908; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
909; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
910; RV32-NEXT:    vmv.v.x v10, a1
911; RV32-NEXT:    lui a1, 4112
912; RV32-NEXT:    addi a1, a1, 257
913; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
914; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
915; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
916; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
917; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
918; RV32-NEXT:    vmv.v.x v9, a1
919; RV32-NEXT:    li a1, 56
920; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
921; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
922; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
923; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
924; RV32-NEXT:    ret
925;
926; RV64-LABEL: vp_ctlz_v2i64:
927; RV64:       # %bb.0:
928; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
929; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
930; RV64-NEXT:    lui a0, 349525
931; RV64-NEXT:    lui a1, 209715
932; RV64-NEXT:    lui a2, 61681
933; RV64-NEXT:    lui a3, 4112
934; RV64-NEXT:    addiw a0, a0, 1365
935; RV64-NEXT:    addiw a1, a1, 819
936; RV64-NEXT:    addiw a2, a2, -241
937; RV64-NEXT:    addiw a3, a3, 257
938; RV64-NEXT:    slli a4, a0, 32
939; RV64-NEXT:    add a0, a0, a4
940; RV64-NEXT:    slli a4, a1, 32
941; RV64-NEXT:    add a1, a1, a4
942; RV64-NEXT:    slli a4, a2, 32
943; RV64-NEXT:    add a2, a2, a4
944; RV64-NEXT:    slli a4, a3, 32
945; RV64-NEXT:    add a3, a3, a4
946; RV64-NEXT:    li a4, 32
947; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
948; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
949; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
950; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
951; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
952; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
953; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
954; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
955; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
956; RV64-NEXT:    vsrl.vx v9, v8, a4, v0.t
957; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
958; RV64-NEXT:    vnot.v v8, v8, v0.t
959; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
960; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
961; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
962; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
963; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
964; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
965; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
966; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
967; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
968; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
969; RV64-NEXT:    li a0, 56
970; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
971; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
972; RV64-NEXT:    ret
973  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
974  ret <2 x i64> %v
975}
976
977define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
978; RV32-LABEL: vp_ctlz_v2i64_unmasked:
979; RV32:       # %bb.0:
980; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
981; RV32-NEXT:    vsrl.vi v9, v8, 1
982; RV32-NEXT:    lui a1, 349525
983; RV32-NEXT:    addi a1, a1, 1365
984; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
985; RV32-NEXT:    vmv.v.x v10, a1
986; RV32-NEXT:    li a1, 32
987; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
988; RV32-NEXT:    vor.vv v8, v8, v9
989; RV32-NEXT:    vsrl.vi v9, v8, 2
990; RV32-NEXT:    vor.vv v8, v8, v9
991; RV32-NEXT:    vsrl.vi v9, v8, 4
992; RV32-NEXT:    vor.vv v8, v8, v9
993; RV32-NEXT:    vsrl.vi v9, v8, 8
994; RV32-NEXT:    vor.vv v8, v8, v9
995; RV32-NEXT:    vsrl.vi v9, v8, 16
996; RV32-NEXT:    vor.vv v8, v8, v9
997; RV32-NEXT:    vsrl.vx v9, v8, a1
998; RV32-NEXT:    lui a1, 209715
999; RV32-NEXT:    addi a1, a1, 819
1000; RV32-NEXT:    vor.vv v8, v8, v9
1001; RV32-NEXT:    vnot.v v8, v8
1002; RV32-NEXT:    vsrl.vi v9, v8, 1
1003; RV32-NEXT:    vand.vv v9, v9, v10
1004; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1005; RV32-NEXT:    vmv.v.x v10, a1
1006; RV32-NEXT:    lui a1, 61681
1007; RV32-NEXT:    addi a1, a1, -241
1008; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1009; RV32-NEXT:    vsub.vv v8, v8, v9
1010; RV32-NEXT:    vand.vv v9, v8, v10
1011; RV32-NEXT:    vsrl.vi v8, v8, 2
1012; RV32-NEXT:    vand.vv v8, v8, v10
1013; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1014; RV32-NEXT:    vmv.v.x v10, a1
1015; RV32-NEXT:    lui a1, 4112
1016; RV32-NEXT:    addi a1, a1, 257
1017; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1018; RV32-NEXT:    vadd.vv v8, v9, v8
1019; RV32-NEXT:    vsrl.vi v9, v8, 4
1020; RV32-NEXT:    vadd.vv v8, v8, v9
1021; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1022; RV32-NEXT:    vmv.v.x v9, a1
1023; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1024; RV32-NEXT:    vand.vv v8, v8, v10
1025; RV32-NEXT:    vmul.vv v8, v8, v9
1026; RV32-NEXT:    li a0, 56
1027; RV32-NEXT:    vsrl.vx v8, v8, a0
1028; RV32-NEXT:    ret
1029;
1030; RV64-LABEL: vp_ctlz_v2i64_unmasked:
1031; RV64:       # %bb.0:
1032; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1033; RV64-NEXT:    vsrl.vi v9, v8, 1
1034; RV64-NEXT:    lui a0, 349525
1035; RV64-NEXT:    lui a1, 209715
1036; RV64-NEXT:    lui a2, 61681
1037; RV64-NEXT:    lui a3, 4112
1038; RV64-NEXT:    addiw a0, a0, 1365
1039; RV64-NEXT:    addiw a1, a1, 819
1040; RV64-NEXT:    addiw a2, a2, -241
1041; RV64-NEXT:    addiw a3, a3, 257
1042; RV64-NEXT:    slli a4, a0, 32
1043; RV64-NEXT:    add a0, a0, a4
1044; RV64-NEXT:    slli a4, a1, 32
1045; RV64-NEXT:    add a1, a1, a4
1046; RV64-NEXT:    slli a4, a2, 32
1047; RV64-NEXT:    add a2, a2, a4
1048; RV64-NEXT:    slli a4, a3, 32
1049; RV64-NEXT:    add a3, a3, a4
1050; RV64-NEXT:    li a4, 32
1051; RV64-NEXT:    vor.vv v8, v8, v9
1052; RV64-NEXT:    vsrl.vi v9, v8, 2
1053; RV64-NEXT:    vor.vv v8, v8, v9
1054; RV64-NEXT:    vsrl.vi v9, v8, 4
1055; RV64-NEXT:    vor.vv v8, v8, v9
1056; RV64-NEXT:    vsrl.vi v9, v8, 8
1057; RV64-NEXT:    vor.vv v8, v8, v9
1058; RV64-NEXT:    vsrl.vi v9, v8, 16
1059; RV64-NEXT:    vor.vv v8, v8, v9
1060; RV64-NEXT:    vsrl.vx v9, v8, a4
1061; RV64-NEXT:    vor.vv v8, v8, v9
1062; RV64-NEXT:    vnot.v v8, v8
1063; RV64-NEXT:    vsrl.vi v9, v8, 1
1064; RV64-NEXT:    vand.vx v9, v9, a0
1065; RV64-NEXT:    vsub.vv v8, v8, v9
1066; RV64-NEXT:    vand.vx v9, v8, a1
1067; RV64-NEXT:    vsrl.vi v8, v8, 2
1068; RV64-NEXT:    vand.vx v8, v8, a1
1069; RV64-NEXT:    vadd.vv v8, v9, v8
1070; RV64-NEXT:    vsrl.vi v9, v8, 4
1071; RV64-NEXT:    vadd.vv v8, v8, v9
1072; RV64-NEXT:    vand.vx v8, v8, a2
1073; RV64-NEXT:    vmul.vx v8, v8, a3
1074; RV64-NEXT:    li a0, 56
1075; RV64-NEXT:    vsrl.vx v8, v8, a0
1076; RV64-NEXT:    ret
1077  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
1078  ret <2 x i64> %v
1079}
1080
1081declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
1082
1083define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
1084; RV32-LABEL: vp_ctlz_v4i64:
1085; RV32:       # %bb.0:
1086; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1087; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
1088; RV32-NEXT:    lui a1, 349525
1089; RV32-NEXT:    addi a1, a1, 1365
1090; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1091; RV32-NEXT:    vmv.v.x v10, a1
1092; RV32-NEXT:    li a1, 32
1093; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1094; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1095; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
1096; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1097; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
1098; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1099; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
1100; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1101; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
1102; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1103; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
1104; RV32-NEXT:    lui a1, 209715
1105; RV32-NEXT:    addi a1, a1, 819
1106; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1107; RV32-NEXT:    vnot.v v8, v8, v0.t
1108; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
1109; RV32-NEXT:    vand.vv v10, v12, v10, v0.t
1110; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1111; RV32-NEXT:    vmv.v.x v12, a1
1112; RV32-NEXT:    lui a1, 61681
1113; RV32-NEXT:    addi a1, a1, -241
1114; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1115; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
1116; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
1117; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
1118; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
1119; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1120; RV32-NEXT:    vmv.v.x v12, a1
1121; RV32-NEXT:    lui a1, 4112
1122; RV32-NEXT:    addi a1, a1, 257
1123; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1124; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
1125; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
1126; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
1127; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1128; RV32-NEXT:    vmv.v.x v10, a1
1129; RV32-NEXT:    li a1, 56
1130; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1131; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
1132; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
1133; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
1134; RV32-NEXT:    ret
1135;
1136; RV64-LABEL: vp_ctlz_v4i64:
1137; RV64:       # %bb.0:
1138; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1139; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
1140; RV64-NEXT:    lui a0, 349525
1141; RV64-NEXT:    lui a1, 209715
1142; RV64-NEXT:    lui a2, 61681
1143; RV64-NEXT:    lui a3, 4112
1144; RV64-NEXT:    addiw a0, a0, 1365
1145; RV64-NEXT:    addiw a1, a1, 819
1146; RV64-NEXT:    addiw a2, a2, -241
1147; RV64-NEXT:    addiw a3, a3, 257
1148; RV64-NEXT:    slli a4, a0, 32
1149; RV64-NEXT:    add a0, a0, a4
1150; RV64-NEXT:    slli a4, a1, 32
1151; RV64-NEXT:    add a1, a1, a4
1152; RV64-NEXT:    slli a4, a2, 32
1153; RV64-NEXT:    add a2, a2, a4
1154; RV64-NEXT:    slli a4, a3, 32
1155; RV64-NEXT:    add a3, a3, a4
1156; RV64-NEXT:    li a4, 32
1157; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1158; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
1159; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1160; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
1161; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1162; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
1163; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1164; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
1165; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1166; RV64-NEXT:    vsrl.vx v10, v8, a4, v0.t
1167; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1168; RV64-NEXT:    vnot.v v8, v8, v0.t
1169; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
1170; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
1171; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
1172; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
1173; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
1174; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1175; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
1176; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
1177; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
1178; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
1179; RV64-NEXT:    li a0, 56
1180; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
1181; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
1182; RV64-NEXT:    ret
1183  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
1184  ret <4 x i64> %v
1185}
1186
1187define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
1188; RV32-LABEL: vp_ctlz_v4i64_unmasked:
1189; RV32:       # %bb.0:
1190; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1191; RV32-NEXT:    vsrl.vi v10, v8, 1
1192; RV32-NEXT:    lui a1, 349525
1193; RV32-NEXT:    addi a1, a1, 1365
1194; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1195; RV32-NEXT:    vmv.v.x v12, a1
1196; RV32-NEXT:    li a1, 32
1197; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1198; RV32-NEXT:    vor.vv v8, v8, v10
1199; RV32-NEXT:    vsrl.vi v10, v8, 2
1200; RV32-NEXT:    vor.vv v8, v8, v10
1201; RV32-NEXT:    vsrl.vi v10, v8, 4
1202; RV32-NEXT:    vor.vv v8, v8, v10
1203; RV32-NEXT:    vsrl.vi v10, v8, 8
1204; RV32-NEXT:    vor.vv v8, v8, v10
1205; RV32-NEXT:    vsrl.vi v10, v8, 16
1206; RV32-NEXT:    vor.vv v8, v8, v10
1207; RV32-NEXT:    vsrl.vx v10, v8, a1
1208; RV32-NEXT:    lui a1, 209715
1209; RV32-NEXT:    addi a1, a1, 819
1210; RV32-NEXT:    vor.vv v8, v8, v10
1211; RV32-NEXT:    vnot.v v8, v8
1212; RV32-NEXT:    vsrl.vi v10, v8, 1
1213; RV32-NEXT:    vand.vv v10, v10, v12
1214; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1215; RV32-NEXT:    vmv.v.x v12, a1
1216; RV32-NEXT:    lui a1, 61681
1217; RV32-NEXT:    addi a1, a1, -241
1218; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1219; RV32-NEXT:    vsub.vv v8, v8, v10
1220; RV32-NEXT:    vand.vv v10, v8, v12
1221; RV32-NEXT:    vsrl.vi v8, v8, 2
1222; RV32-NEXT:    vand.vv v8, v8, v12
1223; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1224; RV32-NEXT:    vmv.v.x v12, a1
1225; RV32-NEXT:    lui a1, 4112
1226; RV32-NEXT:    addi a1, a1, 257
1227; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1228; RV32-NEXT:    vadd.vv v8, v10, v8
1229; RV32-NEXT:    vsrl.vi v10, v8, 4
1230; RV32-NEXT:    vadd.vv v8, v8, v10
1231; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1232; RV32-NEXT:    vmv.v.x v10, a1
1233; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1234; RV32-NEXT:    vand.vv v8, v8, v12
1235; RV32-NEXT:    vmul.vv v8, v8, v10
1236; RV32-NEXT:    li a0, 56
1237; RV32-NEXT:    vsrl.vx v8, v8, a0
1238; RV32-NEXT:    ret
1239;
1240; RV64-LABEL: vp_ctlz_v4i64_unmasked:
1241; RV64:       # %bb.0:
1242; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1243; RV64-NEXT:    vsrl.vi v10, v8, 1
1244; RV64-NEXT:    lui a0, 349525
1245; RV64-NEXT:    lui a1, 209715
1246; RV64-NEXT:    lui a2, 61681
1247; RV64-NEXT:    lui a3, 4112
1248; RV64-NEXT:    addiw a0, a0, 1365
1249; RV64-NEXT:    addiw a1, a1, 819
1250; RV64-NEXT:    addiw a2, a2, -241
1251; RV64-NEXT:    addiw a3, a3, 257
1252; RV64-NEXT:    slli a4, a0, 32
1253; RV64-NEXT:    add a0, a0, a4
1254; RV64-NEXT:    slli a4, a1, 32
1255; RV64-NEXT:    add a1, a1, a4
1256; RV64-NEXT:    slli a4, a2, 32
1257; RV64-NEXT:    add a2, a2, a4
1258; RV64-NEXT:    slli a4, a3, 32
1259; RV64-NEXT:    add a3, a3, a4
1260; RV64-NEXT:    li a4, 32
1261; RV64-NEXT:    vor.vv v8, v8, v10
1262; RV64-NEXT:    vsrl.vi v10, v8, 2
1263; RV64-NEXT:    vor.vv v8, v8, v10
1264; RV64-NEXT:    vsrl.vi v10, v8, 4
1265; RV64-NEXT:    vor.vv v8, v8, v10
1266; RV64-NEXT:    vsrl.vi v10, v8, 8
1267; RV64-NEXT:    vor.vv v8, v8, v10
1268; RV64-NEXT:    vsrl.vi v10, v8, 16
1269; RV64-NEXT:    vor.vv v8, v8, v10
1270; RV64-NEXT:    vsrl.vx v10, v8, a4
1271; RV64-NEXT:    vor.vv v8, v8, v10
1272; RV64-NEXT:    vnot.v v8, v8
1273; RV64-NEXT:    vsrl.vi v10, v8, 1
1274; RV64-NEXT:    vand.vx v10, v10, a0
1275; RV64-NEXT:    vsub.vv v8, v8, v10
1276; RV64-NEXT:    vand.vx v10, v8, a1
1277; RV64-NEXT:    vsrl.vi v8, v8, 2
1278; RV64-NEXT:    vand.vx v8, v8, a1
1279; RV64-NEXT:    vadd.vv v8, v10, v8
1280; RV64-NEXT:    vsrl.vi v10, v8, 4
1281; RV64-NEXT:    vadd.vv v8, v8, v10
1282; RV64-NEXT:    vand.vx v8, v8, a2
1283; RV64-NEXT:    vmul.vx v8, v8, a3
1284; RV64-NEXT:    li a0, 56
1285; RV64-NEXT:    vsrl.vx v8, v8, a0
1286; RV64-NEXT:    ret
1287  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
1288  ret <4 x i64> %v
1289}
1290
1291declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
1292
1293define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
1294; RV32-LABEL: vp_ctlz_v8i64:
1295; RV32:       # %bb.0:
1296; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1297; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
1298; RV32-NEXT:    lui a1, 349525
1299; RV32-NEXT:    addi a1, a1, 1365
1300; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1301; RV32-NEXT:    vmv.v.x v12, a1
1302; RV32-NEXT:    li a1, 32
1303; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1304; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1305; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
1306; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1307; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
1308; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1309; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
1310; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1311; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
1312; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1313; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
1314; RV32-NEXT:    lui a1, 209715
1315; RV32-NEXT:    addi a1, a1, 819
1316; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1317; RV32-NEXT:    vnot.v v16, v8, v0.t
1318; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
1319; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
1320; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1321; RV32-NEXT:    vmv.v.x v8, a1
1322; RV32-NEXT:    lui a1, 61681
1323; RV32-NEXT:    addi a1, a1, -241
1324; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1325; RV32-NEXT:    vsub.vv v12, v16, v12, v0.t
1326; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
1327; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
1328; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
1329; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1330; RV32-NEXT:    vmv.v.x v12, a1
1331; RV32-NEXT:    lui a1, 4112
1332; RV32-NEXT:    addi a1, a1, 257
1333; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1334; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
1335; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
1336; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
1337; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1338; RV32-NEXT:    vmv.v.x v16, a1
1339; RV32-NEXT:    li a1, 56
1340; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1341; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
1342; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
1343; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
1344; RV32-NEXT:    ret
1345;
1346; RV64-LABEL: vp_ctlz_v8i64:
1347; RV64:       # %bb.0:
1348; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1349; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
1350; RV64-NEXT:    lui a0, 349525
1351; RV64-NEXT:    lui a1, 209715
1352; RV64-NEXT:    lui a2, 61681
1353; RV64-NEXT:    lui a3, 4112
1354; RV64-NEXT:    addiw a0, a0, 1365
1355; RV64-NEXT:    addiw a1, a1, 819
1356; RV64-NEXT:    addiw a2, a2, -241
1357; RV64-NEXT:    addiw a3, a3, 257
1358; RV64-NEXT:    slli a4, a0, 32
1359; RV64-NEXT:    add a0, a0, a4
1360; RV64-NEXT:    slli a4, a1, 32
1361; RV64-NEXT:    add a1, a1, a4
1362; RV64-NEXT:    slli a4, a2, 32
1363; RV64-NEXT:    add a2, a2, a4
1364; RV64-NEXT:    slli a4, a3, 32
1365; RV64-NEXT:    add a3, a3, a4
1366; RV64-NEXT:    li a4, 32
1367; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1368; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
1369; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1370; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
1371; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1372; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
1373; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1374; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
1375; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1376; RV64-NEXT:    vsrl.vx v12, v8, a4, v0.t
1377; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1378; RV64-NEXT:    vnot.v v8, v8, v0.t
1379; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
1380; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
1381; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
1382; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
1383; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
1384; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1385; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
1386; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
1387; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
1388; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
1389; RV64-NEXT:    li a0, 56
1390; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
1391; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
1392; RV64-NEXT:    ret
1393  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
1394  ret <8 x i64> %v
1395}
1396
define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vx v12, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)

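; RV32 has no 64-bit scalar registers, so the 64-bit SWAR masks are built on
; the stack as pairs of identical 32-bit words and broadcast with zero-stride
; vlse64 loads. At m8 that broadcast leaves no spare vector registers, which
; is why the masked code below reserves vlenb-scaled stack slots and carries
; the folded vs8r.v/vl8r.v spill/reload traffic.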
define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vand.vv v0, v24, v16
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vx v16, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)

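; Note that the <15 x i64> tests above broadcast their constants with
; "vsetivli zero, 16": the non-power-of-two vector is widened to 16 lanes,
; so the v15i64 and v16i64 lowerings come out identical.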
define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vand.vv v0, v24, v16
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vx v16, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)

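; <32 x i64> no longer fits in one m8 register group, so the lowering splits
; the operation into two 16-element halves, each with its own EVL. In effect
; the scalar prologue computes:
;   evl.lo = min(evl, 16)
;   evl.hi = usubsat(evl, 16)  ; the branchless addi/sltu/addi/and sequence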
define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v32i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v0, 2
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    addi a2, a2, 257
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw a2, 20(sp)
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    bltu a0, a1, .LBB34_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:  .LBB34_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    addi a4, sp, 32
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a5, 40
; RV32-NEXT:    mul a3, a3, a5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a4), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 48
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 48
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, sp, 24
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a5, 24
; RV32-NEXT:    mul a3, a3, a5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v8, (a4), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 5
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    vmv1r.v v0, v24
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v32i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v0, 2
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB34_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB34_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    addiw a3, a3, 819
; RV64-NEXT:    addiw a6, a4, -241
; RV64-NEXT:    addiw a7, a5, 257
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a5, a2, a5
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a4, a3, a4
; RV64-NEXT:    slli a2, a6, 32
; RV64-NEXT:    add a2, a6, a2
; RV64-NEXT:    slli a3, a7, 32
; RV64-NEXT:    add a3, a7, a3
; RV64-NEXT:    addi a6, a0, -16
; RV64-NEXT:    sltu a0, a0, a6
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a6, a0, a6
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a4, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    addi a7, sp, 16
; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    slli a7, a7, 3
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vl8r.v v8, (a7) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a4, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

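; With no mask to share, the two 16-element halves of the unmasked <32 x i64>
; case get interleaved by the scheduler, so the code below toggles vsetvli
; between the low-half and high-half EVLs instead of finishing one half first.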
define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v32i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    addi a1, a2, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a3, .LBB35_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB35_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 1
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v0, v8, a2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 8
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 16
; RV32-NEXT:    vor.vv v16, v16, v8
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v0, v16, a2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v24, v8
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v24, v24, v8
; RV32-NEXT:    vadd.vv v24, v0, v24
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16
; RV32-NEXT:    vsrl.vi v0, v16, 1
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v0, v0, v24
; RV32-NEXT:    addi a2, sp, 24
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vsub.vv v0, v16, v0
; RV32-NEXT:    addi a4, sp, 48
; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v24, 4
; RV32-NEXT:    vadd.vv v16, v24, v16
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v0, v8
; RV32-NEXT:    vsrl.vi v0, v0, 2
; RV32-NEXT:    vand.vv v8, v0, v8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a3), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v16, v16, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v24, v8, v24
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v8, v16, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v24, a2
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v32i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB35_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB35_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    lui a3, 349525
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a7, a3, 1365
; RV64-NEXT:    addiw a3, a4, 819
; RV64-NEXT:    addiw a4, a5, -241
; RV64-NEXT:    addiw a6, a6, 257
; RV64-NEXT:    slli a5, a7, 32
; RV64-NEXT:    add a7, a7, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a5, a3, a5
; RV64-NEXT:    slli a3, a4, 32
; RV64-NEXT:    add a3, a4, a3
; RV64-NEXT:    slli a4, a6, 32
; RV64-NEXT:    add a4, a6, a4
; RV64-NEXT:    addi a6, a0, -16
; RV64-NEXT:    sltu a0, a0, a6
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a6, a0, a6
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsrl.vi v24, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 2
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v24, v8, a2
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 8
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 16
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    vand.vx v24, v24, a7
; RV64-NEXT:    vsub.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v24, v16, a2
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v8, a5
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vnot.v v16, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a5
; RV64-NEXT:    vadd.vv v8, v24, v8
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vand.vx v24, v24, a7
; RV64-NEXT:    vsub.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v16, a5
; RV64-NEXT:    vsrl.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v24, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vadd.vv v16, v16, v24
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vmul.vx v16, v16, a4
; RV64-NEXT:    vsrl.vx v16, v16, a0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}

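; The ctlz_zero_undef tests pass i1 true for the immarg operand. The lowering
; ignores the hint here: the smear-and-popcount expansion is already well
; defined for a zero input, so the generated code matches the plain ctlz
; tests above.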
define <2 x i8> @vp_ctlz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctlz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

define <4 x i8> @vp_ctlz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctlz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

define <8 x i8> @vp_ctlz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctlz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
2900; CHECK-NEXT:    vnot.v v8, v8
2901; CHECK-NEXT:    vsrl.vi v9, v8, 1
2902; CHECK-NEXT:    vand.vx v9, v9, a0
2903; CHECK-NEXT:    li a0, 51
2904; CHECK-NEXT:    vsub.vv v8, v8, v9
2905; CHECK-NEXT:    vand.vx v9, v8, a0
2906; CHECK-NEXT:    vsrl.vi v8, v8, 2
2907; CHECK-NEXT:    vand.vx v8, v8, a0
2908; CHECK-NEXT:    vadd.vv v8, v9, v8
2909; CHECK-NEXT:    vsrl.vi v9, v8, 4
2910; CHECK-NEXT:    vadd.vv v8, v8, v9
2911; CHECK-NEXT:    vand.vi v8, v8, 15
2912; CHECK-NEXT:    ret
2913  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
2914  ret <8 x i8> %v
2915}
2916
2917define <16 x i8> @vp_ctlz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
2918; CHECK-LABEL: vp_ctlz_zero_undef_v16i8:
2919; CHECK:       # %bb.0:
2920; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
2921; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
2922; CHECK-NEXT:    li a0, 85
2923; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2924; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
2925; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2926; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
2927; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2928; CHECK-NEXT:    vnot.v v8, v8, v0.t
2929; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
2930; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
2931; CHECK-NEXT:    li a0, 51
2932; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
2933; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
2934; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
2935; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2936; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
2937; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
2938; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
2939; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
2940; CHECK-NEXT:    ret
2941  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 true, <16 x i1> %m, i32 %evl)
2942  ret <16 x i8> %v
2943}
2944
2945define <16 x i8> @vp_ctlz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
2946; CHECK-LABEL: vp_ctlz_zero_undef_v16i8_unmasked:
2947; CHECK:       # %bb.0:
2948; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
2949; CHECK-NEXT:    vsrl.vi v9, v8, 1
2950; CHECK-NEXT:    li a0, 85
2951; CHECK-NEXT:    vor.vv v8, v8, v9
2952; CHECK-NEXT:    vsrl.vi v9, v8, 2
2953; CHECK-NEXT:    vor.vv v8, v8, v9
2954; CHECK-NEXT:    vsrl.vi v9, v8, 4
2955; CHECK-NEXT:    vor.vv v8, v8, v9
2956; CHECK-NEXT:    vnot.v v8, v8
2957; CHECK-NEXT:    vsrl.vi v9, v8, 1
2958; CHECK-NEXT:    vand.vx v9, v9, a0
2959; CHECK-NEXT:    li a0, 51
2960; CHECK-NEXT:    vsub.vv v8, v8, v9
2961; CHECK-NEXT:    vand.vx v9, v8, a0
2962; CHECK-NEXT:    vsrl.vi v8, v8, 2
2963; CHECK-NEXT:    vand.vx v8, v8, a0
2964; CHECK-NEXT:    vadd.vv v8, v9, v8
2965; CHECK-NEXT:    vsrl.vi v9, v8, 4
2966; CHECK-NEXT:    vadd.vv v8, v8, v9
2967; CHECK-NEXT:    vand.vi v8, v8, 15
2968; CHECK-NEXT:    ret
2969  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
2970  ret <16 x i8> %v
2971}
2972
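; For e16 the smear gains a shift by 8, and the popcount constants no
; longer fit an immediate: lui/addi materializes 0x5555, 0x3333, and
; 0x0f0f, and the per-byte counts are summed with a multiply by 0x0101
; (li 257) followed by a logical shift right by 8.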
2973define <2 x i16> @vp_ctlz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
2974; CHECK-LABEL: vp_ctlz_zero_undef_v2i16:
2975; CHECK:       # %bb.0:
2976; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
2977; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
2978; CHECK-NEXT:    lui a0, 5
2979; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2980; CHECK-NEXT:    addi a0, a0, 1365
2981; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
2982; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2983; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
2984; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2985; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
2986; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
2987; CHECK-NEXT:    vnot.v v8, v8, v0.t
2988; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
2989; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
2990; CHECK-NEXT:    lui a0, 3
2991; CHECK-NEXT:    addi a0, a0, 819
2992; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
2993; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
2994; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
2995; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2996; CHECK-NEXT:    lui a0, 1
2997; CHECK-NEXT:    addi a0, a0, -241
2998; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
2999; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3000; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
3001; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3002; CHECK-NEXT:    li a0, 257
3003; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3004; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
3005; CHECK-NEXT:    ret
3006  %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
3007  ret <2 x i16> %v
3008}
3009
3010define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
3011; CHECK-LABEL: vp_ctlz_zero_undef_v2i16_unmasked:
3012; CHECK:       # %bb.0:
3013; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3014; CHECK-NEXT:    vsrl.vi v9, v8, 1
3015; CHECK-NEXT:    lui a0, 5
3016; CHECK-NEXT:    vor.vv v8, v8, v9
3017; CHECK-NEXT:    addi a0, a0, 1365
3018; CHECK-NEXT:    vsrl.vi v9, v8, 2
3019; CHECK-NEXT:    vor.vv v8, v8, v9
3020; CHECK-NEXT:    vsrl.vi v9, v8, 4
3021; CHECK-NEXT:    vor.vv v8, v8, v9
3022; CHECK-NEXT:    vsrl.vi v9, v8, 8
3023; CHECK-NEXT:    vor.vv v8, v8, v9
3024; CHECK-NEXT:    vnot.v v8, v8
3025; CHECK-NEXT:    vsrl.vi v9, v8, 1
3026; CHECK-NEXT:    vand.vx v9, v9, a0
3027; CHECK-NEXT:    lui a0, 3
3028; CHECK-NEXT:    addi a0, a0, 819
3029; CHECK-NEXT:    vsub.vv v8, v8, v9
3030; CHECK-NEXT:    vand.vx v9, v8, a0
3031; CHECK-NEXT:    vsrl.vi v8, v8, 2
3032; CHECK-NEXT:    vand.vx v8, v8, a0
3033; CHECK-NEXT:    lui a0, 1
3034; CHECK-NEXT:    addi a0, a0, -241
3035; CHECK-NEXT:    vadd.vv v8, v9, v8
3036; CHECK-NEXT:    vsrl.vi v9, v8, 4
3037; CHECK-NEXT:    vadd.vv v8, v8, v9
3038; CHECK-NEXT:    vand.vx v8, v8, a0
3039; CHECK-NEXT:    li a0, 257
3040; CHECK-NEXT:    vmul.vx v8, v8, a0
3041; CHECK-NEXT:    vsrl.vi v8, v8, 8
3042; CHECK-NEXT:    ret
3043  %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
3044  ret <2 x i16> %v
3045}
3046
3047define <4 x i16> @vp_ctlz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
3048; CHECK-LABEL: vp_ctlz_zero_undef_v4i16:
3049; CHECK:       # %bb.0:
3050; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
3051; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3052; CHECK-NEXT:    lui a0, 5
3053; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3054; CHECK-NEXT:    addi a0, a0, 1365
3055; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
3056; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3057; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3058; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3059; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
3060; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3061; CHECK-NEXT:    vnot.v v8, v8, v0.t
3062; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3063; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3064; CHECK-NEXT:    lui a0, 3
3065; CHECK-NEXT:    addi a0, a0, 819
3066; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
3067; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
3068; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3069; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3070; CHECK-NEXT:    lui a0, 1
3071; CHECK-NEXT:    addi a0, a0, -241
3072; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
3073; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3074; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
3075; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3076; CHECK-NEXT:    li a0, 257
3077; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3078; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
3079; CHECK-NEXT:    ret
3080  %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
3081  ret <4 x i16> %v
3082}
3083
3084define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
3085; CHECK-LABEL: vp_ctlz_zero_undef_v4i16_unmasked:
3086; CHECK:       # %bb.0:
3087; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
3088; CHECK-NEXT:    vsrl.vi v9, v8, 1
3089; CHECK-NEXT:    lui a0, 5
3090; CHECK-NEXT:    vor.vv v8, v8, v9
3091; CHECK-NEXT:    addi a0, a0, 1365
3092; CHECK-NEXT:    vsrl.vi v9, v8, 2
3093; CHECK-NEXT:    vor.vv v8, v8, v9
3094; CHECK-NEXT:    vsrl.vi v9, v8, 4
3095; CHECK-NEXT:    vor.vv v8, v8, v9
3096; CHECK-NEXT:    vsrl.vi v9, v8, 8
3097; CHECK-NEXT:    vor.vv v8, v8, v9
3098; CHECK-NEXT:    vnot.v v8, v8
3099; CHECK-NEXT:    vsrl.vi v9, v8, 1
3100; CHECK-NEXT:    vand.vx v9, v9, a0
3101; CHECK-NEXT:    lui a0, 3
3102; CHECK-NEXT:    addi a0, a0, 819
3103; CHECK-NEXT:    vsub.vv v8, v8, v9
3104; CHECK-NEXT:    vand.vx v9, v8, a0
3105; CHECK-NEXT:    vsrl.vi v8, v8, 2
3106; CHECK-NEXT:    vand.vx v8, v8, a0
3107; CHECK-NEXT:    lui a0, 1
3108; CHECK-NEXT:    addi a0, a0, -241
3109; CHECK-NEXT:    vadd.vv v8, v9, v8
3110; CHECK-NEXT:    vsrl.vi v9, v8, 4
3111; CHECK-NEXT:    vadd.vv v8, v8, v9
3112; CHECK-NEXT:    vand.vx v8, v8, a0
3113; CHECK-NEXT:    li a0, 257
3114; CHECK-NEXT:    vmul.vx v8, v8, a0
3115; CHECK-NEXT:    vsrl.vi v8, v8, 8
3116; CHECK-NEXT:    ret
3117  %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
3118  ret <4 x i16> %v
3119}
3120
3121define <8 x i16> @vp_ctlz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
3122; CHECK-LABEL: vp_ctlz_zero_undef_v8i16:
3123; CHECK:       # %bb.0:
3124; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
3125; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3126; CHECK-NEXT:    lui a0, 5
3127; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3128; CHECK-NEXT:    addi a0, a0, 1365
3129; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
3130; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3131; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3132; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3133; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
3134; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3135; CHECK-NEXT:    vnot.v v8, v8, v0.t
3136; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3137; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3138; CHECK-NEXT:    lui a0, 3
3139; CHECK-NEXT:    addi a0, a0, 819
3140; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
3141; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
3142; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3143; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3144; CHECK-NEXT:    lui a0, 1
3145; CHECK-NEXT:    addi a0, a0, -241
3146; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
3147; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3148; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
3149; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3150; CHECK-NEXT:    li a0, 257
3151; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3152; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
3153; CHECK-NEXT:    ret
3154  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
3155  ret <8 x i16> %v
3156}
3157
3158define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
3159; CHECK-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
3160; CHECK:       # %bb.0:
3161; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
3162; CHECK-NEXT:    vsrl.vi v9, v8, 1
3163; CHECK-NEXT:    lui a0, 5
3164; CHECK-NEXT:    vor.vv v8, v8, v9
3165; CHECK-NEXT:    addi a0, a0, 1365
3166; CHECK-NEXT:    vsrl.vi v9, v8, 2
3167; CHECK-NEXT:    vor.vv v8, v8, v9
3168; CHECK-NEXT:    vsrl.vi v9, v8, 4
3169; CHECK-NEXT:    vor.vv v8, v8, v9
3170; CHECK-NEXT:    vsrl.vi v9, v8, 8
3171; CHECK-NEXT:    vor.vv v8, v8, v9
3172; CHECK-NEXT:    vnot.v v8, v8
3173; CHECK-NEXT:    vsrl.vi v9, v8, 1
3174; CHECK-NEXT:    vand.vx v9, v9, a0
3175; CHECK-NEXT:    lui a0, 3
3176; CHECK-NEXT:    addi a0, a0, 819
3177; CHECK-NEXT:    vsub.vv v8, v8, v9
3178; CHECK-NEXT:    vand.vx v9, v8, a0
3179; CHECK-NEXT:    vsrl.vi v8, v8, 2
3180; CHECK-NEXT:    vand.vx v8, v8, a0
3181; CHECK-NEXT:    lui a0, 1
3182; CHECK-NEXT:    addi a0, a0, -241
3183; CHECK-NEXT:    vadd.vv v8, v9, v8
3184; CHECK-NEXT:    vsrl.vi v9, v8, 4
3185; CHECK-NEXT:    vadd.vv v8, v8, v9
3186; CHECK-NEXT:    vand.vx v8, v8, a0
3187; CHECK-NEXT:    li a0, 257
3188; CHECK-NEXT:    vmul.vx v8, v8, a0
3189; CHECK-NEXT:    vsrl.vi v8, v8, 8
3190; CHECK-NEXT:    ret
3191  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
3192  ret <8 x i16> %v
3193}
3194
3195define <16 x i16> @vp_ctlz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
3196; CHECK-LABEL: vp_ctlz_zero_undef_v16i16:
3197; CHECK:       # %bb.0:
3198; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
3199; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
3200; CHECK-NEXT:    lui a0, 5
3201; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3202; CHECK-NEXT:    addi a0, a0, 1365
3203; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
3204; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3205; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
3206; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3207; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
3208; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3209; CHECK-NEXT:    vnot.v v8, v8, v0.t
3210; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
3211; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
3212; CHECK-NEXT:    lui a0, 3
3213; CHECK-NEXT:    addi a0, a0, 819
3214; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
3215; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
3216; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3217; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3218; CHECK-NEXT:    lui a0, 1
3219; CHECK-NEXT:    addi a0, a0, -241
3220; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
3221; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
3222; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
3223; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3224; CHECK-NEXT:    li a0, 257
3225; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3226; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
3227; CHECK-NEXT:    ret
3228  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
3229  ret <16 x i16> %v
3230}
3231
3232define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
3233; CHECK-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
3234; CHECK:       # %bb.0:
3235; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
3236; CHECK-NEXT:    vsrl.vi v10, v8, 1
3237; CHECK-NEXT:    lui a0, 5
3238; CHECK-NEXT:    vor.vv v8, v8, v10
3239; CHECK-NEXT:    addi a0, a0, 1365
3240; CHECK-NEXT:    vsrl.vi v10, v8, 2
3241; CHECK-NEXT:    vor.vv v8, v8, v10
3242; CHECK-NEXT:    vsrl.vi v10, v8, 4
3243; CHECK-NEXT:    vor.vv v8, v8, v10
3244; CHECK-NEXT:    vsrl.vi v10, v8, 8
3245; CHECK-NEXT:    vor.vv v8, v8, v10
3246; CHECK-NEXT:    vnot.v v8, v8
3247; CHECK-NEXT:    vsrl.vi v10, v8, 1
3248; CHECK-NEXT:    vand.vx v10, v10, a0
3249; CHECK-NEXT:    lui a0, 3
3250; CHECK-NEXT:    addi a0, a0, 819
3251; CHECK-NEXT:    vsub.vv v8, v8, v10
3252; CHECK-NEXT:    vand.vx v10, v8, a0
3253; CHECK-NEXT:    vsrl.vi v8, v8, 2
3254; CHECK-NEXT:    vand.vx v8, v8, a0
3255; CHECK-NEXT:    lui a0, 1
3256; CHECK-NEXT:    addi a0, a0, -241
3257; CHECK-NEXT:    vadd.vv v8, v10, v8
3258; CHECK-NEXT:    vsrl.vi v10, v8, 4
3259; CHECK-NEXT:    vadd.vv v8, v8, v10
3260; CHECK-NEXT:    vand.vx v8, v8, a0
3261; CHECK-NEXT:    li a0, 257
3262; CHECK-NEXT:    vmul.vx v8, v8, a0
3263; CHECK-NEXT:    vsrl.vi v8, v8, 8
3264; CHECK-NEXT:    ret
3265  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
3266  ret <16 x i16> %v
3267}
3268
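; For e32 the smear extends through a shift by 16, with the 32-bit
; masks 0x55555555, 0x33333333, and 0x0f0f0f0f, a multiply by
; 0x01010101 (lui 4112; addi 257), and a final shift right by 24.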
3269define <2 x i32> @vp_ctlz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
3270; CHECK-LABEL: vp_ctlz_zero_undef_v2i32:
3271; CHECK:       # %bb.0:
3272; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
3273; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3274; CHECK-NEXT:    lui a0, 349525
3275; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3276; CHECK-NEXT:    addi a0, a0, 1365
3277; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
3278; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3279; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3280; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3281; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
3282; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3283; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
3284; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3285; CHECK-NEXT:    vnot.v v8, v8, v0.t
3286; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3287; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3288; CHECK-NEXT:    lui a0, 209715
3289; CHECK-NEXT:    addi a0, a0, 819
3290; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
3291; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
3292; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3293; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3294; CHECK-NEXT:    lui a0, 61681
3295; CHECK-NEXT:    addi a0, a0, -241
3296; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
3297; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3298; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
3299; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3300; CHECK-NEXT:    lui a0, 4112
3301; CHECK-NEXT:    addi a0, a0, 257
3302; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3303; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
3304; CHECK-NEXT:    ret
3305  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
3306  ret <2 x i32> %v
3307}
3308
3309define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
3310; CHECK-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
3311; CHECK:       # %bb.0:
3312; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
3313; CHECK-NEXT:    vsrl.vi v9, v8, 1
3314; CHECK-NEXT:    lui a0, 349525
3315; CHECK-NEXT:    vor.vv v8, v8, v9
3316; CHECK-NEXT:    addi a0, a0, 1365
3317; CHECK-NEXT:    vsrl.vi v9, v8, 2
3318; CHECK-NEXT:    vor.vv v8, v8, v9
3319; CHECK-NEXT:    vsrl.vi v9, v8, 4
3320; CHECK-NEXT:    vor.vv v8, v8, v9
3321; CHECK-NEXT:    vsrl.vi v9, v8, 8
3322; CHECK-NEXT:    vor.vv v8, v8, v9
3323; CHECK-NEXT:    vsrl.vi v9, v8, 16
3324; CHECK-NEXT:    vor.vv v8, v8, v9
3325; CHECK-NEXT:    vnot.v v8, v8
3326; CHECK-NEXT:    vsrl.vi v9, v8, 1
3327; CHECK-NEXT:    vand.vx v9, v9, a0
3328; CHECK-NEXT:    lui a0, 209715
3329; CHECK-NEXT:    addi a0, a0, 819
3330; CHECK-NEXT:    vsub.vv v8, v8, v9
3331; CHECK-NEXT:    vand.vx v9, v8, a0
3332; CHECK-NEXT:    vsrl.vi v8, v8, 2
3333; CHECK-NEXT:    vand.vx v8, v8, a0
3334; CHECK-NEXT:    lui a0, 61681
3335; CHECK-NEXT:    addi a0, a0, -241
3336; CHECK-NEXT:    vadd.vv v8, v9, v8
3337; CHECK-NEXT:    vsrl.vi v9, v8, 4
3338; CHECK-NEXT:    vadd.vv v8, v8, v9
3339; CHECK-NEXT:    vand.vx v8, v8, a0
3340; CHECK-NEXT:    lui a0, 4112
3341; CHECK-NEXT:    addi a0, a0, 257
3342; CHECK-NEXT:    vmul.vx v8, v8, a0
3343; CHECK-NEXT:    vsrl.vi v8, v8, 24
3344; CHECK-NEXT:    ret
3345  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
3346  ret <2 x i32> %v
3347}
3348
3349define <4 x i32> @vp_ctlz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
3350; CHECK-LABEL: vp_ctlz_zero_undef_v4i32:
3351; CHECK:       # %bb.0:
3352; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
3353; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3354; CHECK-NEXT:    lui a0, 349525
3355; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3356; CHECK-NEXT:    addi a0, a0, 1365
3357; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
3358; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3359; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3360; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3361; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
3362; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3363; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
3364; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3365; CHECK-NEXT:    vnot.v v8, v8, v0.t
3366; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3367; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3368; CHECK-NEXT:    lui a0, 209715
3369; CHECK-NEXT:    addi a0, a0, 819
3370; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
3371; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
3372; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3373; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3374; CHECK-NEXT:    lui a0, 61681
3375; CHECK-NEXT:    addi a0, a0, -241
3376; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
3377; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3378; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
3379; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3380; CHECK-NEXT:    lui a0, 4112
3381; CHECK-NEXT:    addi a0, a0, 257
3382; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3383; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
3384; CHECK-NEXT:    ret
3385  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
3386  ret <4 x i32> %v
3387}
3388
3389define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
3390; CHECK-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
3391; CHECK:       # %bb.0:
3392; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
3393; CHECK-NEXT:    vsrl.vi v9, v8, 1
3394; CHECK-NEXT:    lui a0, 349525
3395; CHECK-NEXT:    vor.vv v8, v8, v9
3396; CHECK-NEXT:    addi a0, a0, 1365
3397; CHECK-NEXT:    vsrl.vi v9, v8, 2
3398; CHECK-NEXT:    vor.vv v8, v8, v9
3399; CHECK-NEXT:    vsrl.vi v9, v8, 4
3400; CHECK-NEXT:    vor.vv v8, v8, v9
3401; CHECK-NEXT:    vsrl.vi v9, v8, 8
3402; CHECK-NEXT:    vor.vv v8, v8, v9
3403; CHECK-NEXT:    vsrl.vi v9, v8, 16
3404; CHECK-NEXT:    vor.vv v8, v8, v9
3405; CHECK-NEXT:    vnot.v v8, v8
3406; CHECK-NEXT:    vsrl.vi v9, v8, 1
3407; CHECK-NEXT:    vand.vx v9, v9, a0
3408; CHECK-NEXT:    lui a0, 209715
3409; CHECK-NEXT:    addi a0, a0, 819
3410; CHECK-NEXT:    vsub.vv v8, v8, v9
3411; CHECK-NEXT:    vand.vx v9, v8, a0
3412; CHECK-NEXT:    vsrl.vi v8, v8, 2
3413; CHECK-NEXT:    vand.vx v8, v8, a0
3414; CHECK-NEXT:    lui a0, 61681
3415; CHECK-NEXT:    addi a0, a0, -241
3416; CHECK-NEXT:    vadd.vv v8, v9, v8
3417; CHECK-NEXT:    vsrl.vi v9, v8, 4
3418; CHECK-NEXT:    vadd.vv v8, v8, v9
3419; CHECK-NEXT:    vand.vx v8, v8, a0
3420; CHECK-NEXT:    lui a0, 4112
3421; CHECK-NEXT:    addi a0, a0, 257
3422; CHECK-NEXT:    vmul.vx v8, v8, a0
3423; CHECK-NEXT:    vsrl.vi v8, v8, 24
3424; CHECK-NEXT:    ret
3425  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
3426  ret <4 x i32> %v
3427}
3428
3429define <8 x i32> @vp_ctlz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
3430; CHECK-LABEL: vp_ctlz_zero_undef_v8i32:
3431; CHECK:       # %bb.0:
3432; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
3433; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
3434; CHECK-NEXT:    lui a0, 349525
3435; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3436; CHECK-NEXT:    addi a0, a0, 1365
3437; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
3438; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3439; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
3440; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3441; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
3442; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3443; CHECK-NEXT:    vsrl.vi v10, v8, 16, v0.t
3444; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
3445; CHECK-NEXT:    vnot.v v8, v8, v0.t
3446; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
3447; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
3448; CHECK-NEXT:    lui a0, 209715
3449; CHECK-NEXT:    addi a0, a0, 819
3450; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
3451; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
3452; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3453; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3454; CHECK-NEXT:    lui a0, 61681
3455; CHECK-NEXT:    addi a0, a0, -241
3456; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
3457; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
3458; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
3459; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3460; CHECK-NEXT:    lui a0, 4112
3461; CHECK-NEXT:    addi a0, a0, 257
3462; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3463; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
3464; CHECK-NEXT:    ret
3465  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
3466  ret <8 x i32> %v
3467}
3468
3469define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
3470; CHECK-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
3471; CHECK:       # %bb.0:
3472; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
3473; CHECK-NEXT:    vsrl.vi v10, v8, 1
3474; CHECK-NEXT:    lui a0, 349525
3475; CHECK-NEXT:    vor.vv v8, v8, v10
3476; CHECK-NEXT:    addi a0, a0, 1365
3477; CHECK-NEXT:    vsrl.vi v10, v8, 2
3478; CHECK-NEXT:    vor.vv v8, v8, v10
3479; CHECK-NEXT:    vsrl.vi v10, v8, 4
3480; CHECK-NEXT:    vor.vv v8, v8, v10
3481; CHECK-NEXT:    vsrl.vi v10, v8, 8
3482; CHECK-NEXT:    vor.vv v8, v8, v10
3483; CHECK-NEXT:    vsrl.vi v10, v8, 16
3484; CHECK-NEXT:    vor.vv v8, v8, v10
3485; CHECK-NEXT:    vnot.v v8, v8
3486; CHECK-NEXT:    vsrl.vi v10, v8, 1
3487; CHECK-NEXT:    vand.vx v10, v10, a0
3488; CHECK-NEXT:    lui a0, 209715
3489; CHECK-NEXT:    addi a0, a0, 819
3490; CHECK-NEXT:    vsub.vv v8, v8, v10
3491; CHECK-NEXT:    vand.vx v10, v8, a0
3492; CHECK-NEXT:    vsrl.vi v8, v8, 2
3493; CHECK-NEXT:    vand.vx v8, v8, a0
3494; CHECK-NEXT:    lui a0, 61681
3495; CHECK-NEXT:    addi a0, a0, -241
3496; CHECK-NEXT:    vadd.vv v8, v10, v8
3497; CHECK-NEXT:    vsrl.vi v10, v8, 4
3498; CHECK-NEXT:    vadd.vv v8, v8, v10
3499; CHECK-NEXT:    vand.vx v8, v8, a0
3500; CHECK-NEXT:    lui a0, 4112
3501; CHECK-NEXT:    addi a0, a0, 257
3502; CHECK-NEXT:    vmul.vx v8, v8, a0
3503; CHECK-NEXT:    vsrl.vi v8, v8, 24
3504; CHECK-NEXT:    ret
3505  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
3506  ret <8 x i32> %v
3507}
3508
3509define <16 x i32> @vp_ctlz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
3510; CHECK-LABEL: vp_ctlz_zero_undef_v16i32:
3511; CHECK:       # %bb.0:
3512; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
3513; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
3514; CHECK-NEXT:    lui a0, 349525
3515; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
3516; CHECK-NEXT:    addi a0, a0, 1365
3517; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
3518; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
3519; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
3520; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
3521; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
3522; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
3523; CHECK-NEXT:    vsrl.vi v12, v8, 16, v0.t
3524; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
3525; CHECK-NEXT:    vnot.v v8, v8, v0.t
3526; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
3527; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
3528; CHECK-NEXT:    lui a0, 209715
3529; CHECK-NEXT:    addi a0, a0, 819
3530; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
3531; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
3532; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3533; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3534; CHECK-NEXT:    lui a0, 61681
3535; CHECK-NEXT:    addi a0, a0, -241
3536; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
3537; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
3538; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
3539; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3540; CHECK-NEXT:    lui a0, 4112
3541; CHECK-NEXT:    addi a0, a0, 257
3542; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3543; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
3544; CHECK-NEXT:    ret
3545  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
3546  ret <16 x i32> %v
3547}
3548
3549define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
3550; CHECK-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
3551; CHECK:       # %bb.0:
3552; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
3553; CHECK-NEXT:    vsrl.vi v12, v8, 1
3554; CHECK-NEXT:    lui a0, 349525
3555; CHECK-NEXT:    vor.vv v8, v8, v12
3556; CHECK-NEXT:    addi a0, a0, 1365
3557; CHECK-NEXT:    vsrl.vi v12, v8, 2
3558; CHECK-NEXT:    vor.vv v8, v8, v12
3559; CHECK-NEXT:    vsrl.vi v12, v8, 4
3560; CHECK-NEXT:    vor.vv v8, v8, v12
3561; CHECK-NEXT:    vsrl.vi v12, v8, 8
3562; CHECK-NEXT:    vor.vv v8, v8, v12
3563; CHECK-NEXT:    vsrl.vi v12, v8, 16
3564; CHECK-NEXT:    vor.vv v8, v8, v12
3565; CHECK-NEXT:    vnot.v v8, v8
3566; CHECK-NEXT:    vsrl.vi v12, v8, 1
3567; CHECK-NEXT:    vand.vx v12, v12, a0
3568; CHECK-NEXT:    lui a0, 209715
3569; CHECK-NEXT:    addi a0, a0, 819
3570; CHECK-NEXT:    vsub.vv v8, v8, v12
3571; CHECK-NEXT:    vand.vx v12, v8, a0
3572; CHECK-NEXT:    vsrl.vi v8, v8, 2
3573; CHECK-NEXT:    vand.vx v8, v8, a0
3574; CHECK-NEXT:    lui a0, 61681
3575; CHECK-NEXT:    addi a0, a0, -241
3576; CHECK-NEXT:    vadd.vv v8, v12, v8
3577; CHECK-NEXT:    vsrl.vi v12, v8, 4
3578; CHECK-NEXT:    vadd.vv v8, v8, v12
3579; CHECK-NEXT:    vand.vx v8, v8, a0
3580; CHECK-NEXT:    lui a0, 4112
3581; CHECK-NEXT:    addi a0, a0, 257
3582; CHECK-NEXT:    vmul.vx v8, v8, a0
3583; CHECK-NEXT:    vsrl.vi v8, v8, 24
3584; CHECK-NEXT:    ret
3585  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
3586  ret <16 x i32> %v
3587}
3588
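; For e64 the RV32 and RV64 lowerings diverge: RV32 has no 64-bit
; scalar registers, so each mask is splatted as a 32-bit pattern with
; vmv.v.x under a temporary e32 vsetvli (each e64 element sees the
; pattern in both halves), while RV64 builds the full 64-bit masks with
; addiw/slli/add. Both finish with a multiply by 0x0101010101010101
; and a vsrl.vx by 56.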
3589define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
3590; RV32-LABEL: vp_ctlz_zero_undef_v2i64:
3591; RV32:       # %bb.0:
3592; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3593; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
3594; RV32-NEXT:    lui a1, 349525
3595; RV32-NEXT:    addi a1, a1, 1365
3596; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3597; RV32-NEXT:    vmv.v.x v10, a1
3598; RV32-NEXT:    li a1, 32
3599; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3600; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
3601; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
3602; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
3603; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
3604; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
3605; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
3606; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
3607; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
3608; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
3609; RV32-NEXT:    vsrl.vx v9, v8, a1, v0.t
3610; RV32-NEXT:    lui a1, 209715
3611; RV32-NEXT:    addi a1, a1, 819
3612; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
3613; RV32-NEXT:    vnot.v v8, v8, v0.t
3614; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
3615; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
3616; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3617; RV32-NEXT:    vmv.v.x v10, a1
3618; RV32-NEXT:    lui a1, 61681
3619; RV32-NEXT:    addi a1, a1, -241
3620; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3621; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
3622; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
3623; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
3624; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
3625; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3626; RV32-NEXT:    vmv.v.x v10, a1
3627; RV32-NEXT:    lui a1, 4112
3628; RV32-NEXT:    addi a1, a1, 257
3629; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3630; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
3631; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
3632; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
3633; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3634; RV32-NEXT:    vmv.v.x v9, a1
3635; RV32-NEXT:    li a1, 56
3636; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3637; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
3638; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
3639; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
3640; RV32-NEXT:    ret
3641;
3642; RV64-LABEL: vp_ctlz_zero_undef_v2i64:
3643; RV64:       # %bb.0:
3644; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3645; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
3646; RV64-NEXT:    lui a0, 349525
3647; RV64-NEXT:    lui a1, 209715
3648; RV64-NEXT:    lui a2, 61681
3649; RV64-NEXT:    lui a3, 4112
3650; RV64-NEXT:    addiw a0, a0, 1365
3651; RV64-NEXT:    addiw a1, a1, 819
3652; RV64-NEXT:    addiw a2, a2, -241
3653; RV64-NEXT:    addiw a3, a3, 257
3654; RV64-NEXT:    slli a4, a0, 32
3655; RV64-NEXT:    add a0, a0, a4
3656; RV64-NEXT:    slli a4, a1, 32
3657; RV64-NEXT:    add a1, a1, a4
3658; RV64-NEXT:    slli a4, a2, 32
3659; RV64-NEXT:    add a2, a2, a4
3660; RV64-NEXT:    slli a4, a3, 32
3661; RV64-NEXT:    add a3, a3, a4
3662; RV64-NEXT:    li a4, 32
3663; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
3664; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
3665; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
3666; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
3667; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
3668; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
3669; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
3670; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
3671; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
3672; RV64-NEXT:    vsrl.vx v9, v8, a4, v0.t
3673; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
3674; RV64-NEXT:    vnot.v v8, v8, v0.t
3675; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
3676; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
3677; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
3678; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
3679; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
3680; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
3681; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
3682; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
3683; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
3684; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
3685; RV64-NEXT:    li a0, 56
3686; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
3687; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
3688; RV64-NEXT:    ret
3689  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl)
3690  ret <2 x i64> %v
3691}
3692
3693define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
3694; RV32-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
3695; RV32:       # %bb.0:
3696; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3697; RV32-NEXT:    vsrl.vi v9, v8, 1
3698; RV32-NEXT:    lui a1, 349525
3699; RV32-NEXT:    addi a1, a1, 1365
3700; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3701; RV32-NEXT:    vmv.v.x v10, a1
3702; RV32-NEXT:    li a1, 32
3703; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3704; RV32-NEXT:    vor.vv v8, v8, v9
3705; RV32-NEXT:    vsrl.vi v9, v8, 2
3706; RV32-NEXT:    vor.vv v8, v8, v9
3707; RV32-NEXT:    vsrl.vi v9, v8, 4
3708; RV32-NEXT:    vor.vv v8, v8, v9
3709; RV32-NEXT:    vsrl.vi v9, v8, 8
3710; RV32-NEXT:    vor.vv v8, v8, v9
3711; RV32-NEXT:    vsrl.vi v9, v8, 16
3712; RV32-NEXT:    vor.vv v8, v8, v9
3713; RV32-NEXT:    vsrl.vx v9, v8, a1
3714; RV32-NEXT:    lui a1, 209715
3715; RV32-NEXT:    addi a1, a1, 819
3716; RV32-NEXT:    vor.vv v8, v8, v9
3717; RV32-NEXT:    vnot.v v8, v8
3718; RV32-NEXT:    vsrl.vi v9, v8, 1
3719; RV32-NEXT:    vand.vv v9, v9, v10
3720; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3721; RV32-NEXT:    vmv.v.x v10, a1
3722; RV32-NEXT:    lui a1, 61681
3723; RV32-NEXT:    addi a1, a1, -241
3724; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3725; RV32-NEXT:    vsub.vv v8, v8, v9
3726; RV32-NEXT:    vand.vv v9, v8, v10
3727; RV32-NEXT:    vsrl.vi v8, v8, 2
3728; RV32-NEXT:    vand.vv v8, v8, v10
3729; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3730; RV32-NEXT:    vmv.v.x v10, a1
3731; RV32-NEXT:    lui a1, 4112
3732; RV32-NEXT:    addi a1, a1, 257
3733; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3734; RV32-NEXT:    vadd.vv v8, v9, v8
3735; RV32-NEXT:    vsrl.vi v9, v8, 4
3736; RV32-NEXT:    vadd.vv v8, v8, v9
3737; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3738; RV32-NEXT:    vmv.v.x v9, a1
3739; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3740; RV32-NEXT:    vand.vv v8, v8, v10
3741; RV32-NEXT:    vmul.vv v8, v8, v9
3742; RV32-NEXT:    li a0, 56
3743; RV32-NEXT:    vsrl.vx v8, v8, a0
3744; RV32-NEXT:    ret
3745;
3746; RV64-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
3747; RV64:       # %bb.0:
3748; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3749; RV64-NEXT:    vsrl.vi v9, v8, 1
3750; RV64-NEXT:    lui a0, 349525
3751; RV64-NEXT:    lui a1, 209715
3752; RV64-NEXT:    lui a2, 61681
3753; RV64-NEXT:    lui a3, 4112
3754; RV64-NEXT:    addiw a0, a0, 1365
3755; RV64-NEXT:    addiw a1, a1, 819
3756; RV64-NEXT:    addiw a2, a2, -241
3757; RV64-NEXT:    addiw a3, a3, 257
3758; RV64-NEXT:    slli a4, a0, 32
3759; RV64-NEXT:    add a0, a0, a4
3760; RV64-NEXT:    slli a4, a1, 32
3761; RV64-NEXT:    add a1, a1, a4
3762; RV64-NEXT:    slli a4, a2, 32
3763; RV64-NEXT:    add a2, a2, a4
3764; RV64-NEXT:    slli a4, a3, 32
3765; RV64-NEXT:    add a3, a3, a4
3766; RV64-NEXT:    li a4, 32
3767; RV64-NEXT:    vor.vv v8, v8, v9
3768; RV64-NEXT:    vsrl.vi v9, v8, 2
3769; RV64-NEXT:    vor.vv v8, v8, v9
3770; RV64-NEXT:    vsrl.vi v9, v8, 4
3771; RV64-NEXT:    vor.vv v8, v8, v9
3772; RV64-NEXT:    vsrl.vi v9, v8, 8
3773; RV64-NEXT:    vor.vv v8, v8, v9
3774; RV64-NEXT:    vsrl.vi v9, v8, 16
3775; RV64-NEXT:    vor.vv v8, v8, v9
3776; RV64-NEXT:    vsrl.vx v9, v8, a4
3777; RV64-NEXT:    vor.vv v8, v8, v9
3778; RV64-NEXT:    vnot.v v8, v8
3779; RV64-NEXT:    vsrl.vi v9, v8, 1
3780; RV64-NEXT:    vand.vx v9, v9, a0
3781; RV64-NEXT:    vsub.vv v8, v8, v9
3782; RV64-NEXT:    vand.vx v9, v8, a1
3783; RV64-NEXT:    vsrl.vi v8, v8, 2
3784; RV64-NEXT:    vand.vx v8, v8, a1
3785; RV64-NEXT:    vadd.vv v8, v9, v8
3786; RV64-NEXT:    vsrl.vi v9, v8, 4
3787; RV64-NEXT:    vadd.vv v8, v8, v9
3788; RV64-NEXT:    vand.vx v8, v8, a2
3789; RV64-NEXT:    vmul.vx v8, v8, a3
3790; RV64-NEXT:    li a0, 56
3791; RV64-NEXT:    vsrl.vx v8, v8, a0
3792; RV64-NEXT:    ret
3793  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
3794  ret <2 x i64> %v
3795}
3796
3797define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
3798; RV32-LABEL: vp_ctlz_zero_undef_v4i64:
3799; RV32:       # %bb.0:
3800; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3801; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
3802; RV32-NEXT:    lui a1, 349525
3803; RV32-NEXT:    addi a1, a1, 1365
3804; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3805; RV32-NEXT:    vmv.v.x v10, a1
3806; RV32-NEXT:    li a1, 32
3807; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3808; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
3809; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
3810; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
3811; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
3812; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
3813; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
3814; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
3815; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
3816; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
3817; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
3818; RV32-NEXT:    lui a1, 209715
3819; RV32-NEXT:    addi a1, a1, 819
3820; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
3821; RV32-NEXT:    vnot.v v8, v8, v0.t
3822; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
3823; RV32-NEXT:    vand.vv v10, v12, v10, v0.t
3824; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3825; RV32-NEXT:    vmv.v.x v12, a1
3826; RV32-NEXT:    lui a1, 61681
3827; RV32-NEXT:    addi a1, a1, -241
3828; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3829; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
3830; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
3831; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
3832; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
3833; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3834; RV32-NEXT:    vmv.v.x v12, a1
3835; RV32-NEXT:    lui a1, 4112
3836; RV32-NEXT:    addi a1, a1, 257
3837; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3838; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
3839; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
3840; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
3841; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3842; RV32-NEXT:    vmv.v.x v10, a1
3843; RV32-NEXT:    li a1, 56
3844; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3845; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
3846; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
3847; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
3848; RV32-NEXT:    ret
3849;
3850; RV64-LABEL: vp_ctlz_zero_undef_v4i64:
3851; RV64:       # %bb.0:
3852; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3853; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
3854; RV64-NEXT:    lui a0, 349525
3855; RV64-NEXT:    lui a1, 209715
3856; RV64-NEXT:    lui a2, 61681
3857; RV64-NEXT:    lui a3, 4112
3858; RV64-NEXT:    addiw a0, a0, 1365
3859; RV64-NEXT:    addiw a1, a1, 819
3860; RV64-NEXT:    addiw a2, a2, -241
3861; RV64-NEXT:    addiw a3, a3, 257
3862; RV64-NEXT:    slli a4, a0, 32
3863; RV64-NEXT:    add a0, a0, a4
3864; RV64-NEXT:    slli a4, a1, 32
3865; RV64-NEXT:    add a1, a1, a4
3866; RV64-NEXT:    slli a4, a2, 32
3867; RV64-NEXT:    add a2, a2, a4
3868; RV64-NEXT:    slli a4, a3, 32
3869; RV64-NEXT:    add a3, a3, a4
3870; RV64-NEXT:    li a4, 32
3871; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
3872; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
3873; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
3874; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
3875; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
3876; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
3877; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
3878; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
3879; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
3880; RV64-NEXT:    vsrl.vx v10, v8, a4, v0.t
3881; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
3882; RV64-NEXT:    vnot.v v8, v8, v0.t
3883; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
3884; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
3885; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
3886; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
3887; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
3888; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
3889; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
3890; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
3891; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
3892; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
3893; RV64-NEXT:    li a0, 56
3894; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
3895; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
3896; RV64-NEXT:    ret
3897  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl)
3898  ret <4 x i64> %v
3899}
3900
3901define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
3902; RV32-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
3903; RV32:       # %bb.0:
3904; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3905; RV32-NEXT:    vsrl.vi v10, v8, 1
3906; RV32-NEXT:    lui a1, 349525
3907; RV32-NEXT:    addi a1, a1, 1365
3908; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3909; RV32-NEXT:    vmv.v.x v12, a1
3910; RV32-NEXT:    li a1, 32
3911; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3912; RV32-NEXT:    vor.vv v8, v8, v10
3913; RV32-NEXT:    vsrl.vi v10, v8, 2
3914; RV32-NEXT:    vor.vv v8, v8, v10
3915; RV32-NEXT:    vsrl.vi v10, v8, 4
3916; RV32-NEXT:    vor.vv v8, v8, v10
3917; RV32-NEXT:    vsrl.vi v10, v8, 8
3918; RV32-NEXT:    vor.vv v8, v8, v10
3919; RV32-NEXT:    vsrl.vi v10, v8, 16
3920; RV32-NEXT:    vor.vv v8, v8, v10
3921; RV32-NEXT:    vsrl.vx v10, v8, a1
3922; RV32-NEXT:    lui a1, 209715
3923; RV32-NEXT:    addi a1, a1, 819
3924; RV32-NEXT:    vor.vv v8, v8, v10
3925; RV32-NEXT:    vnot.v v8, v8
3926; RV32-NEXT:    vsrl.vi v10, v8, 1
3927; RV32-NEXT:    vand.vv v10, v10, v12
3928; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3929; RV32-NEXT:    vmv.v.x v12, a1
3930; RV32-NEXT:    lui a1, 61681
3931; RV32-NEXT:    addi a1, a1, -241
3932; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3933; RV32-NEXT:    vsub.vv v8, v8, v10
3934; RV32-NEXT:    vand.vv v10, v8, v12
3935; RV32-NEXT:    vsrl.vi v8, v8, 2
3936; RV32-NEXT:    vand.vv v8, v8, v12
3937; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3938; RV32-NEXT:    vmv.v.x v12, a1
3939; RV32-NEXT:    lui a1, 4112
3940; RV32-NEXT:    addi a1, a1, 257
3941; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3942; RV32-NEXT:    vadd.vv v8, v10, v8
3943; RV32-NEXT:    vsrl.vi v10, v8, 4
3944; RV32-NEXT:    vadd.vv v8, v8, v10
3945; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3946; RV32-NEXT:    vmv.v.x v10, a1
3947; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3948; RV32-NEXT:    vand.vv v8, v8, v12
3949; RV32-NEXT:    vmul.vv v8, v8, v10
3950; RV32-NEXT:    li a0, 56
3951; RV32-NEXT:    vsrl.vx v8, v8, a0
3952; RV32-NEXT:    ret
3953;
3954; RV64-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
3955; RV64:       # %bb.0:
3956; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3957; RV64-NEXT:    vsrl.vi v10, v8, 1
3958; RV64-NEXT:    lui a0, 349525
3959; RV64-NEXT:    lui a1, 209715
3960; RV64-NEXT:    lui a2, 61681
3961; RV64-NEXT:    lui a3, 4112
3962; RV64-NEXT:    addiw a0, a0, 1365
3963; RV64-NEXT:    addiw a1, a1, 819
3964; RV64-NEXT:    addiw a2, a2, -241
3965; RV64-NEXT:    addiw a3, a3, 257
3966; RV64-NEXT:    slli a4, a0, 32
3967; RV64-NEXT:    add a0, a0, a4
3968; RV64-NEXT:    slli a4, a1, 32
3969; RV64-NEXT:    add a1, a1, a4
3970; RV64-NEXT:    slli a4, a2, 32
3971; RV64-NEXT:    add a2, a2, a4
3972; RV64-NEXT:    slli a4, a3, 32
3973; RV64-NEXT:    add a3, a3, a4
3974; RV64-NEXT:    li a4, 32
3975; RV64-NEXT:    vor.vv v8, v8, v10
3976; RV64-NEXT:    vsrl.vi v10, v8, 2
3977; RV64-NEXT:    vor.vv v8, v8, v10
3978; RV64-NEXT:    vsrl.vi v10, v8, 4
3979; RV64-NEXT:    vor.vv v8, v8, v10
3980; RV64-NEXT:    vsrl.vi v10, v8, 8
3981; RV64-NEXT:    vor.vv v8, v8, v10
3982; RV64-NEXT:    vsrl.vi v10, v8, 16
3983; RV64-NEXT:    vor.vv v8, v8, v10
3984; RV64-NEXT:    vsrl.vx v10, v8, a4
3985; RV64-NEXT:    vor.vv v8, v8, v10
3986; RV64-NEXT:    vnot.v v8, v8
3987; RV64-NEXT:    vsrl.vi v10, v8, 1
3988; RV64-NEXT:    vand.vx v10, v10, a0
3989; RV64-NEXT:    vsub.vv v8, v8, v10
3990; RV64-NEXT:    vand.vx v10, v8, a1
3991; RV64-NEXT:    vsrl.vi v8, v8, 2
3992; RV64-NEXT:    vand.vx v8, v8, a1
3993; RV64-NEXT:    vadd.vv v8, v10, v8
3994; RV64-NEXT:    vsrl.vi v10, v8, 4
3995; RV64-NEXT:    vadd.vv v8, v8, v10
3996; RV64-NEXT:    vand.vx v8, v8, a2
3997; RV64-NEXT:    vmul.vx v8, v8, a3
3998; RV64-NEXT:    li a0, 56
3999; RV64-NEXT:    vsrl.vx v8, v8, a0
4000; RV64-NEXT:    ret
4001  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
4002  ret <4 x i64> %v
4003}
4004
4005define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
4006; RV32-LABEL: vp_ctlz_zero_undef_v8i64:
4007; RV32:       # %bb.0:
4008; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4009; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
4010; RV32-NEXT:    lui a1, 349525
4011; RV32-NEXT:    addi a1, a1, 1365
4012; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
4013; RV32-NEXT:    vmv.v.x v12, a1
4014; RV32-NEXT:    li a1, 32
4015; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4016; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
4017; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
4018; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
4019; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
4020; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
4021; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
4022; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
4023; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
4024; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
4025; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
4026; RV32-NEXT:    lui a1, 209715
4027; RV32-NEXT:    addi a1, a1, 819
4028; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
4029; RV32-NEXT:    vnot.v v16, v8, v0.t
4030; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
4031; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
4032; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
4033; RV32-NEXT:    vmv.v.x v8, a1
4034; RV32-NEXT:    lui a1, 61681
4035; RV32-NEXT:    addi a1, a1, -241
4036; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4037; RV32-NEXT:    vsub.vv v12, v16, v12, v0.t
4038; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
4039; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
4040; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
4041; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
4042; RV32-NEXT:    vmv.v.x v12, a1
4043; RV32-NEXT:    lui a1, 4112
4044; RV32-NEXT:    addi a1, a1, 257
4045; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4046; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
4047; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
4048; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
4049; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
4050; RV32-NEXT:    vmv.v.x v16, a1
4051; RV32-NEXT:    li a1, 56
4052; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4053; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
4054; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
4055; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
4056; RV32-NEXT:    ret
4057;
4058; RV64-LABEL: vp_ctlz_zero_undef_v8i64:
4059; RV64:       # %bb.0:
4060; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4061; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
4062; RV64-NEXT:    lui a0, 349525
4063; RV64-NEXT:    lui a1, 209715
4064; RV64-NEXT:    lui a2, 61681
4065; RV64-NEXT:    lui a3, 4112
4066; RV64-NEXT:    addiw a0, a0, 1365
4067; RV64-NEXT:    addiw a1, a1, 819
4068; RV64-NEXT:    addiw a2, a2, -241
4069; RV64-NEXT:    addiw a3, a3, 257
4070; RV64-NEXT:    slli a4, a0, 32
4071; RV64-NEXT:    add a0, a0, a4
4072; RV64-NEXT:    slli a4, a1, 32
4073; RV64-NEXT:    add a1, a1, a4
4074; RV64-NEXT:    slli a4, a2, 32
4075; RV64-NEXT:    add a2, a2, a4
4076; RV64-NEXT:    slli a4, a3, 32
4077; RV64-NEXT:    add a3, a3, a4
4078; RV64-NEXT:    li a4, 32
4079; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
4080; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
4081; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
4082; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
4083; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
4084; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
4085; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
4086; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
4087; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
4088; RV64-NEXT:    vsrl.vx v12, v8, a4, v0.t
4089; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
4090; RV64-NEXT:    vnot.v v8, v8, v0.t
4091; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
4092; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
4093; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
4094; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
4095; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
4096; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
4097; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
4098; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
4099; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
4100; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
4101; RV64-NEXT:    li a0, 56
4102; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
4103; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
4104; RV64-NEXT:    ret
4105  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl)
4106  ret <8 x i64> %v
4107}
4108
4109define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
4110; RV32-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
4111; RV32:       # %bb.0:
4112; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
4113; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vx v12, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

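; Note: the i64 expansions below follow the classic branchless ctlz: smear
; the leading one bit rightward (OR of shifts by 1, 2, 4, 8, 16 and 32),
; invert, then popcount with the usual SWAR sequence. On RV64 each 64-bit
; magic constant is built from its 32-bit half, e.g. for
; 0x5555555555555555:
;   lui   a0, 349525      # a0 = 0x55555000
;   addiw a0, a0, 1365    # a0 = 0x55555555
;   slli  a4, a0, 32      # a4 = 0x5555555500000000
;   add   a0, a0, a4      # a0 = 0x5555555555555555
; The final multiply by 0x0101010101010101 accumulates the byte sums into
; the top byte, which the shift right by 56 extracts.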
define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

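; Note: RV32 has no 64-bit scalar registers, so the e64 splat constants are
; materialized by storing the 32-bit pattern to two adjacent stack words
; (e.g. sw a1, 40(sp) / sw a1, 44(sp)) and broadcasting the doubleword with
; a zero-stride vlse64.v. In the masked form above, the live m8 temporaries
; do not all fit in v8/v16/v24, hence the vs8r.v/vl8r.v folded spills at
; offsets of the form sp + 48 + k * vlenb, which the .cfi_escape encodes.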
define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vand.vv v0, v24, v16
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vx v16, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

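; Note: the unmasked form needs no vector spill slots: the temporaries fit
; in v0/v8/v16/v24, so only a 32-byte scalar frame is reserved for the four
; broadcast constants.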
define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vand.vv v0, v24, v16
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vx v16, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

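; Note: at the minimum VLEN of 128, <32 x i64> exceeds one LMUL=8 register
; group, so the 32-element tests below split the vector at 16 lanes: the low
; half runs under VL = min(evl, 16) (the li/bltu clamp) and the high half
; under VL = max(evl - 16, 0), computed branchlessly:
;   addi a3, a0, -16    # evl - 16 (wraps if evl < 16)
;   sltu a0, a0, a3     # 1 iff it wrapped, i.e. evl < 16
;   addi a0, a0, -1     # 0 if it wrapped, otherwise all-ones
;   and  a0, a0, a3     # max(evl - 16, 0)
; In the masked form the mask bits for lanes 16-31 are first moved down
; with vslidedown.vi v24, v0, 2 (two e8 elements, i.e. 16 mask bits).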
define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v32i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v0, 2
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    addi a2, a2, 257
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw a2, 20(sp)
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    bltu a0, a1, .LBB70_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:  .LBB70_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    addi a4, sp, 32
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a5, 40
; RV32-NEXT:    mul a3, a3, a5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a4), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 48
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 48
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, sp, 24
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a5, 24
; RV32-NEXT:    mul a3, a3, a5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v8, (a4), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 5
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    vmv1r.v v0, v24
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v32i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v0, 2
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB70_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB70_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    addiw a3, a3, 819
; RV64-NEXT:    addiw a6, a4, -241
; RV64-NEXT:    addiw a7, a5, 257
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a5, a2, a5
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a4, a3, a4
; RV64-NEXT:    slli a2, a6, 32
; RV64-NEXT:    add a2, a6, a2
; RV64-NEXT:    slli a3, a7, 32
; RV64-NEXT:    add a3, a7, a3
; RV64-NEXT:    addi a6, a0, -16
; RV64-NEXT:    sltu a0, a0, a6
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a6, a0, a6
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a4, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    addi a7, sp, 16
; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    slli a7, a7, 3
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vl8r.v v8, (a7) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a5, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a4, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

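; Note: without a mask both halves can be live at once, so the schedule
; below interleaves the two 16-lane halves, toggling vsetvli between the
; low-half and high-half AVLs instead of finishing one half first.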
define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v32i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    addi a1, a2, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a3, .LBB71_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB71_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 1
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v0, v8, a2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v16, 8
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 16
; RV32-NEXT:    vor.vv v16, v16, v8
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v0, v16, a2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v24, v8
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v24, v24, v8
; RV32-NEXT:    vadd.vv v24, v0, v24
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16
; RV32-NEXT:    vsrl.vi v0, v16, 1
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v0, v0, v24
; RV32-NEXT:    addi a2, sp, 24
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vsub.vv v0, v16, v0
; RV32-NEXT:    addi a4, sp, 48
; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v24, 4
; RV32-NEXT:    vadd.vv v16, v24, v16
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v0, v8
; RV32-NEXT:    vsrl.vi v0, v0, 2
; RV32-NEXT:    vand.vv v8, v0, v8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a3), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v16, v16, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v24, v8, v24
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v8, v16, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v24, a2
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v32i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB71_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB71_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    lui a3, 349525
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a7, a3, 1365
; RV64-NEXT:    addiw a3, a4, 819
; RV64-NEXT:    addiw a4, a5, -241
; RV64-NEXT:    addiw a6, a6, 257
; RV64-NEXT:    slli a5, a7, 32
; RV64-NEXT:    add a7, a7, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a5, a3, a5
; RV64-NEXT:    slli a3, a4, 32
; RV64-NEXT:    add a3, a4, a3
; RV64-NEXT:    slli a4, a6, 32
; RV64-NEXT:    add a4, a6, a4
; RV64-NEXT:    addi a6, a0, -16
; RV64-NEXT:    sltu a0, a0, a6
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a6, a0, a6
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsrl.vi v24, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 2
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v24, v8, a2
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 8
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 16
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    vand.vx v24, v24, a7
; RV64-NEXT:    vsub.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v24, v16, a2
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v8, a5
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vnot.v v16, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a5
; RV64-NEXT:    vadd.vv v8, v24, v8
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vand.vx v24, v24, a7
; RV64-NEXT:    vsub.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v16, a5
; RV64-NEXT:    vsrl.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v24, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vadd.vv v16, v16, v24
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vmul.vx v16, v16, a4
; RV64-NEXT:    vsrl.vx v16, v16, a0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}
