; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
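; The RUN lines enable only +v and +m (no Zvbb), so @llvm.vp.cttz.* with the
; "is zero poison" flag false is expanded using the identity
; cttz(x) = popcount(~x & (x - 1)) followed by the classic SWAR popcount
; sequence, as the CHECK lines below show.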

declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)

define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_cttz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)

define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_cttz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)

define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_cttz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)

define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}
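; For elements wider than i8, the popcount tail changes: instead of ending
; with a single vand.vi of 15, the per-byte counts are accumulated by a vmul
; with the 0x0101...01 splat and shifted down from the top byte (vsrl by 8,
; 24, or 56 for i16, i32, and i64 respectively).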

declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)

define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)

define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)

define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)

define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vnot.v v10, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}
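; The i32 expansions use the full 32-bit popcount magic constants
; (0x55555555, 0x33333333, 0x0F0F0F0F, 0x01010101), each materialized with a
; lui+addi pair before the corresponding vand.vx/vmul.vx.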

declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)

define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)

define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)

define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vnot.v v10, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)

define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vnot.v v12, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}
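; For i64 elements the magic constants are 64 bits wide. RV64 builds each one
; in a scalar register (addiw, then slli+add to replicate the low 32 bits into
; the high half) and keeps the .vx instruction forms; RV32 has no 64-bit GPRs,
; so it splats the 32-bit pattern with vmv.v.x under a temporary e32 vsetvli
; and uses the .vv forms instead.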

declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)

define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vnot.v v9, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsub.vx v9, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)

define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vnot.v v10, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsub.vx v10, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)

define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v12, 1, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v8, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v12, v12, v16, v0.t
; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vnot.v v12, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsub.vx v12, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}
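; At LMUL=8 (v15i64 and v16i64), RV32 can no longer keep all four splatted
; constants in vector registers: each 64-bit pattern is stored to the stack as
; a pair of words and broadcast back with a zero-stride vlse64, and the masked
; variants additionally spill vector temporaries around the reloads.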
1260
1261declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
1262
1263define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
1264; RV32-LABEL: vp_cttz_v15i64:
1265; RV32:       # %bb.0:
1266; RV32-NEXT:    addi sp, sp, -48
1267; RV32-NEXT:    .cfi_def_cfa_offset 48
1268; RV32-NEXT:    csrr a1, vlenb
1269; RV32-NEXT:    slli a1, a1, 4
1270; RV32-NEXT:    sub sp, sp, a1
1271; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
1272; RV32-NEXT:    li a1, 1
1273; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1274; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
1275; RV32-NEXT:    lui a1, 349525
1276; RV32-NEXT:    addi a1, a1, 1365
1277; RV32-NEXT:    sw a1, 40(sp)
1278; RV32-NEXT:    sw a1, 44(sp)
1279; RV32-NEXT:    lui a1, 209715
1280; RV32-NEXT:    addi a1, a1, 819
1281; RV32-NEXT:    sw a1, 32(sp)
1282; RV32-NEXT:    sw a1, 36(sp)
1283; RV32-NEXT:    lui a1, 61681
1284; RV32-NEXT:    addi a1, a1, -241
1285; RV32-NEXT:    sw a1, 24(sp)
1286; RV32-NEXT:    sw a1, 28(sp)
1287; RV32-NEXT:    lui a1, 4112
1288; RV32-NEXT:    addi a1, a1, 257
1289; RV32-NEXT:    sw a1, 16(sp)
1290; RV32-NEXT:    sw a1, 20(sp)
1291; RV32-NEXT:    addi a1, sp, 40
1292; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1293; RV32-NEXT:    vlse64.v v24, (a1), zero
1294; RV32-NEXT:    csrr a1, vlenb
1295; RV32-NEXT:    slli a1, a1, 3
1296; RV32-NEXT:    add a1, sp, a1
1297; RV32-NEXT:    addi a1, a1, 48
1298; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
1299; RV32-NEXT:    addi a1, sp, 32
1300; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1301; RV32-NEXT:    vnot.v v8, v8, v0.t
1302; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
1303; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
1304; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1305; RV32-NEXT:    vlse64.v v24, (a1), zero
1306; RV32-NEXT:    addi a1, sp, 48
1307; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
1308; RV32-NEXT:    csrr a1, vlenb
1309; RV32-NEXT:    slli a1, a1, 3
1310; RV32-NEXT:    add a1, sp, a1
1311; RV32-NEXT:    addi a1, a1, 48
1312; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
1313; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1314; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
1315; RV32-NEXT:    addi a1, sp, 24
1316; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
1317; RV32-NEXT:    addi a2, sp, 48
1318; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
1319; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
1320; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
1321; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
1322; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1323; RV32-NEXT:    vlse64.v v8, (a1), zero
1324; RV32-NEXT:    csrr a1, vlenb
1325; RV32-NEXT:    slli a1, a1, 3
1326; RV32-NEXT:    add a1, sp, a1
1327; RV32-NEXT:    addi a1, a1, 48
1328; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
1329; RV32-NEXT:    addi a1, sp, 16
1330; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1331; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
1332; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1333; RV32-NEXT:    vlse64.v v16, (a1), zero
1334; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1335; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
1336; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
1337; RV32-NEXT:    li a0, 56
1338; RV32-NEXT:    csrr a1, vlenb
1339; RV32-NEXT:    slli a1, a1, 3
1340; RV32-NEXT:    add a1, sp, a1
1341; RV32-NEXT:    addi a1, a1, 48
1342; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
1343; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
1344; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
1345; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
1346; RV32-NEXT:    csrr a0, vlenb
1347; RV32-NEXT:    slli a0, a0, 4
1348; RV32-NEXT:    add sp, sp, a0
1349; RV32-NEXT:    .cfi_def_cfa sp, 48
1350; RV32-NEXT:    addi sp, sp, 48
1351; RV32-NEXT:    .cfi_def_cfa_offset 0
1352; RV32-NEXT:    ret
1353;
1354; RV64-LABEL: vp_cttz_v15i64:
1355; RV64:       # %bb.0:
1356; RV64-NEXT:    li a1, 1
1357; RV64-NEXT:    lui a2, 349525
1358; RV64-NEXT:    lui a3, 209715
1359; RV64-NEXT:    lui a4, 61681
1360; RV64-NEXT:    lui a5, 4112
1361; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1362; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
1363; RV64-NEXT:    addiw a0, a2, 1365
1364; RV64-NEXT:    addiw a1, a3, 819
1365; RV64-NEXT:    addiw a2, a4, -241
1366; RV64-NEXT:    addiw a3, a5, 257
1367; RV64-NEXT:    slli a4, a0, 32
1368; RV64-NEXT:    add a0, a0, a4
1369; RV64-NEXT:    slli a4, a1, 32
1370; RV64-NEXT:    add a1, a1, a4
1371; RV64-NEXT:    slli a4, a2, 32
1372; RV64-NEXT:    add a2, a2, a4
1373; RV64-NEXT:    slli a4, a3, 32
1374; RV64-NEXT:    add a3, a3, a4
1375; RV64-NEXT:    vnot.v v8, v8, v0.t
1376; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
1377; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
1378; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
1379; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
1380; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
1381; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
1382; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1383; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
1384; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
1385; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
1386; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
1387; RV64-NEXT:    li a0, 56
1388; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
1389; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
1390; RV64-NEXT:    ret
1391  %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
1392  ret <15 x i64> %v
1393}
1394
define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v8, v16
; RV32-NEXT:    vsrl.vi v8, v16, 1
; RV32-NEXT:    vand.vv v0, v8, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v16, v0
; RV32-NEXT:    vand.vv v0, v16, v24
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)

define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v8, v16
; RV32-NEXT:    vsrl.vi v8, v16, 1
; RV32-NEXT:    vand.vv v0, v8, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v16, v0
; RV32-NEXT:    vand.vv v0, v16, v24
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

declare <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)

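; A <32 x i64> operand occupies two m8 register groups (v8 and v16), so the
; lowering splits the work: the low half runs under min(evl, 16) and the high
; half under max(evl - 16, 0), the latter computed branchlessly with the
; sltu/addi/and sequence below; the masked form also slides the mask down by
; two bytes (vslidedown.vi) to reach the upper 16 mask bits.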
define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v32i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 48
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v7, v0, 2
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    addi a2, a2, 257
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw a2, 20(sp)
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    bltu a0, a1, .LBB34_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:  .LBB34_2:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    li a5, 24
; RV32-NEXT:    mul a4, a4, a5
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v24, v16, v24, v0.t
; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, sp, 24
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a3), zero
; RV32-NEXT:    addi a3, sp, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v8, (a4), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
; RV32-NEXT:    vsub.vv v24, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v24, 2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v32i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    li a1, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v0, 2
; RV64-NEXT:    mv a4, a0
; RV64-NEXT:    bltu a0, a1, .LBB34_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a4, 16
; RV64-NEXT:  .LBB34_2:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    addiw a3, a3, 819
; RV64-NEXT:    addiw a7, a5, -241
; RV64-NEXT:    addiw t0, a6, 257
; RV64-NEXT:    slli a6, a2, 32
; RV64-NEXT:    add a6, a2, a6
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a5, a3, a5
; RV64-NEXT:    slli a2, a7, 32
; RV64-NEXT:    add a2, a7, a2
; RV64-NEXT:    slli a3, t0, 32
; RV64-NEXT:    add a3, t0, a3
; RV64-NEXT:    addi a7, a0, -16
; RV64-NEXT:    sltu a0, a0, a7
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a7, a0, a7
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a5, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a4, vlenb
; RV64-NEXT:    slli a4, a4, 3
; RV64-NEXT:    add a4, sp, a4
; RV64-NEXT:    addi a4, a4, 16
; RV64-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a5, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

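; With no mask register live, the unmasked v32i64 path interleaves the two
; halves under alternating vsetvli toggles, which appears to keep most
; intermediates in registers; the masked version above instead needs the
; folded vector spills and reloads.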
define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v32i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    addi a1, a2, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a3, .LBB35_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB35_2:
; RV32-NEXT:    li a2, 1
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a3), zero
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v8, v8, a2
; RV32-NEXT:    vand.vv v8, v0, v8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v0, v16, a2
; RV32-NEXT:    vnot.v v16, v16
; RV32-NEXT:    vand.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v0, v0, v24
; RV32-NEXT:    vsub.vv v0, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    addi a2, sp, 24
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v16, v24
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v0, v8
; RV32-NEXT:    vsrl.vi v0, v0, 2
; RV32-NEXT:    vand.vv v0, v0, v8
; RV32-NEXT:    vadd.vv v24, v24, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v16, v8
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v24, 4
; RV32-NEXT:    vadd.vv v16, v24, v16
; RV32-NEXT:    addi a4, sp, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v0, v8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a3), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v24, v8, v0
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v8, v16, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v24, a2
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_v32i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB35_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB35_2:
; RV64-NEXT:    li a2, 1
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vnot.v v24, v8
; RV64-NEXT:    lui a3, 349525
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a3, a3, 1365
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a5, a5, -241
; RV64-NEXT:    addiw a6, a6, 257
; RV64-NEXT:    slli a7, a3, 32
; RV64-NEXT:    add a3, a3, a7
; RV64-NEXT:    slli a7, a4, 32
; RV64-NEXT:    add a4, a4, a7
; RV64-NEXT:    slli a7, a5, 32
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    slli a7, a6, 32
; RV64-NEXT:    add a6, a6, a7
; RV64-NEXT:    addi a7, a0, -16
; RV64-NEXT:    sltu a0, a0, a7
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a7
; RV64-NEXT:    li a7, 56
; RV64-NEXT:    vsub.vx v8, v8, a2
; RV64-NEXT:    vand.vv v8, v24, v8
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsub.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v24, v16, a2
; RV64-NEXT:    vnot.v v16, v16
; RV64-NEXT:    vand.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v8, a4
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a4
; RV64-NEXT:    vadd.vv v8, v24, v8
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsub.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v16, a4
; RV64-NEXT:    vsrl.vi v16, v16, 2
; RV64-NEXT:    vand.vx v16, v16, a4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v24, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v8, v8, a6
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v8, v8, a7
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v16, v24
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vmul.vx v16, v16, a6
; RV64-NEXT:    vsrl.vx v16, v16, a7
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}

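; The cttz_zero_undef variants below pass true for the is-zero-poison flag.
; With this expansion that changes nothing: a zero input yields
; ~0 & (0 - 1) = all-ones, whose popcount is already the element width, so
; the generated code matches the non-poison variants above.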
define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

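; Element-width differences in the popcount tail: for e8 the nibble sum
; (v + (v >> 4)) & 0x0f already holds the count (at most 8), so the byte-sum
; multiply is skipped; e16 multiplies by 0x0101 (li 257) and shifts right by
; 8, and e32 multiplies by 0x01010101 (lui 4112 + addi 257) and shifts right
; by 24.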
define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vnot.v v10, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

2685define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
2686; CHECK-LABEL: vp_cttz_zero_undef_v2i32:
2687; CHECK:       # %bb.0:
2688; CHECK-NEXT:    li a1, 1
2689; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
2690; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
2691; CHECK-NEXT:    lui a0, 349525
2692; CHECK-NEXT:    vnot.v v8, v8, v0.t
2693; CHECK-NEXT:    addi a0, a0, 1365
2694; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
2695; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
2696; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
2697; CHECK-NEXT:    lui a0, 209715
2698; CHECK-NEXT:    addi a0, a0, 819
2699; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
2700; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
2701; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
2702; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2703; CHECK-NEXT:    lui a0, 61681
2704; CHECK-NEXT:    addi a0, a0, -241
2705; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
2706; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
2707; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
2708; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2709; CHECK-NEXT:    lui a0, 4112
2710; CHECK-NEXT:    addi a0, a0, 257
2711; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
2712; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
2713; CHECK-NEXT:    ret
2714  %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
2715  ret <2 x i32> %v
2716}
2717
2718define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
2719; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
2720; CHECK:       # %bb.0:
2721; CHECK-NEXT:    li a1, 1
2722; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
2723; CHECK-NEXT:    vnot.v v9, v8
2724; CHECK-NEXT:    vsub.vx v8, v8, a1
2725; CHECK-NEXT:    lui a0, 349525
2726; CHECK-NEXT:    addi a0, a0, 1365
2727; CHECK-NEXT:    vand.vv v8, v9, v8
2728; CHECK-NEXT:    vsrl.vi v9, v8, 1
2729; CHECK-NEXT:    vand.vx v9, v9, a0
2730; CHECK-NEXT:    lui a0, 209715
2731; CHECK-NEXT:    addi a0, a0, 819
2732; CHECK-NEXT:    vsub.vv v8, v8, v9
2733; CHECK-NEXT:    vand.vx v9, v8, a0
2734; CHECK-NEXT:    vsrl.vi v8, v8, 2
2735; CHECK-NEXT:    vand.vx v8, v8, a0
2736; CHECK-NEXT:    lui a0, 61681
2737; CHECK-NEXT:    addi a0, a0, -241
2738; CHECK-NEXT:    vadd.vv v8, v9, v8
2739; CHECK-NEXT:    vsrl.vi v9, v8, 4
2740; CHECK-NEXT:    vadd.vv v8, v8, v9
2741; CHECK-NEXT:    vand.vx v8, v8, a0
2742; CHECK-NEXT:    lui a0, 4112
2743; CHECK-NEXT:    addi a0, a0, 257
2744; CHECK-NEXT:    vmul.vx v8, v8, a0
2745; CHECK-NEXT:    vsrl.vi v8, v8, 24
2746; CHECK-NEXT:    ret
2747  %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
2748  ret <2 x i32> %v
2749}
2750
2751define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
2752; CHECK-LABEL: vp_cttz_zero_undef_v4i32:
2753; CHECK:       # %bb.0:
2754; CHECK-NEXT:    li a1, 1
2755; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
2756; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
2757; CHECK-NEXT:    lui a0, 349525
2758; CHECK-NEXT:    vnot.v v8, v8, v0.t
2759; CHECK-NEXT:    addi a0, a0, 1365
2760; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
2761; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
2762; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
2763; CHECK-NEXT:    lui a0, 209715
2764; CHECK-NEXT:    addi a0, a0, 819
2765; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
2766; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
2767; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
2768; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2769; CHECK-NEXT:    lui a0, 61681
2770; CHECK-NEXT:    addi a0, a0, -241
2771; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
2772; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
2773; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
2774; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2775; CHECK-NEXT:    lui a0, 4112
2776; CHECK-NEXT:    addi a0, a0, 257
2777; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
2778; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
2779; CHECK-NEXT:    ret
2780  %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
2781  ret <4 x i32> %v
2782}

define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vnot.v v10, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vnot.v v12, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}
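
; The i64 element cases materialize the popcount masks differently per target
; (an illustrative comment, not part of the autogenerated checks). On RV64
; each 32-bit pattern is widened to 64 bits by duplicating it into the upper
; half, as in the checks above:
;   addiw a0, a2, 1365      ; a0 = 0x55555555
;   slli  a4, a0, 32        ; a4 = 0x5555555500000000
;   add   a0, a0, a4        ; a0 = 0x5555555555555555
; On RV32 there is no 64-bit scalar register, so the pattern is splatted at
; SEW=32 with vmv.v.x and the register group is then reused at SEW=64: both
; halves of each 64-bit element hold the repeating pattern, which is exactly
; the mask the vand.vv steps need.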

define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vnot.v v9, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsub.vx v9, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vnot.v v10, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsub.vx v10, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v12, 1, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v8, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v12, v12, v16, v0.t
; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vnot.v v12, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsub.vx v12, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}
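
; At LMUL=8 on RV32 (an illustrative comment, not part of the autogenerated
; checks) the 64-bit masks cannot be held in scalar registers or rebuilt
; cheaply, so each one is written to the stack as two identical 32-bit words
; and broadcast with a stride-0 load, as the checks above show:
;   sw       a1, 40(sp)         ; low half of 0x5555555555555555
;   sw       a1, 44(sp)         ; high half
;   addi     a1, sp, 40
;   vlse64.v v24, (a1), zero    ; splat the 64-bit pattern to all elements
; With v8/v16/v24 all occupied by m8 register groups, intermediates are
; spilled and reloaded through a vlenb-scaled stack region (vs8r.v/vl8r.v),
; which is why the frame is described by the .cfi_escape expression
; "sp + 48 + 16 * vlenb".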

define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v8, v16
; RV32-NEXT:    vsrl.vi v8, v16, 1
; RV32-NEXT:    vand.vv v0, v8, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v16, v0
; RV32-NEXT:    vand.vv v0, v16, v24
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v8, v16
; RV32-NEXT:    vsrl.vi v8, v16, 1
; RV32-NEXT:    vand.vv v0, v8, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v16, v0
; RV32-NEXT:    vand.vv v0, v16, v24
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v32i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 48
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v7, v0, 2
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    addi a2, a2, 257
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw a2, 20(sp)
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    bltu a0, a1, .LBB70_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:  .LBB70_2:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    li a5, 24
; RV32-NEXT:    mul a4, a4, a5
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 5
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsub.vv v24, v16, v24, v0.t
; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, sp, 24
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a3), zero
; RV32-NEXT:    addi a3, sp, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v8, (a4), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    li a4, 24
; RV32-NEXT:    mul a3, a3, a4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 48
; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
; RV32-NEXT:    vsub.vv v24, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v24, 2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 5
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 40
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a0, sp, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 48
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 48
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v32i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    li a1, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v0, 2
; RV64-NEXT:    mv a4, a0
; RV64-NEXT:    bltu a0, a1, .LBB70_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a4, 16
; RV64-NEXT:  .LBB70_2:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    addiw a3, a3, 819
; RV64-NEXT:    addiw a7, a5, -241
; RV64-NEXT:    addiw t0, a6, 257
; RV64-NEXT:    slli a6, a2, 32
; RV64-NEXT:    add a6, a2, a6
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a5, a3, a5
; RV64-NEXT:    slli a2, a7, 32
; RV64-NEXT:    add a2, a7, a2
; RV64-NEXT:    slli a3, t0, 32
; RV64-NEXT:    add a3, t0, a3
; RV64-NEXT:    addi a7, a0, -16
; RV64-NEXT:    sltu a0, a0, a7
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a7, a0, a7
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a5, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a4, vlenb
; RV64-NEXT:    slli a4, a4, 3
; RV64-NEXT:    add a4, sp, a4
; RV64-NEXT:    addi a4, a4, 16
; RV64-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a6, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a5, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}
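
; For 32 x i64 (an illustrative comment, not part of the autogenerated
; checks) the operation is split into two 16-element halves. The first half
; runs with min(evl, 16):
;   bltu a0, a1, .LBB70_2       ; a1 = 16; skip the clamp when evl < 16
;   li   a2, 16
; and the second half with the remainder clamped to zero, computed by the
; usual branchless max(evl - 16, 0):
;   addi a3, a0, -16
;   sltu a0, a0, a3             ; 1 iff evl - 16 wrapped (evl < 16)
;   addi a0, a0, -1             ; 0 on wrap, all-ones otherwise
;   and  a0, a0, a3
; The mask for the high half is produced by sliding v0 down by 2 bytes
; (vslidedown.vi ... 2 at SEW=8), i.e. by 16 mask bits.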

define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v32i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    addi a1, a2, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a3, .LBB71_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB71_2:
; RV32-NEXT:    li a2, 1
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    addi a3, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a3), zero
; RV32-NEXT:    addi a3, a0, -16
; RV32-NEXT:    sltu a0, a0, a3
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v8, v8, a2
; RV32-NEXT:    vand.vv v8, v0, v8
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vx v0, v16, a2
; RV32-NEXT:    vnot.v v16, v16
; RV32-NEXT:    vand.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v0, v0, v24
; RV32-NEXT:    vsub.vv v0, v8, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v16, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    addi a2, sp, 24
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v16, v16, v24
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v0, v8
; RV32-NEXT:    vsrl.vi v0, v0, 2
; RV32-NEXT:    vand.vv v0, v0, v8
; RV32-NEXT:    vadd.vv v24, v24, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v0, v16, v8
; RV32-NEXT:    vsrl.vi v16, v16, 2
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v24, 4
; RV32-NEXT:    vadd.vv v16, v24, v16
; RV32-NEXT:    addi a4, sp, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v0, v8
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a3), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v16, v16, v0
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmul.vv v24, v8, v0
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v8, v16, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v24, a2
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_zero_undef_v32i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB71_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB71_2:
; RV64-NEXT:    li a2, 1
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vnot.v v24, v8
; RV64-NEXT:    lui a3, 349525
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 4112
; RV64-NEXT:    addiw a3, a3, 1365
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a5, a5, -241
; RV64-NEXT:    addiw a6, a6, 257
; RV64-NEXT:    slli a7, a3, 32
; RV64-NEXT:    add a3, a3, a7
; RV64-NEXT:    slli a7, a4, 32
; RV64-NEXT:    add a4, a4, a7
; RV64-NEXT:    slli a7, a5, 32
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    slli a7, a6, 32
; RV64-NEXT:    add a6, a6, a7
; RV64-NEXT:    addi a7, a0, -16
; RV64-NEXT:    sltu a0, a0, a7
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a7
; RV64-NEXT:    li a7, 56
; RV64-NEXT:    vsub.vx v8, v8, a2
; RV64-NEXT:    vand.vv v8, v24, v8
; RV64-NEXT:    vsrl.vi v24, v8, 1
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsub.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsub.vx v24, v16, a2
; RV64-NEXT:    vnot.v v16, v16
; RV64-NEXT:    vand.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v8, a4
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a4
; RV64-NEXT:    vadd.vv v8, v24, v8
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 1
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsub.vv v16, v16, v24
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v24, v16, a4
; RV64-NEXT:    vsrl.vi v16, v16, 2
; RV64-NEXT:    vand.vx v16, v16, a4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v8, v8, a5
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v24, v16
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vmul.vx v8, v8, a6
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v16, 4
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vx v8, v8, a7
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vadd.vv v16, v16, v24
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vmul.vx v16, v16, a6
; RV64-NEXT:    vsrl.vx v16, v16, a7
; RV64-NEXT:    ret
  %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}
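
; NOTE: Both v32i64 variants lower cttz with the standard branch-free
; expansion: (~x & (x - 1)) converts the trailing zeros to ones, and a SWAR
; popcount then counts them using the 0x5555.../0x3333.../0x0f0f... masks
; followed by a multiply with 0x0101... and a shift right by 56. The bltu/li
; sequence clamps the first half's EVL to 16. RV64 materializes each 64-bit
; mask with lui/addiw plus slli/add to duplicate the low 32 bits; RV32
; stores each 32-bit half to the stack and broadcasts the 64-bit pattern
; with zero-strided vlse64.v loads. As a scalar illustration only (not part
; of the generated checks; the function name is hypothetical), the same
; expansion in IR:
;
;   define i64 @cttz64_sketch(i64 %x) {
;     %m = sub i64 %x, 1                      ; x - 1
;     %n = xor i64 %x, -1                     ; ~x
;     %t = and i64 %n, %m                     ; trailing zeros become ones
;     %s1 = lshr i64 %t, 1
;     %a1 = and i64 %s1, 6148914691236517205  ; 0x5555555555555555
;     %c2 = sub i64 %t, %a1                   ; 2-bit partial counts
;     %a2 = and i64 %c2, 3689348814741910323  ; 0x3333333333333333
;     %s2 = lshr i64 %c2, 2
;     %a3 = and i64 %s2, 3689348814741910323
;     %c4 = add i64 %a2, %a3                  ; 4-bit partial counts
;     %s4 = lshr i64 %c4, 4
;     %c8 = add i64 %c4, %s4
;     %b = and i64 %c8, 1085102592571150095   ; 0x0f0f0f0f0f0f0f0f
;     %f = mul i64 %b, 72340172838076673      ; 0x0101010101010101
;     %r = lshr i64 %f, 56                    ; byte sums collect in top byte
;     ret i64 %r
;   }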