xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll (revision b6c0f1bfa79a3a32d841ac5ab1f94c3aee3b5d90)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \
3; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \
5; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \
7; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
8; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \
9; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
10
11declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
12
13define <vscale x 1 x i8> @vp_cttz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
14; CHECK-LABEL: vp_cttz_nxv1i8:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    li a1, 1
17; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
18; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
19; CHECK-NEXT:    li a0, 85
20; CHECK-NEXT:    vnot.v v8, v8, v0.t
21; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
22; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
23; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
24; CHECK-NEXT:    li a0, 51
25; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
26; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
27; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
28; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
29; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
30; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
31; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
32; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
33; CHECK-NEXT:    ret
34;
35; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8:
36; CHECK-ZVBB:       # %bb.0:
37; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
38; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
39; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv1i8 (e8/mf8): base ISA expands to popcount(~x & (x-1))
  ; bit-twiddling (constants 85/51/15 = 0x55/0x33/0x0F); Zvbb uses one vctz.v.
40  %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
41  ret <vscale x 1 x i8> %v
42}
43
44define <vscale x 1 x i8> @vp_cttz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
45; CHECK-LABEL: vp_cttz_nxv1i8_unmasked:
46; CHECK:       # %bb.0:
47; CHECK-NEXT:    li a1, 1
48; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
49; CHECK-NEXT:    vnot.v v9, v8
50; CHECK-NEXT:    vsub.vx v8, v8, a1
51; CHECK-NEXT:    li a0, 85
52; CHECK-NEXT:    vand.vv v8, v9, v8
53; CHECK-NEXT:    vsrl.vi v9, v8, 1
54; CHECK-NEXT:    vand.vx v9, v9, a0
55; CHECK-NEXT:    li a0, 51
56; CHECK-NEXT:    vsub.vv v8, v8, v9
57; CHECK-NEXT:    vand.vx v9, v8, a0
58; CHECK-NEXT:    vsrl.vi v8, v8, 2
59; CHECK-NEXT:    vand.vx v8, v8, a0
60; CHECK-NEXT:    vadd.vv v8, v9, v8
61; CHECK-NEXT:    vsrl.vi v9, v8, 4
62; CHECK-NEXT:    vadd.vv v8, v8, v9
63; CHECK-NEXT:    vand.vi v8, v8, 15
64; CHECK-NEXT:    ret
65;
66; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8_unmasked:
67; CHECK-ZVBB:       # %bb.0:
68; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
69; CHECK-ZVBB-NEXT:    vctz.v v8, v8
70; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant (all-true mask): same popcount expansion without v0.t
  ; predication; Zvbb still folds to a single vctz.v.
71  %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
72  ret <vscale x 1 x i8> %v
73}
74
75declare <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
76
77define <vscale x 2 x i8> @vp_cttz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
78; CHECK-LABEL: vp_cttz_nxv2i8:
79; CHECK:       # %bb.0:
80; CHECK-NEXT:    li a1, 1
81; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
82; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
83; CHECK-NEXT:    li a0, 85
84; CHECK-NEXT:    vnot.v v8, v8, v0.t
85; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
86; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
87; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
88; CHECK-NEXT:    li a0, 51
89; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
90; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
91; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
92; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
93; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
94; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
95; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
96; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
97; CHECK-NEXT:    ret
98;
99; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8:
100; CHECK-ZVBB:       # %bb.0:
101; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
102; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
103; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv2i8 (e8/mf4): popcount(~x & (x-1)) expansion with
  ; 0x55/0x33/0x0F masks; Zvbb lowers to vctz.v.
104  %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
105  ret <vscale x 2 x i8> %v
106}
107
108define <vscale x 2 x i8> @vp_cttz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
109; CHECK-LABEL: vp_cttz_nxv2i8_unmasked:
110; CHECK:       # %bb.0:
111; CHECK-NEXT:    li a1, 1
112; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
113; CHECK-NEXT:    vnot.v v9, v8
114; CHECK-NEXT:    vsub.vx v8, v8, a1
115; CHECK-NEXT:    li a0, 85
116; CHECK-NEXT:    vand.vv v8, v9, v8
117; CHECK-NEXT:    vsrl.vi v9, v8, 1
118; CHECK-NEXT:    vand.vx v9, v9, a0
119; CHECK-NEXT:    li a0, 51
120; CHECK-NEXT:    vsub.vv v8, v8, v9
121; CHECK-NEXT:    vand.vx v9, v8, a0
122; CHECK-NEXT:    vsrl.vi v8, v8, 2
123; CHECK-NEXT:    vand.vx v8, v8, a0
124; CHECK-NEXT:    vadd.vv v8, v9, v8
125; CHECK-NEXT:    vsrl.vi v9, v8, 4
126; CHECK-NEXT:    vadd.vv v8, v8, v9
127; CHECK-NEXT:    vand.vi v8, v8, 15
128; CHECK-NEXT:    ret
129;
130; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8_unmasked:
131; CHECK-ZVBB:       # %bb.0:
132; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
133; CHECK-ZVBB-NEXT:    vctz.v v8, v8
134; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same expansion without v0.t predication.
135  %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl)
136  ret <vscale x 2 x i8> %v
137}
138
139declare <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
140
141define <vscale x 4 x i8> @vp_cttz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
142; CHECK-LABEL: vp_cttz_nxv4i8:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    li a1, 1
145; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
146; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
147; CHECK-NEXT:    li a0, 85
148; CHECK-NEXT:    vnot.v v8, v8, v0.t
149; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
150; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
151; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
152; CHECK-NEXT:    li a0, 51
153; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
154; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
155; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
156; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
157; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
158; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
159; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
160; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
161; CHECK-NEXT:    ret
162;
163; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8:
164; CHECK-ZVBB:       # %bb.0:
165; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
166; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
167; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv4i8 (e8/mf2): popcount(~x & (x-1)) expansion;
  ; Zvbb lowers to vctz.v.
168  %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
169  ret <vscale x 4 x i8> %v
170}
171
172define <vscale x 4 x i8> @vp_cttz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
173; CHECK-LABEL: vp_cttz_nxv4i8_unmasked:
174; CHECK:       # %bb.0:
175; CHECK-NEXT:    li a1, 1
176; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
177; CHECK-NEXT:    vnot.v v9, v8
178; CHECK-NEXT:    vsub.vx v8, v8, a1
179; CHECK-NEXT:    li a0, 85
180; CHECK-NEXT:    vand.vv v8, v9, v8
181; CHECK-NEXT:    vsrl.vi v9, v8, 1
182; CHECK-NEXT:    vand.vx v9, v9, a0
183; CHECK-NEXT:    li a0, 51
184; CHECK-NEXT:    vsub.vv v8, v8, v9
185; CHECK-NEXT:    vand.vx v9, v8, a0
186; CHECK-NEXT:    vsrl.vi v8, v8, 2
187; CHECK-NEXT:    vand.vx v8, v8, a0
188; CHECK-NEXT:    vadd.vv v8, v9, v8
189; CHECK-NEXT:    vsrl.vi v9, v8, 4
190; CHECK-NEXT:    vadd.vv v8, v8, v9
191; CHECK-NEXT:    vand.vi v8, v8, 15
192; CHECK-NEXT:    ret
193;
194; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8_unmasked:
195; CHECK-ZVBB:       # %bb.0:
196; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
197; CHECK-ZVBB-NEXT:    vctz.v v8, v8
198; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same expansion without v0.t predication.
199  %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl)
200  ret <vscale x 4 x i8> %v
201}
202
203declare <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
204
205define <vscale x 8 x i8> @vp_cttz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
206; CHECK-LABEL: vp_cttz_nxv8i8:
207; CHECK:       # %bb.0:
208; CHECK-NEXT:    li a1, 1
209; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
210; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
211; CHECK-NEXT:    li a0, 85
212; CHECK-NEXT:    vnot.v v8, v8, v0.t
213; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
214; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
215; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
216; CHECK-NEXT:    li a0, 51
217; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
218; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
219; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
220; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
221; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
222; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
223; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
224; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
225; CHECK-NEXT:    ret
226;
227; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8:
228; CHECK-ZVBB:       # %bb.0:
229; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
230; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
231; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv8i8 (e8/m1): popcount(~x & (x-1)) expansion;
  ; Zvbb lowers to vctz.v.
232  %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
233  ret <vscale x 8 x i8> %v
234}
235
236define <vscale x 8 x i8> @vp_cttz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
237; CHECK-LABEL: vp_cttz_nxv8i8_unmasked:
238; CHECK:       # %bb.0:
239; CHECK-NEXT:    li a1, 1
240; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
241; CHECK-NEXT:    vnot.v v9, v8
242; CHECK-NEXT:    vsub.vx v8, v8, a1
243; CHECK-NEXT:    li a0, 85
244; CHECK-NEXT:    vand.vv v8, v9, v8
245; CHECK-NEXT:    vsrl.vi v9, v8, 1
246; CHECK-NEXT:    vand.vx v9, v9, a0
247; CHECK-NEXT:    li a0, 51
248; CHECK-NEXT:    vsub.vv v8, v8, v9
249; CHECK-NEXT:    vand.vx v9, v8, a0
250; CHECK-NEXT:    vsrl.vi v8, v8, 2
251; CHECK-NEXT:    vand.vx v8, v8, a0
252; CHECK-NEXT:    vadd.vv v8, v9, v8
253; CHECK-NEXT:    vsrl.vi v9, v8, 4
254; CHECK-NEXT:    vadd.vv v8, v8, v9
255; CHECK-NEXT:    vand.vi v8, v8, 15
256; CHECK-NEXT:    ret
257;
258; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8_unmasked:
259; CHECK-ZVBB:       # %bb.0:
260; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
261; CHECK-ZVBB-NEXT:    vctz.v v8, v8
262; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same expansion without v0.t predication.
263  %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl)
264  ret <vscale x 8 x i8> %v
265}
266
267declare <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
268
269define <vscale x 16 x i8> @vp_cttz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
270; CHECK-LABEL: vp_cttz_nxv16i8:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    li a1, 1
273; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
274; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
275; CHECK-NEXT:    li a0, 85
276; CHECK-NEXT:    vnot.v v8, v8, v0.t
277; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
278; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
279; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
280; CHECK-NEXT:    li a0, 51
281; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
282; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
283; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
284; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
285; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
286; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
287; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
288; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
289; CHECK-NEXT:    ret
290;
291; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8:
292; CHECK-ZVBB:       # %bb.0:
293; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
294; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
295; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv16i8 (e8/m2, scratch regs step to v10):
  ; popcount(~x & (x-1)) expansion; Zvbb lowers to vctz.v.
296  %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
297  ret <vscale x 16 x i8> %v
298}
299
300define <vscale x 16 x i8> @vp_cttz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
301; CHECK-LABEL: vp_cttz_nxv16i8_unmasked:
302; CHECK:       # %bb.0:
303; CHECK-NEXT:    li a1, 1
304; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
305; CHECK-NEXT:    vnot.v v10, v8
306; CHECK-NEXT:    vsub.vx v8, v8, a1
307; CHECK-NEXT:    li a0, 85
308; CHECK-NEXT:    vand.vv v8, v10, v8
309; CHECK-NEXT:    vsrl.vi v10, v8, 1
310; CHECK-NEXT:    vand.vx v10, v10, a0
311; CHECK-NEXT:    li a0, 51
312; CHECK-NEXT:    vsub.vv v8, v8, v10
313; CHECK-NEXT:    vand.vx v10, v8, a0
314; CHECK-NEXT:    vsrl.vi v8, v8, 2
315; CHECK-NEXT:    vand.vx v8, v8, a0
316; CHECK-NEXT:    vadd.vv v8, v10, v8
317; CHECK-NEXT:    vsrl.vi v10, v8, 4
318; CHECK-NEXT:    vadd.vv v8, v8, v10
319; CHECK-NEXT:    vand.vi v8, v8, 15
320; CHECK-NEXT:    ret
321;
322; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8_unmasked:
323; CHECK-ZVBB:       # %bb.0:
324; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
325; CHECK-ZVBB-NEXT:    vctz.v v8, v8
326; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same expansion without v0.t predication.
327  %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl)
328  ret <vscale x 16 x i8> %v
329}
330
331declare <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
332
333define <vscale x 32 x i8> @vp_cttz_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
334; CHECK-LABEL: vp_cttz_nxv32i8:
335; CHECK:       # %bb.0:
336; CHECK-NEXT:    li a1, 1
337; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
338; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
339; CHECK-NEXT:    li a0, 85
340; CHECK-NEXT:    vnot.v v8, v8, v0.t
341; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
342; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
343; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
344; CHECK-NEXT:    li a0, 51
345; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
346; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
347; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
348; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
349; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
350; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
351; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
352; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
353; CHECK-NEXT:    ret
354;
355; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8:
356; CHECK-ZVBB:       # %bb.0:
357; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
358; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
359; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv32i8 (e8/m4, scratch regs step to v12):
  ; popcount(~x & (x-1)) expansion; Zvbb lowers to vctz.v.
360  %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
361  ret <vscale x 32 x i8> %v
362}
363
364define <vscale x 32 x i8> @vp_cttz_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
365; CHECK-LABEL: vp_cttz_nxv32i8_unmasked:
366; CHECK:       # %bb.0:
367; CHECK-NEXT:    li a1, 1
368; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
369; CHECK-NEXT:    vnot.v v12, v8
370; CHECK-NEXT:    vsub.vx v8, v8, a1
371; CHECK-NEXT:    li a0, 85
372; CHECK-NEXT:    vand.vv v8, v12, v8
373; CHECK-NEXT:    vsrl.vi v12, v8, 1
374; CHECK-NEXT:    vand.vx v12, v12, a0
375; CHECK-NEXT:    li a0, 51
376; CHECK-NEXT:    vsub.vv v8, v8, v12
377; CHECK-NEXT:    vand.vx v12, v8, a0
378; CHECK-NEXT:    vsrl.vi v8, v8, 2
379; CHECK-NEXT:    vand.vx v8, v8, a0
380; CHECK-NEXT:    vadd.vv v8, v12, v8
381; CHECK-NEXT:    vsrl.vi v12, v8, 4
382; CHECK-NEXT:    vadd.vv v8, v8, v12
383; CHECK-NEXT:    vand.vi v8, v8, 15
384; CHECK-NEXT:    ret
385;
386; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8_unmasked:
387; CHECK-ZVBB:       # %bb.0:
388; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
389; CHECK-ZVBB-NEXT:    vctz.v v8, v8
390; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same expansion without v0.t predication.
391  %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> splat (i1 true), i32 %evl)
392  ret <vscale x 32 x i8> %v
393}
394
395declare <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
396
397define <vscale x 64 x i8> @vp_cttz_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
398; CHECK-LABEL: vp_cttz_nxv64i8:
399; CHECK:       # %bb.0:
400; CHECK-NEXT:    li a1, 1
401; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
402; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
403; CHECK-NEXT:    li a0, 85
404; CHECK-NEXT:    vnot.v v8, v8, v0.t
405; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
406; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
407; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
408; CHECK-NEXT:    li a0, 51
409; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
410; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
411; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
412; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
413; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
414; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
415; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
416; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
417; CHECK-NEXT:    ret
418;
419; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8:
420; CHECK-ZVBB:       # %bb.0:
421; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
422; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
423; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv64i8 (e8/m8, scratch regs step to v16):
  ; popcount(~x & (x-1)) expansion; Zvbb lowers to vctz.v.
424  %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> %m, i32 %evl)
425  ret <vscale x 64 x i8> %v
426}
427
428define <vscale x 64 x i8> @vp_cttz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
429; CHECK-LABEL: vp_cttz_nxv64i8_unmasked:
430; CHECK:       # %bb.0:
431; CHECK-NEXT:    li a1, 1
432; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
433; CHECK-NEXT:    vnot.v v16, v8
434; CHECK-NEXT:    vsub.vx v8, v8, a1
435; CHECK-NEXT:    li a0, 85
436; CHECK-NEXT:    vand.vv v8, v16, v8
437; CHECK-NEXT:    vsrl.vi v16, v8, 1
438; CHECK-NEXT:    vand.vx v16, v16, a0
439; CHECK-NEXT:    li a0, 51
440; CHECK-NEXT:    vsub.vv v8, v8, v16
441; CHECK-NEXT:    vand.vx v16, v8, a0
442; CHECK-NEXT:    vsrl.vi v8, v8, 2
443; CHECK-NEXT:    vand.vx v8, v8, a0
444; CHECK-NEXT:    vadd.vv v8, v16, v8
445; CHECK-NEXT:    vsrl.vi v16, v8, 4
446; CHECK-NEXT:    vadd.vv v8, v8, v16
447; CHECK-NEXT:    vand.vi v8, v8, 15
448; CHECK-NEXT:    ret
449;
450; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8_unmasked:
451; CHECK-ZVBB:       # %bb.0:
452; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
453; CHECK-ZVBB-NEXT:    vctz.v v8, v8
454; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same expansion without v0.t predication.
455  %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> splat (i1 true), i32 %evl)
456  ret <vscale x 64 x i8> %v
457}
458
459declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
460
461define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
462; CHECK-LABEL: vp_cttz_nxv1i16:
463; CHECK:       # %bb.0:
464; CHECK-NEXT:    li a1, 1
465; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
466; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
467; CHECK-NEXT:    lui a0, 5
468; CHECK-NEXT:    vnot.v v8, v8, v0.t
469; CHECK-NEXT:    addi a0, a0, 1365
470; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
471; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
472; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
473; CHECK-NEXT:    lui a0, 3
474; CHECK-NEXT:    addi a0, a0, 819
475; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
476; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
477; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
478; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
479; CHECK-NEXT:    lui a0, 1
480; CHECK-NEXT:    addi a0, a0, -241
481; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
482; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
483; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
484; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
485; CHECK-NEXT:    li a0, 257
486; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
487; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
488; CHECK-NEXT:    ret
489;
490; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16:
491; CHECK-ZVBB:       # %bb.0:
492; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
493; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
494; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv1i16 (e16/mf4): popcount(~x & (x-1)) expansion with
  ; 16-bit masks 0x5555/0x3333/0x0F0F and a *0x101 >> 8 byte-sum; Zvbb: vctz.v.
495  %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
496  ret <vscale x 1 x i16> %v
497}
498
499define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
500; CHECK-LABEL: vp_cttz_nxv1i16_unmasked:
501; CHECK:       # %bb.0:
502; CHECK-NEXT:    li a1, 1
503; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
504; CHECK-NEXT:    vnot.v v9, v8
505; CHECK-NEXT:    vsub.vx v8, v8, a1
506; CHECK-NEXT:    lui a0, 5
507; CHECK-NEXT:    addi a0, a0, 1365
508; CHECK-NEXT:    vand.vv v8, v9, v8
509; CHECK-NEXT:    vsrl.vi v9, v8, 1
510; CHECK-NEXT:    vand.vx v9, v9, a0
511; CHECK-NEXT:    lui a0, 3
512; CHECK-NEXT:    addi a0, a0, 819
513; CHECK-NEXT:    vsub.vv v8, v8, v9
514; CHECK-NEXT:    vand.vx v9, v8, a0
515; CHECK-NEXT:    vsrl.vi v8, v8, 2
516; CHECK-NEXT:    vand.vx v8, v8, a0
517; CHECK-NEXT:    lui a0, 1
518; CHECK-NEXT:    addi a0, a0, -241
519; CHECK-NEXT:    vadd.vv v8, v9, v8
520; CHECK-NEXT:    vsrl.vi v9, v8, 4
521; CHECK-NEXT:    vadd.vv v8, v8, v9
522; CHECK-NEXT:    vand.vx v8, v8, a0
523; CHECK-NEXT:    li a0, 257
524; CHECK-NEXT:    vmul.vx v8, v8, a0
525; CHECK-NEXT:    vsrl.vi v8, v8, 8
526; CHECK-NEXT:    ret
527;
528; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked:
529; CHECK-ZVBB:       # %bb.0:
530; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
531; CHECK-ZVBB-NEXT:    vctz.v v8, v8
532; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same 16-bit expansion without v0.t predication.
533  %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
534  ret <vscale x 1 x i16> %v
535}
536
537declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
538
539define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
540; CHECK-LABEL: vp_cttz_nxv2i16:
541; CHECK:       # %bb.0:
542; CHECK-NEXT:    li a1, 1
543; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
544; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
545; CHECK-NEXT:    lui a0, 5
546; CHECK-NEXT:    vnot.v v8, v8, v0.t
547; CHECK-NEXT:    addi a0, a0, 1365
548; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
549; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
550; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
551; CHECK-NEXT:    lui a0, 3
552; CHECK-NEXT:    addi a0, a0, 819
553; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
554; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
555; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
556; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
557; CHECK-NEXT:    lui a0, 1
558; CHECK-NEXT:    addi a0, a0, -241
559; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
560; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
561; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
562; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
563; CHECK-NEXT:    li a0, 257
564; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
565; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
566; CHECK-NEXT:    ret
567;
568; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16:
569; CHECK-ZVBB:       # %bb.0:
570; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
571; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
572; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv2i16 (e16/mf2): 16-bit popcount(~x & (x-1))
  ; expansion; Zvbb lowers to vctz.v.
573  %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
574  ret <vscale x 2 x i16> %v
575}
576
577define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
578; CHECK-LABEL: vp_cttz_nxv2i16_unmasked:
579; CHECK:       # %bb.0:
580; CHECK-NEXT:    li a1, 1
581; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
582; CHECK-NEXT:    vnot.v v9, v8
583; CHECK-NEXT:    vsub.vx v8, v8, a1
584; CHECK-NEXT:    lui a0, 5
585; CHECK-NEXT:    addi a0, a0, 1365
586; CHECK-NEXT:    vand.vv v8, v9, v8
587; CHECK-NEXT:    vsrl.vi v9, v8, 1
588; CHECK-NEXT:    vand.vx v9, v9, a0
589; CHECK-NEXT:    lui a0, 3
590; CHECK-NEXT:    addi a0, a0, 819
591; CHECK-NEXT:    vsub.vv v8, v8, v9
592; CHECK-NEXT:    vand.vx v9, v8, a0
593; CHECK-NEXT:    vsrl.vi v8, v8, 2
594; CHECK-NEXT:    vand.vx v8, v8, a0
595; CHECK-NEXT:    lui a0, 1
596; CHECK-NEXT:    addi a0, a0, -241
597; CHECK-NEXT:    vadd.vv v8, v9, v8
598; CHECK-NEXT:    vsrl.vi v9, v8, 4
599; CHECK-NEXT:    vadd.vv v8, v8, v9
600; CHECK-NEXT:    vand.vx v8, v8, a0
601; CHECK-NEXT:    li a0, 257
602; CHECK-NEXT:    vmul.vx v8, v8, a0
603; CHECK-NEXT:    vsrl.vi v8, v8, 8
604; CHECK-NEXT:    ret
605;
606; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked:
607; CHECK-ZVBB:       # %bb.0:
608; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
609; CHECK-ZVBB-NEXT:    vctz.v v8, v8
610; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same 16-bit expansion without v0.t predication.
611  %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl)
612  ret <vscale x 2 x i16> %v
613}
614
615declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
616
617define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
618; CHECK-LABEL: vp_cttz_nxv4i16:
619; CHECK:       # %bb.0:
620; CHECK-NEXT:    li a1, 1
621; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
622; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
623; CHECK-NEXT:    lui a0, 5
624; CHECK-NEXT:    vnot.v v8, v8, v0.t
625; CHECK-NEXT:    addi a0, a0, 1365
626; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
627; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
628; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
629; CHECK-NEXT:    lui a0, 3
630; CHECK-NEXT:    addi a0, a0, 819
631; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
632; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
633; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
634; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
635; CHECK-NEXT:    lui a0, 1
636; CHECK-NEXT:    addi a0, a0, -241
637; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
638; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
639; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
640; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
641; CHECK-NEXT:    li a0, 257
642; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
643; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
644; CHECK-NEXT:    ret
645;
646; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16:
647; CHECK-ZVBB:       # %bb.0:
648; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
649; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
650; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv4i16 (e16/m1): 16-bit popcount(~x & (x-1))
  ; expansion; Zvbb lowers to vctz.v.
651  %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
652  ret <vscale x 4 x i16> %v
653}
654
655define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
656; CHECK-LABEL: vp_cttz_nxv4i16_unmasked:
657; CHECK:       # %bb.0:
658; CHECK-NEXT:    li a1, 1
659; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
660; CHECK-NEXT:    vnot.v v9, v8
661; CHECK-NEXT:    vsub.vx v8, v8, a1
662; CHECK-NEXT:    lui a0, 5
663; CHECK-NEXT:    addi a0, a0, 1365
664; CHECK-NEXT:    vand.vv v8, v9, v8
665; CHECK-NEXT:    vsrl.vi v9, v8, 1
666; CHECK-NEXT:    vand.vx v9, v9, a0
667; CHECK-NEXT:    lui a0, 3
668; CHECK-NEXT:    addi a0, a0, 819
669; CHECK-NEXT:    vsub.vv v8, v8, v9
670; CHECK-NEXT:    vand.vx v9, v8, a0
671; CHECK-NEXT:    vsrl.vi v8, v8, 2
672; CHECK-NEXT:    vand.vx v8, v8, a0
673; CHECK-NEXT:    lui a0, 1
674; CHECK-NEXT:    addi a0, a0, -241
675; CHECK-NEXT:    vadd.vv v8, v9, v8
676; CHECK-NEXT:    vsrl.vi v9, v8, 4
677; CHECK-NEXT:    vadd.vv v8, v8, v9
678; CHECK-NEXT:    vand.vx v8, v8, a0
679; CHECK-NEXT:    li a0, 257
680; CHECK-NEXT:    vmul.vx v8, v8, a0
681; CHECK-NEXT:    vsrl.vi v8, v8, 8
682; CHECK-NEXT:    ret
683;
684; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked:
685; CHECK-ZVBB:       # %bb.0:
686; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
687; CHECK-ZVBB-NEXT:    vctz.v v8, v8
688; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same 16-bit expansion without v0.t predication.
689  %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl)
690  ret <vscale x 4 x i16> %v
691}
692
693declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
694
695define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
696; CHECK-LABEL: vp_cttz_nxv8i16:
697; CHECK:       # %bb.0:
698; CHECK-NEXT:    li a1, 1
699; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
700; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
701; CHECK-NEXT:    lui a0, 5
702; CHECK-NEXT:    vnot.v v8, v8, v0.t
703; CHECK-NEXT:    addi a0, a0, 1365
704; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
705; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
706; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
707; CHECK-NEXT:    lui a0, 3
708; CHECK-NEXT:    addi a0, a0, 819
709; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
710; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
711; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
712; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
713; CHECK-NEXT:    lui a0, 1
714; CHECK-NEXT:    addi a0, a0, -241
715; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
716; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
717; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
718; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
719; CHECK-NEXT:    li a0, 257
720; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
721; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
722; CHECK-NEXT:    ret
723;
724; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16:
725; CHECK-ZVBB:       # %bb.0:
726; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
727; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
728; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv8i16 (e16/m2, scratch regs step to v10):
  ; 16-bit popcount(~x & (x-1)) expansion; Zvbb lowers to vctz.v.
729  %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
730  ret <vscale x 8 x i16> %v
731}
732
733define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
734; CHECK-LABEL: vp_cttz_nxv8i16_unmasked:
735; CHECK:       # %bb.0:
736; CHECK-NEXT:    li a1, 1
737; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
738; CHECK-NEXT:    vnot.v v10, v8
739; CHECK-NEXT:    vsub.vx v8, v8, a1
740; CHECK-NEXT:    lui a0, 5
741; CHECK-NEXT:    addi a0, a0, 1365
742; CHECK-NEXT:    vand.vv v8, v10, v8
743; CHECK-NEXT:    vsrl.vi v10, v8, 1
744; CHECK-NEXT:    vand.vx v10, v10, a0
745; CHECK-NEXT:    lui a0, 3
746; CHECK-NEXT:    addi a0, a0, 819
747; CHECK-NEXT:    vsub.vv v8, v8, v10
748; CHECK-NEXT:    vand.vx v10, v8, a0
749; CHECK-NEXT:    vsrl.vi v8, v8, 2
750; CHECK-NEXT:    vand.vx v8, v8, a0
751; CHECK-NEXT:    lui a0, 1
752; CHECK-NEXT:    addi a0, a0, -241
753; CHECK-NEXT:    vadd.vv v8, v10, v8
754; CHECK-NEXT:    vsrl.vi v10, v8, 4
755; CHECK-NEXT:    vadd.vv v8, v8, v10
756; CHECK-NEXT:    vand.vx v8, v8, a0
757; CHECK-NEXT:    li a0, 257
758; CHECK-NEXT:    vmul.vx v8, v8, a0
759; CHECK-NEXT:    vsrl.vi v8, v8, 8
760; CHECK-NEXT:    ret
761;
762; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked:
763; CHECK-ZVBB:       # %bb.0:
764; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
765; CHECK-ZVBB-NEXT:    vctz.v v8, v8
766; CHECK-ZVBB-NEXT:    ret
  ; Unmasked variant: same 16-bit expansion without v0.t predication.
767  %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl)
768  ret <vscale x 8 x i16> %v
769}
770
771declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
772
773define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
774; CHECK-LABEL: vp_cttz_nxv16i16:
775; CHECK:       # %bb.0:
776; CHECK-NEXT:    li a1, 1
777; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
778; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
779; CHECK-NEXT:    lui a0, 5
780; CHECK-NEXT:    vnot.v v8, v8, v0.t
781; CHECK-NEXT:    addi a0, a0, 1365
782; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
783; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
784; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
785; CHECK-NEXT:    lui a0, 3
786; CHECK-NEXT:    addi a0, a0, 819
787; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
788; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
789; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
790; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
791; CHECK-NEXT:    lui a0, 1
792; CHECK-NEXT:    addi a0, a0, -241
793; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
794; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
795; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
796; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
797; CHECK-NEXT:    li a0, 257
798; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
799; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
800; CHECK-NEXT:    ret
801;
802; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16:
803; CHECK-ZVBB:       # %bb.0:
804; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
805; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
806; CHECK-ZVBB-NEXT:    ret
  ; Masked vp.cttz on nxv16i16 (e16/m4, scratch regs step to v12):
  ; 16-bit popcount(~x & (x-1)) expansion; Zvbb lowers to vctz.v.
807  %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
808  ret <vscale x 16 x i16> %v
809}
810
; Unmasked variant of the nxv16i16 test above (mask = splat true): same
; popcount-based expansion at LMUL=4 without predication; Zvbb emits vctz.v.
define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vnot.v v12, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i16> %v
}
848
849declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
850
; Masked vp.cttz on <vscale x 32 x i16> (LMUL=8, register group v16): same
; predicated popcount expansion; note the slightly different register shuffle
; (vsub.vv into v16) the allocator picks at m8. Zvbb: one masked vctz.v.
define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v16, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i16> %v
}
888
; Unmasked vp.cttz on <vscale x 32 x i16> (LMUL=8): unpredicated popcount
; expansion; Zvbb lowers to a single vctz.v.
define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vnot.v v16, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x i16> %v
}
926
927declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
928
; Masked vp.cttz on <vscale x 1 x i32>: i32 element flavor of the predicated
; popcount expansion — 32-bit SWAR constants (0x55555555, 0x33333333,
; 0x0f0f0f0f, *0x01010101, >>24). Zvbb: one masked vctz.v.
define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}
967
; Unmasked vp.cttz on <vscale x 1 x i32> (fractional LMUL mf2): unpredicated
; 32-bit popcount expansion; Zvbb lowers to vctz.v.
define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i32> %v
}
1006
1007declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
1008
; Masked vp.cttz on <vscale x 2 x i32> (LMUL=1): predicated 32-bit popcount
; expansion; Zvbb emits one masked vctz.v.
define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsub.vx v9, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}
1047
; Unmasked vp.cttz on <vscale x 2 x i32> (LMUL=1): unpredicated 32-bit
; popcount expansion; Zvbb lowers to vctz.v.
define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vnot.v v9, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i32> %v
}
1086
1087declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
1088
; Masked vp.cttz on <vscale x 4 x i32> (LMUL=2, scratch group v10):
; predicated 32-bit popcount expansion; Zvbb emits one masked vctz.v.
define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsub.vx v10, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}
1127
; Unmasked vp.cttz on <vscale x 4 x i32> (LMUL=2): unpredicated 32-bit
; popcount expansion; Zvbb lowers to vctz.v.
define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vnot.v v10, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i32> %v
}
1166
1167declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
1168
; Masked vp.cttz on <vscale x 8 x i32> (LMUL=4, scratch group v12):
; predicated 32-bit popcount expansion; Zvbb emits one masked vctz.v.
define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}
1207
; Unmasked vp.cttz on <vscale x 8 x i32> (LMUL=4): unpredicated 32-bit
; popcount expansion; Zvbb lowers to vctz.v.
define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vnot.v v12, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i32> %v
}
1246
1247declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
1248
; Masked vp.cttz on <vscale x 16 x i32> (LMUL=8): predicated 32-bit popcount
; expansion; like the other m8 case, the allocator routes the vsub.vv result
; through v16 instead of back into v8. Zvbb: one masked vctz.v.
define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v16, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i32> %v
}
1287
; Unmasked vp.cttz on <vscale x 16 x i32> (LMUL=8): unpredicated 32-bit
; popcount expansion; Zvbb lowers to vctz.v.
define <vscale x 16 x i32> @vp_cttz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vnot.v v16, v8
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vand.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i32> %v
}
1326
1327declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
1328
; Masked vp.cttz on <vscale x 1 x i64>: first element width where RV32 and
; RV64 diverge. RV32 cannot hold a 64-bit constant in a scalar register, so
; each SWAR mask is materialized by splatting its repeating 32-bit half with
; vmv.v.x under a temporary e32 vsetvli, then used via vand.vv/vmul.vv under
; e64. RV64 builds the full 64-bit constants with addiw/slli/add and uses
; vand.vx/vmul.vx. Final shift is >>56 (via vsrl.vx). Zvbb: one masked vctz.v.
define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %v
}
1418
; Unmasked vp.cttz on <vscale x 1 x i64>: same RV32/RV64 split as the masked
; version (RV32 splats 32-bit constant halves with vmv.v.x under e32; RV64
; composes full 64-bit constants in scalar registers), without v0.t
; predication. Zvbb lowers to a single vctz.v.
define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv1i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vnot.v v9, v8
; RV32-NEXT:    vsub.vx v8, v8, a1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_nxv1i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsub.vx v9, v8, a1
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %v
}
1508
1509declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
1510
; Masked vp.cttz on <vscale x 2 x i64> (LMUL=2): same RV32/RV64 constant
; materialization split as nxv1i64, with v10/v12 register groups. Zvbb emits
; a single masked vctz.v.
define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a1, 1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_cttz_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 1
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    lui a3, 209715
; RV64-NEXT:    lui a4, 61681
; RV64-NEXT:    lui a5, 4112
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
; RV64-NEXT:    addiw a0, a2, 1365
; RV64-NEXT:    addiw a1, a3, 819
; RV64-NEXT:    addiw a2, a4, -241
; RV64-NEXT:    addiw a3, a5, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %v
}
1600
; Unmasked vp.cttz on <vscale x 2 x i64>: the mask operand is an all-true
; splat (i1 true), so the non-Zvbb expansion is the same
; popcount-of-((~x) & (x - 1)) sequence as the masked form but with no v0.t
; predication on any instruction; Zvbb emits a single unpredicated vctz.v.
1601define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
1602; RV32-LABEL: vp_cttz_nxv2i64_unmasked:
1603; RV32:       # %bb.0:
1604; RV32-NEXT:    li a1, 1
1605; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1606; RV32-NEXT:    vnot.v v10, v8
1607; RV32-NEXT:    vsub.vx v8, v8, a1
1608; RV32-NEXT:    lui a1, 349525
1609; RV32-NEXT:    addi a1, a1, 1365
1610; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
1611; RV32-NEXT:    vmv.v.x v12, a1
1612; RV32-NEXT:    lui a1, 209715
1613; RV32-NEXT:    addi a1, a1, 819
1614; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1615; RV32-NEXT:    vand.vv v8, v10, v8
1616; RV32-NEXT:    vsrl.vi v10, v8, 1
1617; RV32-NEXT:    vand.vv v10, v10, v12
1618; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
1619; RV32-NEXT:    vmv.v.x v12, a1
1620; RV32-NEXT:    lui a1, 61681
1621; RV32-NEXT:    addi a1, a1, -241
1622; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1623; RV32-NEXT:    vsub.vv v8, v8, v10
1624; RV32-NEXT:    vand.vv v10, v8, v12
1625; RV32-NEXT:    vsrl.vi v8, v8, 2
1626; RV32-NEXT:    vand.vv v8, v8, v12
1627; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
1628; RV32-NEXT:    vmv.v.x v12, a1
1629; RV32-NEXT:    lui a1, 4112
1630; RV32-NEXT:    addi a1, a1, 257
1631; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1632; RV32-NEXT:    vadd.vv v8, v10, v8
1633; RV32-NEXT:    vsrl.vi v10, v8, 4
1634; RV32-NEXT:    vadd.vv v8, v8, v10
1635; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
1636; RV32-NEXT:    vmv.v.x v10, a1
1637; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1638; RV32-NEXT:    vand.vv v8, v8, v12
1639; RV32-NEXT:    vmul.vv v8, v8, v10
1640; RV32-NEXT:    li a0, 56
1641; RV32-NEXT:    vsrl.vx v8, v8, a0
1642; RV32-NEXT:    ret
1643;
1644; RV64-LABEL: vp_cttz_nxv2i64_unmasked:
1645; RV64:       # %bb.0:
1646; RV64-NEXT:    li a1, 1
1647; RV64-NEXT:    lui a2, 349525
1648; RV64-NEXT:    lui a3, 209715
1649; RV64-NEXT:    lui a4, 61681
1650; RV64-NEXT:    lui a5, 4112
1651; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1652; RV64-NEXT:    vsub.vx v10, v8, a1
1653; RV64-NEXT:    addiw a0, a2, 1365
1654; RV64-NEXT:    addiw a1, a3, 819
1655; RV64-NEXT:    addiw a2, a4, -241
1656; RV64-NEXT:    addiw a3, a5, 257
1657; RV64-NEXT:    slli a4, a0, 32
1658; RV64-NEXT:    add a0, a0, a4
1659; RV64-NEXT:    slli a4, a1, 32
1660; RV64-NEXT:    add a1, a1, a4
1661; RV64-NEXT:    slli a4, a2, 32
1662; RV64-NEXT:    add a2, a2, a4
1663; RV64-NEXT:    slli a4, a3, 32
1664; RV64-NEXT:    add a3, a3, a4
1665; RV64-NEXT:    vnot.v v8, v8
1666; RV64-NEXT:    vand.vv v8, v8, v10
1667; RV64-NEXT:    vsrl.vi v10, v8, 1
1668; RV64-NEXT:    vand.vx v10, v10, a0
1669; RV64-NEXT:    vsub.vv v8, v8, v10
1670; RV64-NEXT:    vand.vx v10, v8, a1
1671; RV64-NEXT:    vsrl.vi v8, v8, 2
1672; RV64-NEXT:    vand.vx v8, v8, a1
1673; RV64-NEXT:    vadd.vv v8, v10, v8
1674; RV64-NEXT:    vsrl.vi v10, v8, 4
1675; RV64-NEXT:    vadd.vv v8, v8, v10
1676; RV64-NEXT:    vand.vx v8, v8, a2
1677; RV64-NEXT:    vmul.vx v8, v8, a3
1678; RV64-NEXT:    li a0, 56
1679; RV64-NEXT:    vsrl.vx v8, v8, a0
1680; RV64-NEXT:    ret
1681;
1682; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64_unmasked:
1683; CHECK-ZVBB:       # %bb.0:
1684; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1685; CHECK-ZVBB-NEXT:    vctz.v v8, v8
1686; CHECK-ZVBB-NEXT:    ret
1687  %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1688  ret <vscale x 2 x i64> %v
1689}
1690
1691declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
1692
; Masked vp.cttz on <vscale x 4 x i64> (is_zero_poison = i1 false), lowered at
; LMUL=m4. Without Zvbb: popcount of ((~x) & (x - 1)) via the
; 0x5555…/0x3333…/0x0f0f…/0x0101… magic constants, final vmul + vsrl by 56;
; RV32 splats constants from 32-bit halves at e32, RV64 builds them with
; addiw/slli/add. With Zvbb: a single masked vctz.v.
1693define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1694; RV32-LABEL: vp_cttz_nxv4i64:
1695; RV32:       # %bb.0:
1696; RV32-NEXT:    li a1, 1
1697; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1698; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
1699; RV32-NEXT:    lui a1, 349525
1700; RV32-NEXT:    addi a1, a1, 1365
1701; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1702; RV32-NEXT:    vmv.v.x v16, a1
1703; RV32-NEXT:    lui a1, 209715
1704; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1705; RV32-NEXT:    vnot.v v8, v8, v0.t
1706; RV32-NEXT:    addi a1, a1, 819
1707; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
1708; RV32-NEXT:    vsrl.vi v8, v12, 1, v0.t
1709; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
1710; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1711; RV32-NEXT:    vmv.v.x v8, a1
1712; RV32-NEXT:    lui a1, 61681
1713; RV32-NEXT:    addi a1, a1, -241
1714; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1715; RV32-NEXT:    vsub.vv v12, v12, v16, v0.t
1716; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
1717; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
1718; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
1719; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1720; RV32-NEXT:    vmv.v.x v12, a1
1721; RV32-NEXT:    lui a1, 4112
1722; RV32-NEXT:    addi a1, a1, 257
1723; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1724; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
1725; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
1726; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
1727; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1728; RV32-NEXT:    vmv.v.x v16, a1
1729; RV32-NEXT:    li a1, 56
1730; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1731; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
1732; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
1733; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
1734; RV32-NEXT:    ret
1735;
1736; RV64-LABEL: vp_cttz_nxv4i64:
1737; RV64:       # %bb.0:
1738; RV64-NEXT:    li a1, 1
1739; RV64-NEXT:    lui a2, 349525
1740; RV64-NEXT:    lui a3, 209715
1741; RV64-NEXT:    lui a4, 61681
1742; RV64-NEXT:    lui a5, 4112
1743; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1744; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
1745; RV64-NEXT:    addiw a0, a2, 1365
1746; RV64-NEXT:    addiw a1, a3, 819
1747; RV64-NEXT:    addiw a2, a4, -241
1748; RV64-NEXT:    addiw a3, a5, 257
1749; RV64-NEXT:    slli a4, a0, 32
1750; RV64-NEXT:    add a0, a0, a4
1751; RV64-NEXT:    slli a4, a1, 32
1752; RV64-NEXT:    add a1, a1, a4
1753; RV64-NEXT:    slli a4, a2, 32
1754; RV64-NEXT:    add a2, a2, a4
1755; RV64-NEXT:    slli a4, a3, 32
1756; RV64-NEXT:    add a3, a3, a4
1757; RV64-NEXT:    vnot.v v8, v8, v0.t
1758; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
1759; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
1760; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
1761; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
1762; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
1763; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
1764; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1765; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
1766; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
1767; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
1768; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
1769; RV64-NEXT:    li a0, 56
1770; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
1771; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
1772; RV64-NEXT:    ret
1773;
1774; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64:
1775; CHECK-ZVBB:       # %bb.0:
1776; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1777; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
1778; CHECK-ZVBB-NEXT:    ret
1779  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
1780  ret <vscale x 4 x i64> %v
1781}
1782
; Unmasked vp.cttz on <vscale x 4 x i64> (all-true splat mask), LMUL=m4:
; same popcount-of-((~x) & (x - 1)) expansion as the masked variant but with
; no v0.t predication; Zvbb emits a single unpredicated vctz.v.
1783define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
1784; RV32-LABEL: vp_cttz_nxv4i64_unmasked:
1785; RV32:       # %bb.0:
1786; RV32-NEXT:    li a1, 1
1787; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1788; RV32-NEXT:    vnot.v v12, v8
1789; RV32-NEXT:    vsub.vx v8, v8, a1
1790; RV32-NEXT:    lui a1, 349525
1791; RV32-NEXT:    addi a1, a1, 1365
1792; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1793; RV32-NEXT:    vmv.v.x v16, a1
1794; RV32-NEXT:    lui a1, 209715
1795; RV32-NEXT:    addi a1, a1, 819
1796; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1797; RV32-NEXT:    vand.vv v8, v12, v8
1798; RV32-NEXT:    vsrl.vi v12, v8, 1
1799; RV32-NEXT:    vand.vv v12, v12, v16
1800; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1801; RV32-NEXT:    vmv.v.x v16, a1
1802; RV32-NEXT:    lui a1, 61681
1803; RV32-NEXT:    addi a1, a1, -241
1804; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1805; RV32-NEXT:    vsub.vv v8, v8, v12
1806; RV32-NEXT:    vand.vv v12, v8, v16
1807; RV32-NEXT:    vsrl.vi v8, v8, 2
1808; RV32-NEXT:    vand.vv v8, v8, v16
1809; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1810; RV32-NEXT:    vmv.v.x v16, a1
1811; RV32-NEXT:    lui a1, 4112
1812; RV32-NEXT:    addi a1, a1, 257
1813; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1814; RV32-NEXT:    vadd.vv v8, v12, v8
1815; RV32-NEXT:    vsrl.vi v12, v8, 4
1816; RV32-NEXT:    vadd.vv v8, v8, v12
1817; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
1818; RV32-NEXT:    vmv.v.x v12, a1
1819; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1820; RV32-NEXT:    vand.vv v8, v8, v16
1821; RV32-NEXT:    vmul.vv v8, v8, v12
1822; RV32-NEXT:    li a0, 56
1823; RV32-NEXT:    vsrl.vx v8, v8, a0
1824; RV32-NEXT:    ret
1825;
1826; RV64-LABEL: vp_cttz_nxv4i64_unmasked:
1827; RV64:       # %bb.0:
1828; RV64-NEXT:    li a1, 1
1829; RV64-NEXT:    lui a2, 349525
1830; RV64-NEXT:    lui a3, 209715
1831; RV64-NEXT:    lui a4, 61681
1832; RV64-NEXT:    lui a5, 4112
1833; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1834; RV64-NEXT:    vsub.vx v12, v8, a1
1835; RV64-NEXT:    addiw a0, a2, 1365
1836; RV64-NEXT:    addiw a1, a3, 819
1837; RV64-NEXT:    addiw a2, a4, -241
1838; RV64-NEXT:    addiw a3, a5, 257
1839; RV64-NEXT:    slli a4, a0, 32
1840; RV64-NEXT:    add a0, a0, a4
1841; RV64-NEXT:    slli a4, a1, 32
1842; RV64-NEXT:    add a1, a1, a4
1843; RV64-NEXT:    slli a4, a2, 32
1844; RV64-NEXT:    add a2, a2, a4
1845; RV64-NEXT:    slli a4, a3, 32
1846; RV64-NEXT:    add a3, a3, a4
1847; RV64-NEXT:    vnot.v v8, v8
1848; RV64-NEXT:    vand.vv v8, v8, v12
1849; RV64-NEXT:    vsrl.vi v12, v8, 1
1850; RV64-NEXT:    vand.vx v12, v12, a0
1851; RV64-NEXT:    vsub.vv v8, v8, v12
1852; RV64-NEXT:    vand.vx v12, v8, a1
1853; RV64-NEXT:    vsrl.vi v8, v8, 2
1854; RV64-NEXT:    vand.vx v8, v8, a1
1855; RV64-NEXT:    vadd.vv v8, v12, v8
1856; RV64-NEXT:    vsrl.vi v12, v8, 4
1857; RV64-NEXT:    vadd.vv v8, v8, v12
1858; RV64-NEXT:    vand.vx v8, v8, a2
1859; RV64-NEXT:    vmul.vx v8, v8, a3
1860; RV64-NEXT:    li a0, 56
1861; RV64-NEXT:    vsrl.vx v8, v8, a0
1862; RV64-NEXT:    ret
1863;
1864; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64_unmasked:
1865; CHECK-ZVBB:       # %bb.0:
1866; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1867; CHECK-ZVBB-NEXT:    vctz.v v8, v8
1868; CHECK-ZVBB-NEXT:    ret
1869  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1870  ret <vscale x 4 x i64> %v
1871}
1872
1873declare <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64>, i1 immarg, <vscale x 7 x i1>, i32)
1874
; Masked vp.cttz on the non-power-of-two type <vscale x 7 x i64>; the checks
; show it is lowered with the e64/m8 register group (same code shape as the
; nxv8i64 case below). Without Zvbb: popcount of ((~x) & (x - 1)) via the
; magic-constant sequence plus final vmul and vsrl by 56; with Zvbb: a single
; masked vctz.v.
1875define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1876; RV32-LABEL: vp_cttz_nxv7i64:
1877; RV32:       # %bb.0:
1878; RV32-NEXT:    li a1, 1
1879; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1880; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
1881; RV32-NEXT:    lui a1, 349525
1882; RV32-NEXT:    addi a1, a1, 1365
1883; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1884; RV32-NEXT:    vmv.v.x v24, a1
1885; RV32-NEXT:    lui a1, 209715
1886; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1887; RV32-NEXT:    vnot.v v8, v8, v0.t
1888; RV32-NEXT:    addi a1, a1, 819
1889; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
1890; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
1891; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
1892; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1893; RV32-NEXT:    vmv.v.x v8, a1
1894; RV32-NEXT:    lui a1, 61681
1895; RV32-NEXT:    addi a1, a1, -241
1896; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1897; RV32-NEXT:    vsub.vv v24, v16, v24, v0.t
1898; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
1899; RV32-NEXT:    vsrl.vi v24, v24, 2, v0.t
1900; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
1901; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1902; RV32-NEXT:    vmv.v.x v8, a1
1903; RV32-NEXT:    lui a1, 4112
1904; RV32-NEXT:    addi a1, a1, 257
1905; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1906; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
1907; RV32-NEXT:    vsrl.vi v24, v16, 4, v0.t
1908; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
1909; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1910; RV32-NEXT:    vmv.v.x v24, a1
1911; RV32-NEXT:    li a1, 56
1912; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1913; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
1914; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
1915; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
1916; RV32-NEXT:    ret
1917;
1918; RV64-LABEL: vp_cttz_nxv7i64:
1919; RV64:       # %bb.0:
1920; RV64-NEXT:    li a1, 1
1921; RV64-NEXT:    lui a2, 349525
1922; RV64-NEXT:    lui a3, 209715
1923; RV64-NEXT:    lui a4, 61681
1924; RV64-NEXT:    lui a5, 4112
1925; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1926; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
1927; RV64-NEXT:    addiw a0, a2, 1365
1928; RV64-NEXT:    addiw a1, a3, 819
1929; RV64-NEXT:    addiw a2, a4, -241
1930; RV64-NEXT:    addiw a3, a5, 257
1931; RV64-NEXT:    slli a4, a0, 32
1932; RV64-NEXT:    add a0, a0, a4
1933; RV64-NEXT:    slli a4, a1, 32
1934; RV64-NEXT:    add a1, a1, a4
1935; RV64-NEXT:    slli a4, a2, 32
1936; RV64-NEXT:    add a2, a2, a4
1937; RV64-NEXT:    slli a4, a3, 32
1938; RV64-NEXT:    add a3, a3, a4
1939; RV64-NEXT:    vnot.v v8, v8, v0.t
1940; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
1941; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
1942; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
1943; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
1944; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
1945; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
1946; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
1947; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
1948; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
1949; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
1950; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
1951; RV64-NEXT:    li a0, 56
1952; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
1953; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
1954; RV64-NEXT:    ret
1955;
1956; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64:
1957; CHECK-ZVBB:       # %bb.0:
1958; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1959; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
1960; CHECK-ZVBB-NEXT:    ret
1961  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
1962  ret <vscale x 7 x i64> %v
1963}
1964
; Unmasked vp.cttz on <vscale x 7 x i64> (all-true splat mask), lowered at
; e64/m8: same popcount-of-((~x) & (x - 1)) expansion with no v0.t
; predication; Zvbb emits a single unpredicated vctz.v.
1965define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
1966; RV32-LABEL: vp_cttz_nxv7i64_unmasked:
1967; RV32:       # %bb.0:
1968; RV32-NEXT:    li a1, 1
1969; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1970; RV32-NEXT:    vnot.v v16, v8
1971; RV32-NEXT:    vsub.vx v8, v8, a1
1972; RV32-NEXT:    lui a1, 349525
1973; RV32-NEXT:    addi a1, a1, 1365
1974; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1975; RV32-NEXT:    vmv.v.x v24, a1
1976; RV32-NEXT:    lui a1, 209715
1977; RV32-NEXT:    addi a1, a1, 819
1978; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1979; RV32-NEXT:    vand.vv v8, v16, v8
1980; RV32-NEXT:    vsrl.vi v16, v8, 1
1981; RV32-NEXT:    vand.vv v24, v16, v24
1982; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1983; RV32-NEXT:    vmv.v.x v16, a1
1984; RV32-NEXT:    lui a1, 61681
1985; RV32-NEXT:    addi a1, a1, -241
1986; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1987; RV32-NEXT:    vsub.vv v8, v8, v24
1988; RV32-NEXT:    vand.vv v24, v8, v16
1989; RV32-NEXT:    vsrl.vi v8, v8, 2
1990; RV32-NEXT:    vand.vv v8, v8, v16
1991; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
1992; RV32-NEXT:    vmv.v.x v16, a1
1993; RV32-NEXT:    lui a1, 4112
1994; RV32-NEXT:    addi a1, a1, 257
1995; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
1996; RV32-NEXT:    vadd.vv v8, v24, v8
1997; RV32-NEXT:    vsrl.vi v24, v8, 4
1998; RV32-NEXT:    vadd.vv v8, v8, v24
1999; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2000; RV32-NEXT:    vmv.v.x v24, a1
2001; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2002; RV32-NEXT:    vand.vv v8, v8, v16
2003; RV32-NEXT:    vmul.vv v8, v8, v24
2004; RV32-NEXT:    li a0, 56
2005; RV32-NEXT:    vsrl.vx v8, v8, a0
2006; RV32-NEXT:    ret
2007;
2008; RV64-LABEL: vp_cttz_nxv7i64_unmasked:
2009; RV64:       # %bb.0:
2010; RV64-NEXT:    li a1, 1
2011; RV64-NEXT:    lui a2, 349525
2012; RV64-NEXT:    lui a3, 209715
2013; RV64-NEXT:    lui a4, 61681
2014; RV64-NEXT:    lui a5, 4112
2015; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2016; RV64-NEXT:    vsub.vx v16, v8, a1
2017; RV64-NEXT:    addiw a0, a2, 1365
2018; RV64-NEXT:    addiw a1, a3, 819
2019; RV64-NEXT:    addiw a2, a4, -241
2020; RV64-NEXT:    addiw a3, a5, 257
2021; RV64-NEXT:    slli a4, a0, 32
2022; RV64-NEXT:    add a0, a0, a4
2023; RV64-NEXT:    slli a4, a1, 32
2024; RV64-NEXT:    add a1, a1, a4
2025; RV64-NEXT:    slli a4, a2, 32
2026; RV64-NEXT:    add a2, a2, a4
2027; RV64-NEXT:    slli a4, a3, 32
2028; RV64-NEXT:    add a3, a3, a4
2029; RV64-NEXT:    vnot.v v8, v8
2030; RV64-NEXT:    vand.vv v8, v8, v16
2031; RV64-NEXT:    vsrl.vi v16, v8, 1
2032; RV64-NEXT:    vand.vx v16, v16, a0
2033; RV64-NEXT:    vsub.vv v8, v8, v16
2034; RV64-NEXT:    vand.vx v16, v8, a1
2035; RV64-NEXT:    vsrl.vi v8, v8, 2
2036; RV64-NEXT:    vand.vx v8, v8, a1
2037; RV64-NEXT:    vadd.vv v8, v16, v8
2038; RV64-NEXT:    vsrl.vi v16, v8, 4
2039; RV64-NEXT:    vadd.vv v8, v8, v16
2040; RV64-NEXT:    vand.vx v8, v8, a2
2041; RV64-NEXT:    vmul.vx v8, v8, a3
2042; RV64-NEXT:    li a0, 56
2043; RV64-NEXT:    vsrl.vx v8, v8, a0
2044; RV64-NEXT:    ret
2045;
2046; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64_unmasked:
2047; CHECK-ZVBB:       # %bb.0:
2048; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2049; CHECK-ZVBB-NEXT:    vctz.v v8, v8
2050; CHECK-ZVBB-NEXT:    ret
2051  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> splat (i1 true), i32 %evl)
2052  ret <vscale x 7 x i64> %v
2053}
2054
2055declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
2056
; Masked vp.cttz on <vscale x 8 x i64> (is_zero_poison = i1 false), lowered at
; LMUL=m8. Without Zvbb: popcount of ((~x) & (x - 1)) via the
; 0x5555…/0x3333…/0x0f0f…/0x0101… magic constants, final vmul + vsrl by 56;
; RV32 splats each 64-bit constant from its 32-bit half at e32, RV64 builds
; them with addiw/slli/add. With Zvbb: a single masked vctz.v.
2057define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2058; RV32-LABEL: vp_cttz_nxv8i64:
2059; RV32:       # %bb.0:
2060; RV32-NEXT:    li a1, 1
2061; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2062; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
2063; RV32-NEXT:    lui a1, 349525
2064; RV32-NEXT:    addi a1, a1, 1365
2065; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2066; RV32-NEXT:    vmv.v.x v24, a1
2067; RV32-NEXT:    lui a1, 209715
2068; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2069; RV32-NEXT:    vnot.v v8, v8, v0.t
2070; RV32-NEXT:    addi a1, a1, 819
2071; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
2072; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
2073; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
2074; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2075; RV32-NEXT:    vmv.v.x v8, a1
2076; RV32-NEXT:    lui a1, 61681
2077; RV32-NEXT:    addi a1, a1, -241
2078; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2079; RV32-NEXT:    vsub.vv v24, v16, v24, v0.t
2080; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
2081; RV32-NEXT:    vsrl.vi v24, v24, 2, v0.t
2082; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
2083; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2084; RV32-NEXT:    vmv.v.x v8, a1
2085; RV32-NEXT:    lui a1, 4112
2086; RV32-NEXT:    addi a1, a1, 257
2087; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2088; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
2089; RV32-NEXT:    vsrl.vi v24, v16, 4, v0.t
2090; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
2091; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2092; RV32-NEXT:    vmv.v.x v24, a1
2093; RV32-NEXT:    li a1, 56
2094; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2095; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
2096; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
2097; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
2098; RV32-NEXT:    ret
2099;
2100; RV64-LABEL: vp_cttz_nxv8i64:
2101; RV64:       # %bb.0:
2102; RV64-NEXT:    li a1, 1
2103; RV64-NEXT:    lui a2, 349525
2104; RV64-NEXT:    lui a3, 209715
2105; RV64-NEXT:    lui a4, 61681
2106; RV64-NEXT:    lui a5, 4112
2107; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2108; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
2109; RV64-NEXT:    addiw a0, a2, 1365
2110; RV64-NEXT:    addiw a1, a3, 819
2111; RV64-NEXT:    addiw a2, a4, -241
2112; RV64-NEXT:    addiw a3, a5, 257
2113; RV64-NEXT:    slli a4, a0, 32
2114; RV64-NEXT:    add a0, a0, a4
2115; RV64-NEXT:    slli a4, a1, 32
2116; RV64-NEXT:    add a1, a1, a4
2117; RV64-NEXT:    slli a4, a2, 32
2118; RV64-NEXT:    add a2, a2, a4
2119; RV64-NEXT:    slli a4, a3, 32
2120; RV64-NEXT:    add a3, a3, a4
2121; RV64-NEXT:    vnot.v v8, v8, v0.t
2122; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
2123; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
2124; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
2125; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
2126; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
2127; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
2128; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
2129; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
2130; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
2131; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
2132; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
2133; RV64-NEXT:    li a0, 56
2134; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
2135; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
2136; RV64-NEXT:    ret
2137;
2138; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64:
2139; CHECK-ZVBB:       # %bb.0:
2140; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2141; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
2142; CHECK-ZVBB-NEXT:    ret
2143  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
2144  ret <vscale x 8 x i64> %v
2145}
2146
; Unmasked vp.cttz on <vscale x 8 x i64> (all-true splat mask), LMUL=m8:
; same popcount-of-((~x) & (x - 1)) expansion as the masked variant but with
; no v0.t predication; Zvbb emits a single unpredicated vctz.v.
2147define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
2148; RV32-LABEL: vp_cttz_nxv8i64_unmasked:
2149; RV32:       # %bb.0:
2150; RV32-NEXT:    li a1, 1
2151; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2152; RV32-NEXT:    vnot.v v16, v8
2153; RV32-NEXT:    vsub.vx v8, v8, a1
2154; RV32-NEXT:    lui a1, 349525
2155; RV32-NEXT:    addi a1, a1, 1365
2156; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2157; RV32-NEXT:    vmv.v.x v24, a1
2158; RV32-NEXT:    lui a1, 209715
2159; RV32-NEXT:    addi a1, a1, 819
2160; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2161; RV32-NEXT:    vand.vv v8, v16, v8
2162; RV32-NEXT:    vsrl.vi v16, v8, 1
2163; RV32-NEXT:    vand.vv v24, v16, v24
2164; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2165; RV32-NEXT:    vmv.v.x v16, a1
2166; RV32-NEXT:    lui a1, 61681
2167; RV32-NEXT:    addi a1, a1, -241
2168; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2169; RV32-NEXT:    vsub.vv v8, v8, v24
2170; RV32-NEXT:    vand.vv v24, v8, v16
2171; RV32-NEXT:    vsrl.vi v8, v8, 2
2172; RV32-NEXT:    vand.vv v8, v8, v16
2173; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2174; RV32-NEXT:    vmv.v.x v16, a1
2175; RV32-NEXT:    lui a1, 4112
2176; RV32-NEXT:    addi a1, a1, 257
2177; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2178; RV32-NEXT:    vadd.vv v8, v24, v8
2179; RV32-NEXT:    vsrl.vi v24, v8, 4
2180; RV32-NEXT:    vadd.vv v8, v8, v24
2181; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
2182; RV32-NEXT:    vmv.v.x v24, a1
2183; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2184; RV32-NEXT:    vand.vv v8, v8, v16
2185; RV32-NEXT:    vmul.vv v8, v8, v24
2186; RV32-NEXT:    li a0, 56
2187; RV32-NEXT:    vsrl.vx v8, v8, a0
2188; RV32-NEXT:    ret
2189;
2190; RV64-LABEL: vp_cttz_nxv8i64_unmasked:
2191; RV64:       # %bb.0:
2192; RV64-NEXT:    li a1, 1
2193; RV64-NEXT:    lui a2, 349525
2194; RV64-NEXT:    lui a3, 209715
2195; RV64-NEXT:    lui a4, 61681
2196; RV64-NEXT:    lui a5, 4112
2197; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2198; RV64-NEXT:    vsub.vx v16, v8, a1
2199; RV64-NEXT:    addiw a0, a2, 1365
2200; RV64-NEXT:    addiw a1, a3, 819
2201; RV64-NEXT:    addiw a2, a4, -241
2202; RV64-NEXT:    addiw a3, a5, 257
2203; RV64-NEXT:    slli a4, a0, 32
2204; RV64-NEXT:    add a0, a0, a4
2205; RV64-NEXT:    slli a4, a1, 32
2206; RV64-NEXT:    add a1, a1, a4
2207; RV64-NEXT:    slli a4, a2, 32
2208; RV64-NEXT:    add a2, a2, a4
2209; RV64-NEXT:    slli a4, a3, 32
2210; RV64-NEXT:    add a3, a3, a4
2211; RV64-NEXT:    vnot.v v8, v8
2212; RV64-NEXT:    vand.vv v8, v8, v16
2213; RV64-NEXT:    vsrl.vi v16, v8, 1
2214; RV64-NEXT:    vand.vx v16, v16, a0
2215; RV64-NEXT:    vsub.vv v8, v8, v16
2216; RV64-NEXT:    vand.vx v16, v8, a1
2217; RV64-NEXT:    vsrl.vi v8, v8, 2
2218; RV64-NEXT:    vand.vx v8, v8, a1
2219; RV64-NEXT:    vadd.vv v8, v16, v8
2220; RV64-NEXT:    vsrl.vi v16, v8, 4
2221; RV64-NEXT:    vadd.vv v8, v8, v16
2222; RV64-NEXT:    vand.vx v8, v8, a2
2223; RV64-NEXT:    vmul.vx v8, v8, a3
2224; RV64-NEXT:    li a0, 56
2225; RV64-NEXT:    vsrl.vx v8, v8, a0
2226; RV64-NEXT:    ret
2227;
2228; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64_unmasked:
2229; CHECK-ZVBB:       # %bb.0:
2230; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2231; CHECK-ZVBB-NEXT:    vctz.v v8, v8
2232; CHECK-ZVBB-NEXT:    ret
2233  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl)
2234  ret <vscale x 8 x i64> %v
2235}
2236
2237declare <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
2238
2239define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2240; RV32-LABEL: vp_cttz_nxv16i64:
2241; RV32:       # %bb.0:
2242; RV32-NEXT:    addi sp, sp, -16
2243; RV32-NEXT:    .cfi_def_cfa_offset 16
2244; RV32-NEXT:    csrr a1, vlenb
2245; RV32-NEXT:    li a2, 56
2246; RV32-NEXT:    mul a1, a1, a2
2247; RV32-NEXT:    sub sp, sp, a1
2248; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
2249; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
2250; RV32-NEXT:    vmv1r.v v24, v0
2251; RV32-NEXT:    csrr a1, vlenb
2252; RV32-NEXT:    slli a1, a1, 5
2253; RV32-NEXT:    add a1, sp, a1
2254; RV32-NEXT:    addi a1, a1, 16
2255; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
2256; RV32-NEXT:    csrr a1, vlenb
2257; RV32-NEXT:    li a2, 1
2258; RV32-NEXT:    srli a3, a1, 3
2259; RV32-NEXT:    sub a4, a0, a1
2260; RV32-NEXT:    vslidedown.vx v0, v0, a3
2261; RV32-NEXT:    sltu a3, a0, a4
2262; RV32-NEXT:    addi a3, a3, -1
2263; RV32-NEXT:    and a3, a3, a4
2264; RV32-NEXT:    lui a4, 349525
2265; RV32-NEXT:    addi a4, a4, 1365
2266; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2267; RV32-NEXT:    vsub.vx v8, v16, a2, v0.t
2268; RV32-NEXT:    vnot.v v16, v16, v0.t
2269; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
2270; RV32-NEXT:    csrr a5, vlenb
2271; RV32-NEXT:    li a6, 48
2272; RV32-NEXT:    mul a5, a5, a6
2273; RV32-NEXT:    add a5, sp, a5
2274; RV32-NEXT:    addi a5, a5, 16
2275; RV32-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
2276; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
2277; RV32-NEXT:    vmv.v.x v16, a4
2278; RV32-NEXT:    csrr a4, vlenb
2279; RV32-NEXT:    li a5, 40
2280; RV32-NEXT:    mul a4, a4, a5
2281; RV32-NEXT:    add a4, sp, a4
2282; RV32-NEXT:    addi a4, a4, 16
2283; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2284; RV32-NEXT:    csrr a4, vlenb
2285; RV32-NEXT:    li a5, 48
2286; RV32-NEXT:    mul a4, a4, a5
2287; RV32-NEXT:    add a4, sp, a4
2288; RV32-NEXT:    addi a4, a4, 16
2289; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
2290; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2291; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
2292; RV32-NEXT:    csrr a4, vlenb
2293; RV32-NEXT:    li a5, 24
2294; RV32-NEXT:    mul a4, a4, a5
2295; RV32-NEXT:    add a4, sp, a4
2296; RV32-NEXT:    addi a4, a4, 16
2297; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
2298; RV32-NEXT:    csrr a4, vlenb
2299; RV32-NEXT:    li a5, 40
2300; RV32-NEXT:    mul a4, a4, a5
2301; RV32-NEXT:    add a4, sp, a4
2302; RV32-NEXT:    addi a4, a4, 16
2303; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
2304; RV32-NEXT:    csrr a4, vlenb
2305; RV32-NEXT:    li a5, 24
2306; RV32-NEXT:    mul a4, a4, a5
2307; RV32-NEXT:    add a4, sp, a4
2308; RV32-NEXT:    addi a4, a4, 16
2309; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
2310; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
2311; RV32-NEXT:    csrr a4, vlenb
2312; RV32-NEXT:    li a5, 48
2313; RV32-NEXT:    mul a4, a4, a5
2314; RV32-NEXT:    add a4, sp, a4
2315; RV32-NEXT:    addi a4, a4, 16
2316; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
2317; RV32-NEXT:    vsub.vv v16, v8, v16, v0.t
2318; RV32-NEXT:    lui a4, 209715
2319; RV32-NEXT:    addi a4, a4, 819
2320; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
2321; RV32-NEXT:    vmv.v.x v8, a4
2322; RV32-NEXT:    csrr a4, vlenb
2323; RV32-NEXT:    li a5, 48
2324; RV32-NEXT:    mul a4, a4, a5
2325; RV32-NEXT:    add a4, sp, a4
2326; RV32-NEXT:    addi a4, a4, 16
2327; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
2328; RV32-NEXT:    csrr a4, vlenb
2329; RV32-NEXT:    li a5, 48
2330; RV32-NEXT:    mul a4, a4, a5
2331; RV32-NEXT:    add a4, sp, a4
2332; RV32-NEXT:    addi a4, a4, 16
2333; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
2334; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2335; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
2336; RV32-NEXT:    csrr a4, vlenb
2337; RV32-NEXT:    li a5, 24
2338; RV32-NEXT:    mul a4, a4, a5
2339; RV32-NEXT:    add a4, sp, a4
2340; RV32-NEXT:    addi a4, a4, 16
2341; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
2342; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
2343; RV32-NEXT:    csrr a4, vlenb
2344; RV32-NEXT:    li a5, 48
2345; RV32-NEXT:    mul a4, a4, a5
2346; RV32-NEXT:    add a4, sp, a4
2347; RV32-NEXT:    addi a4, a4, 16
2348; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
2349; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
2350; RV32-NEXT:    csrr a4, vlenb
2351; RV32-NEXT:    li a5, 24
2352; RV32-NEXT:    mul a4, a4, a5
2353; RV32-NEXT:    add a4, sp, a4
2354; RV32-NEXT:    addi a4, a4, 16
2355; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
2356; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
2357; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
2358; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
2359; RV32-NEXT:    lui a4, 61681
2360; RV32-NEXT:    addi a4, a4, -241
2361; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
2362; RV32-NEXT:    vmv.v.x v16, a4
2363; RV32-NEXT:    csrr a4, vlenb
2364; RV32-NEXT:    li a5, 24
2365; RV32-NEXT:    mul a4, a4, a5
2366; RV32-NEXT:    add a4, sp, a4
2367; RV32-NEXT:    addi a4, a4, 16
2368; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2369; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2370; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
2371; RV32-NEXT:    lui a4, 4112
2372; RV32-NEXT:    addi a4, a4, 257
2373; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
2374; RV32-NEXT:    vmv.v.x v16, a4
2375; RV32-NEXT:    csrr a4, vlenb
2376; RV32-NEXT:    slli a4, a4, 4
2377; RV32-NEXT:    add a4, sp, a4
2378; RV32-NEXT:    addi a4, a4, 16
2379; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2380; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2381; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
2382; RV32-NEXT:    li a3, 56
2383; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
2384; RV32-NEXT:    csrr a4, vlenb
2385; RV32-NEXT:    slli a4, a4, 3
2386; RV32-NEXT:    add a4, sp, a4
2387; RV32-NEXT:    addi a4, a4, 16
2388; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
2389; RV32-NEXT:    bltu a0, a1, .LBB46_2
2390; RV32-NEXT:  # %bb.1:
2391; RV32-NEXT:    mv a0, a1
2392; RV32-NEXT:  .LBB46_2:
2393; RV32-NEXT:    vmv1r.v v0, v24
2394; RV32-NEXT:    slli a1, a1, 5
2395; RV32-NEXT:    add a1, sp, a1
2396; RV32-NEXT:    addi a1, a1, 16
2397; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
2398; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2399; RV32-NEXT:    vsub.vx v16, v8, a2, v0.t
2400; RV32-NEXT:    vnot.v v8, v8, v0.t
2401; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
2402; RV32-NEXT:    addi a0, sp, 16
2403; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2404; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
2405; RV32-NEXT:    csrr a0, vlenb
2406; RV32-NEXT:    slli a0, a0, 5
2407; RV32-NEXT:    add a0, sp, a0
2408; RV32-NEXT:    addi a0, a0, 16
2409; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2410; RV32-NEXT:    csrr a0, vlenb
2411; RV32-NEXT:    li a1, 40
2412; RV32-NEXT:    mul a0, a0, a1
2413; RV32-NEXT:    add a0, sp, a0
2414; RV32-NEXT:    addi a0, a0, 16
2415; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2416; RV32-NEXT:    csrr a0, vlenb
2417; RV32-NEXT:    slli a0, a0, 5
2418; RV32-NEXT:    add a0, sp, a0
2419; RV32-NEXT:    addi a0, a0, 16
2420; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2421; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
2422; RV32-NEXT:    addi a0, sp, 16
2423; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2424; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
2425; RV32-NEXT:    csrr a0, vlenb
2426; RV32-NEXT:    li a1, 40
2427; RV32-NEXT:    mul a0, a0, a1
2428; RV32-NEXT:    add a0, sp, a0
2429; RV32-NEXT:    addi a0, a0, 16
2430; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
2431; RV32-NEXT:    csrr a0, vlenb
2432; RV32-NEXT:    li a1, 48
2433; RV32-NEXT:    mul a0, a0, a1
2434; RV32-NEXT:    add a0, sp, a0
2435; RV32-NEXT:    addi a0, a0, 16
2436; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2437; RV32-NEXT:    csrr a0, vlenb
2438; RV32-NEXT:    li a1, 40
2439; RV32-NEXT:    mul a0, a0, a1
2440; RV32-NEXT:    add a0, sp, a0
2441; RV32-NEXT:    addi a0, a0, 16
2442; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2443; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
2444; RV32-NEXT:    csrr a0, vlenb
2445; RV32-NEXT:    slli a0, a0, 5
2446; RV32-NEXT:    add a0, sp, a0
2447; RV32-NEXT:    addi a0, a0, 16
2448; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
2449; RV32-NEXT:    vmv8r.v v16, v8
2450; RV32-NEXT:    csrr a0, vlenb
2451; RV32-NEXT:    li a1, 40
2452; RV32-NEXT:    mul a0, a0, a1
2453; RV32-NEXT:    add a0, sp, a0
2454; RV32-NEXT:    addi a0, a0, 16
2455; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2456; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
2457; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
2458; RV32-NEXT:    csrr a0, vlenb
2459; RV32-NEXT:    slli a0, a0, 5
2460; RV32-NEXT:    add a0, sp, a0
2461; RV32-NEXT:    addi a0, a0, 16
2462; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2463; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
2464; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
2465; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
2466; RV32-NEXT:    csrr a0, vlenb
2467; RV32-NEXT:    li a1, 24
2468; RV32-NEXT:    mul a0, a0, a1
2469; RV32-NEXT:    add a0, sp, a0
2470; RV32-NEXT:    addi a0, a0, 16
2471; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2472; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
2473; RV32-NEXT:    csrr a0, vlenb
2474; RV32-NEXT:    slli a0, a0, 4
2475; RV32-NEXT:    add a0, sp, a0
2476; RV32-NEXT:    addi a0, a0, 16
2477; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2478; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
2479; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
2480; RV32-NEXT:    csrr a0, vlenb
2481; RV32-NEXT:    slli a0, a0, 3
2482; RV32-NEXT:    add a0, sp, a0
2483; RV32-NEXT:    addi a0, a0, 16
2484; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2485; RV32-NEXT:    csrr a0, vlenb
2486; RV32-NEXT:    li a1, 56
2487; RV32-NEXT:    mul a0, a0, a1
2488; RV32-NEXT:    add sp, sp, a0
2489; RV32-NEXT:    .cfi_def_cfa sp, 16
2490; RV32-NEXT:    addi sp, sp, 16
2491; RV32-NEXT:    .cfi_def_cfa_offset 0
2492; RV32-NEXT:    ret
2493;
2494; RV64-LABEL: vp_cttz_nxv16i64:
2495; RV64:       # %bb.0:
2496; RV64-NEXT:    addi sp, sp, -16
2497; RV64-NEXT:    .cfi_def_cfa_offset 16
2498; RV64-NEXT:    csrr a1, vlenb
2499; RV64-NEXT:    slli a1, a1, 4
2500; RV64-NEXT:    sub sp, sp, a1
2501; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2502; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
2503; RV64-NEXT:    vmv1r.v v24, v0
2504; RV64-NEXT:    csrr a1, vlenb
2505; RV64-NEXT:    slli a1, a1, 3
2506; RV64-NEXT:    add a1, sp, a1
2507; RV64-NEXT:    addi a1, a1, 16
2508; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
2509; RV64-NEXT:    csrr a1, vlenb
2510; RV64-NEXT:    li a2, 1
2511; RV64-NEXT:    lui a3, 349525
2512; RV64-NEXT:    lui a4, 209715
2513; RV64-NEXT:    lui a5, 61681
2514; RV64-NEXT:    lui a6, 4112
2515; RV64-NEXT:    srli a7, a1, 3
2516; RV64-NEXT:    sub t0, a0, a1
2517; RV64-NEXT:    addiw a3, a3, 1365
2518; RV64-NEXT:    addiw a4, a4, 819
2519; RV64-NEXT:    addiw a5, a5, -241
2520; RV64-NEXT:    addiw t1, a6, 257
2521; RV64-NEXT:    vslidedown.vx v0, v0, a7
2522; RV64-NEXT:    slli a7, a3, 32
2523; RV64-NEXT:    add a7, a3, a7
2524; RV64-NEXT:    slli a6, a4, 32
2525; RV64-NEXT:    add a6, a4, a6
2526; RV64-NEXT:    slli a3, a5, 32
2527; RV64-NEXT:    add a3, a5, a3
2528; RV64-NEXT:    slli a4, t1, 32
2529; RV64-NEXT:    add a4, t1, a4
2530; RV64-NEXT:    sltu a5, a0, t0
2531; RV64-NEXT:    addi a5, a5, -1
2532; RV64-NEXT:    and t0, a5, t0
2533; RV64-NEXT:    li a5, 56
2534; RV64-NEXT:    vsetvli zero, t0, e64, m8, ta, ma
2535; RV64-NEXT:    vsub.vx v8, v16, a2, v0.t
2536; RV64-NEXT:    vnot.v v16, v16, v0.t
2537; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
2538; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
2539; RV64-NEXT:    vand.vx v16, v16, a7, v0.t
2540; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
2541; RV64-NEXT:    vand.vx v16, v8, a6, v0.t
2542; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
2543; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
2544; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
2545; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
2546; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
2547; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
2548; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
2549; RV64-NEXT:    vsrl.vx v8, v8, a5, v0.t
2550; RV64-NEXT:    addi t0, sp, 16
2551; RV64-NEXT:    vs8r.v v8, (t0) # Unknown-size Folded Spill
2552; RV64-NEXT:    bltu a0, a1, .LBB46_2
2553; RV64-NEXT:  # %bb.1:
2554; RV64-NEXT:    mv a0, a1
2555; RV64-NEXT:  .LBB46_2:
2556; RV64-NEXT:    vmv1r.v v0, v24
2557; RV64-NEXT:    slli a1, a1, 3
2558; RV64-NEXT:    add a1, sp, a1
2559; RV64-NEXT:    addi a1, a1, 16
2560; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
2561; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2562; RV64-NEXT:    vsub.vx v16, v8, a2, v0.t
2563; RV64-NEXT:    vnot.v v8, v8, v0.t
2564; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
2565; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
2566; RV64-NEXT:    vand.vx v16, v16, a7, v0.t
2567; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
2568; RV64-NEXT:    vand.vx v16, v8, a6, v0.t
2569; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
2570; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
2571; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
2572; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
2573; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
2574; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
2575; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
2576; RV64-NEXT:    vsrl.vx v8, v8, a5, v0.t
2577; RV64-NEXT:    addi a0, sp, 16
2578; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
2579; RV64-NEXT:    csrr a0, vlenb
2580; RV64-NEXT:    slli a0, a0, 4
2581; RV64-NEXT:    add sp, sp, a0
2582; RV64-NEXT:    .cfi_def_cfa sp, 16
2583; RV64-NEXT:    addi sp, sp, 16
2584; RV64-NEXT:    .cfi_def_cfa_offset 0
2585; RV64-NEXT:    ret
2586;
2587; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64:
2588; CHECK-ZVBB:       # %bb.0:
2589; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
2590; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
2591; CHECK-ZVBB-NEXT:    csrr a1, vlenb
2592; CHECK-ZVBB-NEXT:    srli a2, a1, 3
2593; CHECK-ZVBB-NEXT:    sub a3, a0, a1
2594; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
2595; CHECK-ZVBB-NEXT:    sltu a2, a0, a3
2596; CHECK-ZVBB-NEXT:    addi a2, a2, -1
2597; CHECK-ZVBB-NEXT:    and a2, a2, a3
2598; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2599; CHECK-ZVBB-NEXT:    vctz.v v16, v16, v0.t
2600; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
2601; CHECK-ZVBB-NEXT:  # %bb.1:
2602; CHECK-ZVBB-NEXT:    mv a0, a1
2603; CHECK-ZVBB-NEXT:  .LBB46_2:
2604; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
2605; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2606; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
2607; CHECK-ZVBB-NEXT:    ret
2608  %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
2609  ret <vscale x 16 x i64> %v
2610}
2611
2612define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
; Unmasked VP cttz of the widest scalable type: computed as popcount of
; (~x) & (x - 1).  The source occupies two m8 register groups (v8 and v16),
; so the EVL is split: the high half (v16) runs under (evl - vlenb) clamped
; to zero via the sltu/addi/and sequence, and the low half (v8) runs under
; min(evl, vlenb) after the bltu branch.  RV32 splats the 32-bit bit patterns
; 0x55555555 / 0x33333333 / 0x0f0f0f0f / 0x01010101 as e32 vectors and spills
; them to the stack for reuse; RV64 materializes them as 64-bit scalars;
; ZVBB lowers each half directly to vctz.v.
; RV32-LABEL: vp_cttz_nxv16i64_unmasked:
; RV32:       # %bb.0:
2615; RV32-NEXT:    addi sp, sp, -16
2616; RV32-NEXT:    .cfi_def_cfa_offset 16
2617; RV32-NEXT:    csrr a1, vlenb
2618; RV32-NEXT:    slli a1, a1, 5
2619; RV32-NEXT:    sub sp, sp, a1
2620; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
2621; RV32-NEXT:    csrr a1, vlenb
2622; RV32-NEXT:    li a2, 1
2623; RV32-NEXT:    lui a3, 349525
2624; RV32-NEXT:    lui a4, 209715
2625; RV32-NEXT:    sub a5, a0, a1
2626; RV32-NEXT:    addi a3, a3, 1365
2627; RV32-NEXT:    addi a4, a4, 819
2628; RV32-NEXT:    vsetvli a6, zero, e32, m8, ta, ma
2629; RV32-NEXT:    vmv.v.x v0, a3
2630; RV32-NEXT:    sltu a3, a0, a5
2631; RV32-NEXT:    addi a3, a3, -1
2632; RV32-NEXT:    and a3, a3, a5
2633; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2634; RV32-NEXT:    vsub.vx v24, v16, a2
2635; RV32-NEXT:    vnot.v v16, v16
2636; RV32-NEXT:    vand.vv v16, v16, v24
2637; RV32-NEXT:    vsrl.vi v24, v16, 1
2638; RV32-NEXT:    csrr a5, vlenb
2639; RV32-NEXT:    li a6, 24
2640; RV32-NEXT:    mul a5, a5, a6
2641; RV32-NEXT:    add a5, sp, a5
2642; RV32-NEXT:    addi a5, a5, 16
2643; RV32-NEXT:    vs8r.v v0, (a5) # Unknown-size Folded Spill
2644; RV32-NEXT:    vand.vv v24, v24, v0
2645; RV32-NEXT:    vsub.vv v16, v16, v24
2646; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
2647; RV32-NEXT:    vmv.v.x v0, a4
2648; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2649; RV32-NEXT:    vand.vv v24, v16, v0
2650; RV32-NEXT:    vsrl.vi v16, v16, 2
2651; RV32-NEXT:    csrr a4, vlenb
2652; RV32-NEXT:    slli a4, a4, 4
2653; RV32-NEXT:    add a4, sp, a4
2654; RV32-NEXT:    addi a4, a4, 16
2655; RV32-NEXT:    vs8r.v v0, (a4) # Unknown-size Folded Spill
2656; RV32-NEXT:    vand.vv v16, v16, v0
2657; RV32-NEXT:    vadd.vv v16, v24, v16
2658; RV32-NEXT:    vsrl.vi v24, v16, 4
2659; RV32-NEXT:    vadd.vv v16, v16, v24
2660; RV32-NEXT:    lui a4, 61681
2661; RV32-NEXT:    lui a5, 4112
2662; RV32-NEXT:    addi a4, a4, -241
2663; RV32-NEXT:    addi a5, a5, 257
2664; RV32-NEXT:    vsetvli a6, zero, e32, m8, ta, ma
2665; RV32-NEXT:    vmv.v.x v24, a4
2666; RV32-NEXT:    csrr a4, vlenb
2667; RV32-NEXT:    slli a4, a4, 3
2668; RV32-NEXT:    add a4, sp, a4
2669; RV32-NEXT:    addi a4, a4, 16
2670; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
2671; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2672; RV32-NEXT:    vand.vv v16, v16, v24
2673; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
2674; RV32-NEXT:    vmv.v.x v24, a5
2675; RV32-NEXT:    addi a4, sp, 16
2676; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
2677; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2678; RV32-NEXT:    vmul.vv v16, v16, v24
2679; RV32-NEXT:    li a3, 56
2680; RV32-NEXT:    vsrl.vx v16, v16, a3
2681; RV32-NEXT:    bltu a0, a1, .LBB47_2
2682; RV32-NEXT:  # %bb.1:
2683; RV32-NEXT:    mv a0, a1
2684; RV32-NEXT:  .LBB47_2:
2685; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2686; RV32-NEXT:    vsub.vx v24, v8, a2
2687; RV32-NEXT:    vnot.v v8, v8
2688; RV32-NEXT:    vand.vv v8, v8, v24
2689; RV32-NEXT:    vsrl.vi v24, v8, 1
2690; RV32-NEXT:    csrr a0, vlenb
2691; RV32-NEXT:    li a1, 24
2692; RV32-NEXT:    mul a0, a0, a1
2693; RV32-NEXT:    add a0, sp, a0
2694; RV32-NEXT:    addi a0, a0, 16
2695; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
2696; RV32-NEXT:    vand.vv v24, v24, v0
2697; RV32-NEXT:    vsub.vv v8, v8, v24
2698; RV32-NEXT:    csrr a0, vlenb
2699; RV32-NEXT:    slli a0, a0, 4
2700; RV32-NEXT:    add a0, sp, a0
2701; RV32-NEXT:    addi a0, a0, 16
2702; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
2703; RV32-NEXT:    vand.vv v24, v8, v0
2704; RV32-NEXT:    vsrl.vi v8, v8, 2
2705; RV32-NEXT:    vand.vv v8, v8, v0
2706; RV32-NEXT:    vadd.vv v8, v24, v8
2707; RV32-NEXT:    vsrl.vi v24, v8, 4
2708; RV32-NEXT:    vadd.vv v8, v8, v24
2709; RV32-NEXT:    csrr a0, vlenb
2710; RV32-NEXT:    slli a0, a0, 3
2711; RV32-NEXT:    add a0, sp, a0
2712; RV32-NEXT:    addi a0, a0, 16
2713; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
2714; RV32-NEXT:    vand.vv v8, v8, v24
2715; RV32-NEXT:    addi a0, sp, 16
2716; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
2717; RV32-NEXT:    vmul.vv v8, v8, v24
2718; RV32-NEXT:    vsrl.vx v8, v8, a3
2719; RV32-NEXT:    csrr a0, vlenb
2720; RV32-NEXT:    slli a0, a0, 5
2721; RV32-NEXT:    add sp, sp, a0
2722; RV32-NEXT:    .cfi_def_cfa sp, 16
2723; RV32-NEXT:    addi sp, sp, 16
2724; RV32-NEXT:    .cfi_def_cfa_offset 0
2725; RV32-NEXT:    ret
2726;
2727; RV64-LABEL: vp_cttz_nxv16i64_unmasked:
2728; RV64:       # %bb.0:
2729; RV64-NEXT:    csrr a1, vlenb
2730; RV64-NEXT:    li a2, 1
2731; RV64-NEXT:    lui a3, 349525
2732; RV64-NEXT:    lui a4, 209715
2733; RV64-NEXT:    lui a5, 61681
2734; RV64-NEXT:    lui a6, 4112
2735; RV64-NEXT:    sub a7, a0, a1
2736; RV64-NEXT:    addiw a3, a3, 1365
2737; RV64-NEXT:    addiw a4, a4, 819
2738; RV64-NEXT:    addiw t0, a5, -241
2739; RV64-NEXT:    addiw t1, a6, 257
2740; RV64-NEXT:    slli a6, a3, 32
2741; RV64-NEXT:    add a6, a3, a6
2742; RV64-NEXT:    slli a5, a4, 32
2743; RV64-NEXT:    add a5, a4, a5
2744; RV64-NEXT:    slli a3, t0, 32
2745; RV64-NEXT:    add a3, t0, a3
2746; RV64-NEXT:    slli a4, t1, 32
2747; RV64-NEXT:    add a4, t1, a4
2748; RV64-NEXT:    sltu t0, a0, a7
2749; RV64-NEXT:    addi t0, t0, -1
2750; RV64-NEXT:    and a7, t0, a7
2751; RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
2752; RV64-NEXT:    vsub.vx v24, v16, a2
2753; RV64-NEXT:    vnot.v v16, v16
2754; RV64-NEXT:    vand.vv v16, v16, v24
2755; RV64-NEXT:    vsrl.vi v24, v16, 1
2756; RV64-NEXT:    vand.vx v24, v24, a6
2757; RV64-NEXT:    vsub.vv v16, v16, v24
2758; RV64-NEXT:    vand.vx v24, v16, a5
2759; RV64-NEXT:    vsrl.vi v16, v16, 2
2760; RV64-NEXT:    vand.vx v16, v16, a5
2761; RV64-NEXT:    vadd.vv v16, v24, v16
2762; RV64-NEXT:    vsrl.vi v24, v16, 4
2763; RV64-NEXT:    vadd.vv v16, v16, v24
2764; RV64-NEXT:    vand.vx v16, v16, a3
2765; RV64-NEXT:    vmul.vx v16, v16, a4
2766; RV64-NEXT:    li a7, 56
2767; RV64-NEXT:    vsrl.vx v16, v16, a7
2768; RV64-NEXT:    bltu a0, a1, .LBB47_2
2769; RV64-NEXT:  # %bb.1:
2770; RV64-NEXT:    mv a0, a1
2771; RV64-NEXT:  .LBB47_2:
2772; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2773; RV64-NEXT:    vsub.vx v24, v8, a2
2774; RV64-NEXT:    vnot.v v8, v8
2775; RV64-NEXT:    vand.vv v8, v8, v24
2776; RV64-NEXT:    vsrl.vi v24, v8, 1
2777; RV64-NEXT:    vand.vx v24, v24, a6
2778; RV64-NEXT:    vsub.vv v8, v8, v24
2779; RV64-NEXT:    vand.vx v24, v8, a5
2780; RV64-NEXT:    vsrl.vi v8, v8, 2
2781; RV64-NEXT:    vand.vx v8, v8, a5
2782; RV64-NEXT:    vadd.vv v8, v24, v8
2783; RV64-NEXT:    vsrl.vi v24, v8, 4
2784; RV64-NEXT:    vadd.vv v8, v8, v24
2785; RV64-NEXT:    vand.vx v8, v8, a3
2786; RV64-NEXT:    vmul.vx v8, v8, a4
2787; RV64-NEXT:    vsrl.vx v8, v8, a7
2788; RV64-NEXT:    ret
2789;
2790; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked:
2791; CHECK-ZVBB:       # %bb.0:
2792; CHECK-ZVBB-NEXT:    csrr a1, vlenb
2793; CHECK-ZVBB-NEXT:    sub a2, a0, a1
2794; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
2795; CHECK-ZVBB-NEXT:    addi a3, a3, -1
2796; CHECK-ZVBB-NEXT:    and a2, a3, a2
2797; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2798; CHECK-ZVBB-NEXT:    vctz.v v16, v16
2799; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
2800; CHECK-ZVBB-NEXT:  # %bb.1:
2801; CHECK-ZVBB-NEXT:    mv a0, a1
2802; CHECK-ZVBB-NEXT:  .LBB47_2:
2803; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2804; CHECK-ZVBB-NEXT:    vctz.v v8, v8
2805; CHECK-ZVBB-NEXT:    ret
; i1 false: cttz(0) must return the element width (64), not poison.
2806  %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl)
2807  ret <vscale x 16 x i64> %v
2808}
2809
2810define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; Zero-undef cttz (i1 true flag) on i8: the lowest set bit is isolated as
; x & (0 - x), zero-extended to e16 and widened to f32 via vfwcvt; the f32
; exponent field (>> 23) minus the bias 127 then gives the bit index, which
; is narrowed back to e8.  All ops carry the v0.t mask.  ZVBB replaces the
; whole sequence with a masked vctz.v.
; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8:
; CHECK:       # %bb.0:
2813; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
2814; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
2815; CHECK-NEXT:    li a0, 127
2816; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
2817; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
2818; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
2819; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
2820; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
2821; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
2822; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
2823; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
2824; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
2825; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
2826; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
2827; CHECK-NEXT:    ret
2828;
2829; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8:
2830; CHECK-ZVBB:       # %bb.0:
2831; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
2832; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
2833; CHECK-ZVBB-NEXT:    ret
2834  %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
2835  ret <vscale x 1 x i8> %v
2836}
2837
2838define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; Unmasked variant of the float-exponent cttz trick above: same
; x & (0 - x) / vfwcvt / >> 23 / - 127 sequence, with no v0.t operands
; (the all-true mask lets the first vnsrl.wi fold the 23-bit shift).
; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
; CHECK:       # %bb.0:
2841; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
2842; CHECK-NEXT:    vrsub.vi v9, v8, 0
2843; CHECK-NEXT:    vand.vv v8, v8, v9
2844; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
2845; CHECK-NEXT:    vzext.vf2 v9, v8
2846; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
2847; CHECK-NEXT:    vnsrl.wi v8, v8, 23
2848; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
2849; CHECK-NEXT:    vnsrl.wi v8, v8, 0
2850; CHECK-NEXT:    li a0, 127
2851; CHECK-NEXT:    vsub.vx v8, v8, a0
2852; CHECK-NEXT:    ret
2853;
2854; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
2855; CHECK-ZVBB:       # %bb.0:
2856; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
2857; CHECK-ZVBB-NEXT:    vctz.v v8, v8
2858; CHECK-ZVBB-NEXT:    ret
2859  %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
2860  ret <vscale x 1 x i8> %v
2861}
2862
2863
2864define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; Same masked float-exponent cttz lowering as nxv1i8, one LMUL step up
; (e8 mf4 -> e16 mf2 -> f32 m1).
; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8:
; CHECK:       # %bb.0:
2867; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
2868; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
2869; CHECK-NEXT:    li a0, 127
2870; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
2871; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
2872; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
2873; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
2874; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2875; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
2876; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
2877; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
2878; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
2879; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
2880; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
2881; CHECK-NEXT:    ret
2882;
2883; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8:
2884; CHECK-ZVBB:       # %bb.0:
2885; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
2886; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
2887; CHECK-ZVBB-NEXT:    ret
2888  %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
2889  ret <vscale x 2 x i8> %v
2890}
2891
2892define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; Unmasked float-exponent cttz at e8 mf4 (widening through e16 mf2).
; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
; CHECK:       # %bb.0:
2895; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
2896; CHECK-NEXT:    vrsub.vi v9, v8, 0
2897; CHECK-NEXT:    vand.vv v8, v8, v9
2898; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
2899; CHECK-NEXT:    vzext.vf2 v9, v8
2900; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
2901; CHECK-NEXT:    vnsrl.wi v8, v8, 23
2902; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
2903; CHECK-NEXT:    vnsrl.wi v8, v8, 0
2904; CHECK-NEXT:    li a0, 127
2905; CHECK-NEXT:    vsub.vx v8, v8, a0
2906; CHECK-NEXT:    ret
2907;
2908; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
2909; CHECK-ZVBB:       # %bb.0:
2910; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
2911; CHECK-ZVBB-NEXT:    vctz.v v8, v8
2912; CHECK-ZVBB-NEXT:    ret
2913  %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl)
2914  ret <vscale x 2 x i8> %v
2915}
2916
2917
2918define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; Masked float-exponent cttz at e8 mf2; the f32 intermediate now needs an
; m2 register group (v10/v12).
; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8:
; CHECK:       # %bb.0:
2921; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
2922; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
2923; CHECK-NEXT:    li a0, 127
2924; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
2925; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2926; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
2927; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9, v0.t
2928; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
2929; CHECK-NEXT:    vsrl.vi v8, v10, 23, v0.t
2930; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2931; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
2932; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
2933; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
2934; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
2935; CHECK-NEXT:    ret
2936;
2937; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8:
2938; CHECK-ZVBB:       # %bb.0:
2939; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
2940; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
2941; CHECK-ZVBB-NEXT:    ret
2942  %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
2943  ret <vscale x 4 x i8> %v
2944}
2945
2946define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; Unmasked float-exponent cttz at e8 mf2 (f32 intermediate in m2).
; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
; CHECK:       # %bb.0:
2949; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
2950; CHECK-NEXT:    vrsub.vi v9, v8, 0
2951; CHECK-NEXT:    vand.vv v8, v8, v9
2952; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
2953; CHECK-NEXT:    vzext.vf2 v9, v8
2954; CHECK-NEXT:    vfwcvt.f.xu.v v10, v9
2955; CHECK-NEXT:    vnsrl.wi v8, v10, 23
2956; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
2957; CHECK-NEXT:    vnsrl.wi v8, v8, 0
2958; CHECK-NEXT:    li a0, 127
2959; CHECK-NEXT:    vsub.vx v8, v8, a0
2960; CHECK-NEXT:    ret
2961;
2962; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
2963; CHECK-ZVBB:       # %bb.0:
2964; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
2965; CHECK-ZVBB-NEXT:    vctz.v v8, v8
2966; CHECK-ZVBB-NEXT:    ret
2967  %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl)
2968  ret <vscale x 4 x i8> %v
2969}
2970
2971
2972define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Masked float-exponent cttz at e8 m1; the f32 intermediate occupies m4
; (v12/v16).
; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8:
; CHECK:       # %bb.0:
2975; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
2976; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
2977; CHECK-NEXT:    li a0, 127
2978; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
2979; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
2980; CHECK-NEXT:    vzext.vf2 v10, v8, v0.t
2981; CHECK-NEXT:    vfwcvt.f.xu.v v12, v10, v0.t
2982; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2983; CHECK-NEXT:    vsrl.vi v8, v12, 23, v0.t
2984; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
2985; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
2986; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
2987; CHECK-NEXT:    vnsrl.wi v8, v12, 0, v0.t
2988; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
2989; CHECK-NEXT:    ret
2990;
2991; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8:
2992; CHECK-ZVBB:       # %bb.0:
2993; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
2994; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
2995; CHECK-ZVBB-NEXT:    ret
2996  %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
2997  ret <vscale x 8 x i8> %v
2998}
2999
3000define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; Unmasked float-exponent cttz at e8 m1 (f32 intermediate in m4).
; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
; CHECK:       # %bb.0:
3003; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
3004; CHECK-NEXT:    vrsub.vi v9, v8, 0
3005; CHECK-NEXT:    vand.vv v8, v8, v9
3006; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
3007; CHECK-NEXT:    vzext.vf2 v10, v8
3008; CHECK-NEXT:    vfwcvt.f.xu.v v12, v10
3009; CHECK-NEXT:    vnsrl.wi v8, v12, 23
3010; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
3011; CHECK-NEXT:    vnsrl.wi v10, v8, 0
3012; CHECK-NEXT:    li a0, 127
3013; CHECK-NEXT:    vsub.vx v8, v10, a0
3014; CHECK-NEXT:    ret
3015;
3016; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
3017; CHECK-ZVBB:       # %bb.0:
3018; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
3019; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3020; CHECK-ZVBB-NEXT:    ret
3021  %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl)
3022  ret <vscale x 8 x i8> %v
3023}
3024
3025
3026define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; Masked float-exponent cttz at e8 m2; the f32 intermediate fills a full
; m8 group (v16) — the widest type for which this lowering appears.
; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8:
; CHECK:       # %bb.0:
3029; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
3030; CHECK-NEXT:    vrsub.vi v10, v8, 0, v0.t
3031; CHECK-NEXT:    li a0, 127
3032; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
3033; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
3034; CHECK-NEXT:    vzext.vf2 v12, v8, v0.t
3035; CHECK-NEXT:    vfwcvt.f.xu.v v16, v12, v0.t
3036; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
3037; CHECK-NEXT:    vsrl.vi v8, v16, 23, v0.t
3038; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
3039; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
3040; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
3041; CHECK-NEXT:    vnsrl.wi v8, v16, 0, v0.t
3042; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
3043; CHECK-NEXT:    ret
3044;
3045; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8:
3046; CHECK-ZVBB:       # %bb.0:
3047; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
3048; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3049; CHECK-ZVBB-NEXT:    ret
3050  %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
3051  ret <vscale x 16 x i8> %v
3052}
3053
3054define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; Unmasked float-exponent cttz at e8 m2 (f32 intermediate in m8).
; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
; CHECK:       # %bb.0:
3057; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
3058; CHECK-NEXT:    vrsub.vi v10, v8, 0
3059; CHECK-NEXT:    vand.vv v8, v8, v10
3060; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
3061; CHECK-NEXT:    vzext.vf2 v12, v8
3062; CHECK-NEXT:    vfwcvt.f.xu.v v16, v12
3063; CHECK-NEXT:    vnsrl.wi v8, v16, 23
3064; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
3065; CHECK-NEXT:    vnsrl.wi v12, v8, 0
3066; CHECK-NEXT:    li a0, 127
3067; CHECK-NEXT:    vsub.vx v8, v12, a0
3068; CHECK-NEXT:    ret
3069;
3070; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
3071; CHECK-ZVBB:       # %bb.0:
3072; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
3073; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3074; CHECK-ZVBB-NEXT:    ret
3075  %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl)
3076  ret <vscale x 16 x i8> %v
3077}
3078
3079
3080define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; At e8 m4 the lowering switches from the float-exponent trick to
; popcount((~x) & (x - 1)) with byte-wise constants 85 (0x55), 51 (0x33)
; and 15 (0xf), all under the v0.t mask.
; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8:
; CHECK:       # %bb.0:
3083; CHECK-NEXT:    li a1, 1
3084; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
3085; CHECK-NEXT:    vsub.vx v12, v8, a1, v0.t
3086; CHECK-NEXT:    li a0, 85
3087; CHECK-NEXT:    vnot.v v8, v8, v0.t
3088; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
3089; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
3090; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
3091; CHECK-NEXT:    li a0, 51
3092; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
3093; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
3094; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3095; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3096; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
3097; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
3098; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
3099; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
3100; CHECK-NEXT:    ret
3101;
3102; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8:
3103; CHECK-ZVBB:       # %bb.0:
3104; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
3105; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3106; CHECK-ZVBB-NEXT:    ret
3107  %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
3108  ret <vscale x 32 x i8> %v
3109}
3110
; Unmasked variant (all-true mask): same popcount(~x & (x-1)) sequence, no v0.t.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3111define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
3112; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked:
3113; CHECK:       # %bb.0:
3114; CHECK-NEXT:    li a1, 1
3115; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
3116; CHECK-NEXT:    vnot.v v12, v8
3117; CHECK-NEXT:    vsub.vx v8, v8, a1
3118; CHECK-NEXT:    li a0, 85
3119; CHECK-NEXT:    vand.vv v8, v12, v8
3120; CHECK-NEXT:    vsrl.vi v12, v8, 1
3121; CHECK-NEXT:    vand.vx v12, v12, a0
3122; CHECK-NEXT:    li a0, 51
3123; CHECK-NEXT:    vsub.vv v8, v8, v12
3124; CHECK-NEXT:    vand.vx v12, v8, a0
3125; CHECK-NEXT:    vsrl.vi v8, v8, 2
3126; CHECK-NEXT:    vand.vx v8, v8, a0
3127; CHECK-NEXT:    vadd.vv v8, v12, v8
3128; CHECK-NEXT:    vsrl.vi v12, v8, 4
3129; CHECK-NEXT:    vadd.vv v8, v8, v12
3130; CHECK-NEXT:    vand.vi v8, v8, 15
3131; CHECK-NEXT:    ret
3132;
3133; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked:
3134; CHECK-ZVBB:       # %bb.0:
3135; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
3136; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3137; CHECK-ZVBB-NEXT:    ret
3138  %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> splat (i1 true), i32 %evl)
3139  ret <vscale x 32 x i8> %v
3140}
3141
3142
; Masked cttz-undef, e8/m8: popcount(~x & (x-1)) bit-twiddling sequence.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3143define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
3144; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8:
3145; CHECK:       # %bb.0:
3146; CHECK-NEXT:    li a1, 1
3147; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
3148; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
3149; CHECK-NEXT:    li a0, 85
3150; CHECK-NEXT:    vnot.v v8, v8, v0.t
3151; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
3152; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
3153; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
3154; CHECK-NEXT:    li a0, 51
3155; CHECK-NEXT:    vsub.vv v8, v8, v16, v0.t
3156; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
3157; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
3158; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3159; CHECK-NEXT:    vadd.vv v8, v16, v8, v0.t
3160; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
3161; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
3162; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
3163; CHECK-NEXT:    ret
3164;
3165; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8:
3166; CHECK-ZVBB:       # %bb.0:
3167; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
3168; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3169; CHECK-ZVBB-NEXT:    ret
3170  %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
3171  ret <vscale x 64 x i8> %v
3172}
3173
; Unmasked e8/m8 variant of the popcount(~x & (x-1)) lowering.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3174define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
3175; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked:
3176; CHECK:       # %bb.0:
3177; CHECK-NEXT:    li a1, 1
3178; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
3179; CHECK-NEXT:    vnot.v v16, v8
3180; CHECK-NEXT:    vsub.vx v8, v8, a1
3181; CHECK-NEXT:    li a0, 85
3182; CHECK-NEXT:    vand.vv v8, v16, v8
3183; CHECK-NEXT:    vsrl.vi v16, v8, 1
3184; CHECK-NEXT:    vand.vx v16, v16, a0
3185; CHECK-NEXT:    li a0, 51
3186; CHECK-NEXT:    vsub.vv v8, v8, v16
3187; CHECK-NEXT:    vand.vx v16, v8, a0
3188; CHECK-NEXT:    vsrl.vi v8, v8, 2
3189; CHECK-NEXT:    vand.vx v8, v8, a0
3190; CHECK-NEXT:    vadd.vv v8, v16, v8
3191; CHECK-NEXT:    vsrl.vi v16, v8, 4
3192; CHECK-NEXT:    vadd.vv v8, v8, v16
3193; CHECK-NEXT:    vand.vi v8, v8, 15
3194; CHECK-NEXT:    ret
3195;
3196; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked:
3197; CHECK-ZVBB:       # %bb.0:
3198; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
3199; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3200; CHECK-ZVBB-NEXT:    ret
3201  %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> splat (i1 true), i32 %evl)
3202  ret <vscale x 64 x i8> %v
3203}
3204
3205
; Masked cttz-undef, e16: FP trick - widen (x & -x) to f32, exponent (>>23)
; minus bias 127 yields the trailing-zero count.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3206define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
3207; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16:
3208; CHECK:       # %bb.0:
3209; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3210; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
3211; CHECK-NEXT:    li a0, 127
3212; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
3213; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8, v0.t
3214; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
3215; CHECK-NEXT:    vsrl.vi v8, v9, 23, v0.t
3216; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
3217; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
3218; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
3219; CHECK-NEXT:    ret
3220;
3221; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16:
3222; CHECK-ZVBB:       # %bb.0:
3223; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3224; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3225; CHECK-ZVBB-NEXT:    ret
3226  %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
3227  ret <vscale x 1 x i16> %v
3228}
3229
; Unmasked e16 FP-trick variant: vnsrl.wi narrows and shifts (>>23) in one op.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3230define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
3231; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
3232; CHECK:       # %bb.0:
3233; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3234; CHECK-NEXT:    vrsub.vi v9, v8, 0
3235; CHECK-NEXT:    vand.vv v8, v8, v9
3236; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
3237; CHECK-NEXT:    vnsrl.wi v8, v9, 23
3238; CHECK-NEXT:    li a0, 127
3239; CHECK-NEXT:    vsub.vx v8, v8, a0
3240; CHECK-NEXT:    ret
3241;
3242; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
3243; CHECK-ZVBB:       # %bb.0:
3244; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3245; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3246; CHECK-ZVBB-NEXT:    ret
3247  %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
3248  ret <vscale x 1 x i16> %v
3249}
3250
3251
; Masked e16/mf2: same f32 exponent trick ((x & -x) widened, >>23, -127).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3252define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
3253; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16:
3254; CHECK:       # %bb.0:
3255; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
3256; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
3257; CHECK-NEXT:    li a0, 127
3258; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
3259; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8, v0.t
3260; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3261; CHECK-NEXT:    vsrl.vi v8, v9, 23, v0.t
3262; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
3263; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
3264; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
3265; CHECK-NEXT:    ret
3266;
3267; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16:
3268; CHECK-ZVBB:       # %bb.0:
3269; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
3270; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3271; CHECK-ZVBB-NEXT:    ret
3272  %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
3273  ret <vscale x 2 x i16> %v
3274}
3275
; Unmasked e16/mf2 FP-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3276define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
3277; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
3278; CHECK:       # %bb.0:
3279; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
3280; CHECK-NEXT:    vrsub.vi v9, v8, 0
3281; CHECK-NEXT:    vand.vv v8, v8, v9
3282; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
3283; CHECK-NEXT:    vnsrl.wi v8, v9, 23
3284; CHECK-NEXT:    li a0, 127
3285; CHECK-NEXT:    vsub.vx v8, v8, a0
3286; CHECK-NEXT:    ret
3287;
3288; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
3289; CHECK-ZVBB:       # %bb.0:
3290; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
3291; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3292; CHECK-ZVBB-NEXT:    ret
3293  %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl)
3294  ret <vscale x 2 x i16> %v
3295}
3296
3297
; Masked e16/m1: f32 exponent trick; widened temp uses an m2 register group (v10).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3298define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
3299; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16:
3300; CHECK:       # %bb.0:
3301; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
3302; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
3303; CHECK-NEXT:    li a0, 127
3304; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
3305; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8, v0.t
3306; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3307; CHECK-NEXT:    vsrl.vi v8, v10, 23, v0.t
3308; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
3309; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
3310; CHECK-NEXT:    vsub.vx v8, v10, a0, v0.t
3311; CHECK-NEXT:    ret
3312;
3313; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
3314; CHECK-ZVBB:       # %bb.0:
3315; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
3316; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3317; CHECK-ZVBB-NEXT:    ret
3318  %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
3319  ret <vscale x 4 x i16> %v
3320}
3321
; Unmasked e16/m1 FP-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3322define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
3323; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
3324; CHECK:       # %bb.0:
3325; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
3326; CHECK-NEXT:    vrsub.vi v9, v8, 0
3327; CHECK-NEXT:    vand.vv v8, v8, v9
3328; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
3329; CHECK-NEXT:    vnsrl.wi v8, v10, 23
3330; CHECK-NEXT:    li a0, 127
3331; CHECK-NEXT:    vsub.vx v8, v8, a0
3332; CHECK-NEXT:    ret
3333;
3334; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
3335; CHECK-ZVBB:       # %bb.0:
3336; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
3337; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3338; CHECK-ZVBB-NEXT:    ret
3339  %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl)
3340  ret <vscale x 4 x i16> %v
3341}
3342
3343
; Masked e16/m2: f32 exponent trick; widened temp is an m4 group (v12).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3344define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
3345; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16:
3346; CHECK:       # %bb.0:
3347; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
3348; CHECK-NEXT:    vrsub.vi v10, v8, 0, v0.t
3349; CHECK-NEXT:    li a0, 127
3350; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
3351; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8, v0.t
3352; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
3353; CHECK-NEXT:    vsrl.vi v8, v12, 23, v0.t
3354; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
3355; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
3356; CHECK-NEXT:    vsub.vx v8, v12, a0, v0.t
3357; CHECK-NEXT:    ret
3358;
3359; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
3360; CHECK-ZVBB:       # %bb.0:
3361; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
3362; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3363; CHECK-ZVBB-NEXT:    ret
3364  %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
3365  ret <vscale x 8 x i16> %v
3366}
3367
; Unmasked e16/m2 FP-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3368define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
3369; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
3370; CHECK:       # %bb.0:
3371; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
3372; CHECK-NEXT:    vrsub.vi v10, v8, 0
3373; CHECK-NEXT:    vand.vv v8, v8, v10
3374; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8
3375; CHECK-NEXT:    vnsrl.wi v8, v12, 23
3376; CHECK-NEXT:    li a0, 127
3377; CHECK-NEXT:    vsub.vx v8, v8, a0
3378; CHECK-NEXT:    ret
3379;
3380; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
3381; CHECK-ZVBB:       # %bb.0:
3382; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
3383; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3384; CHECK-ZVBB-NEXT:    ret
3385  %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl)
3386  ret <vscale x 8 x i16> %v
3387}
3388
3389
; Masked e16/m4: f32 exponent trick; widened temp fills an m8 group (v16).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3390define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
3391; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16:
3392; CHECK:       # %bb.0:
3393; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
3394; CHECK-NEXT:    vrsub.vi v12, v8, 0, v0.t
3395; CHECK-NEXT:    li a0, 127
3396; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
3397; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8, v0.t
3398; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
3399; CHECK-NEXT:    vsrl.vi v8, v16, 23, v0.t
3400; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
3401; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
3402; CHECK-NEXT:    vsub.vx v8, v16, a0, v0.t
3403; CHECK-NEXT:    ret
3404;
3405; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
3406; CHECK-ZVBB:       # %bb.0:
3407; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
3408; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3409; CHECK-ZVBB-NEXT:    ret
3410  %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
3411  ret <vscale x 16 x i16> %v
3412}
3413
; Unmasked e16/m4 FP-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3414define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
3415; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
3416; CHECK:       # %bb.0:
3417; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
3418; CHECK-NEXT:    vrsub.vi v12, v8, 0
3419; CHECK-NEXT:    vand.vv v8, v8, v12
3420; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
3421; CHECK-NEXT:    vnsrl.wi v8, v16, 23
3422; CHECK-NEXT:    li a0, 127
3423; CHECK-NEXT:    vsub.vx v8, v8, a0
3424; CHECK-NEXT:    ret
3425;
3426; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
3427; CHECK-ZVBB:       # %bb.0:
3428; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
3429; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3430; CHECK-ZVBB-NEXT:    ret
3431  %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl)
3432  ret <vscale x 16 x i16> %v
3433}
3434
3435
; Masked e16/m8: already at max LMUL, so no widening FP trick - falls back to
; the 16-bit popcount sequence (0x5555/0x3333/0x0F0F, multiply by 257, >>8).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3436define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
3437; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16:
3438; CHECK:       # %bb.0:
3439; CHECK-NEXT:    li a1, 1
3440; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3441; CHECK-NEXT:    vsub.vx v16, v8, a1, v0.t
3442; CHECK-NEXT:    lui a0, 5
3443; CHECK-NEXT:    vnot.v v8, v8, v0.t
3444; CHECK-NEXT:    addi a0, a0, 1365
3445; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
3446; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
3447; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
3448; CHECK-NEXT:    lui a0, 3
3449; CHECK-NEXT:    addi a0, a0, 819
3450; CHECK-NEXT:    vsub.vv v16, v8, v16, v0.t
3451; CHECK-NEXT:    vand.vx v8, v16, a0, v0.t
3452; CHECK-NEXT:    vsrl.vi v16, v16, 2, v0.t
3453; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
3454; CHECK-NEXT:    lui a0, 1
3455; CHECK-NEXT:    addi a0, a0, -241
3456; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
3457; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
3458; CHECK-NEXT:    vadd.vv v8, v8, v16, v0.t
3459; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3460; CHECK-NEXT:    li a0, 257
3461; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
3462; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
3463; CHECK-NEXT:    ret
3464;
3465; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
3466; CHECK-ZVBB:       # %bb.0:
3467; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3468; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3469; CHECK-ZVBB-NEXT:    ret
3470  %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
3471  ret <vscale x 32 x i16> %v
3472}
3473
; Unmasked e16/m8 variant of the 16-bit popcount fallback.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3474define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
3475; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
3476; CHECK:       # %bb.0:
3477; CHECK-NEXT:    li a1, 1
3478; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3479; CHECK-NEXT:    vnot.v v16, v8
3480; CHECK-NEXT:    vsub.vx v8, v8, a1
3481; CHECK-NEXT:    lui a0, 5
3482; CHECK-NEXT:    addi a0, a0, 1365
3483; CHECK-NEXT:    vand.vv v8, v16, v8
3484; CHECK-NEXT:    vsrl.vi v16, v8, 1
3485; CHECK-NEXT:    vand.vx v16, v16, a0
3486; CHECK-NEXT:    lui a0, 3
3487; CHECK-NEXT:    addi a0, a0, 819
3488; CHECK-NEXT:    vsub.vv v8, v8, v16
3489; CHECK-NEXT:    vand.vx v16, v8, a0
3490; CHECK-NEXT:    vsrl.vi v8, v8, 2
3491; CHECK-NEXT:    vand.vx v8, v8, a0
3492; CHECK-NEXT:    lui a0, 1
3493; CHECK-NEXT:    addi a0, a0, -241
3494; CHECK-NEXT:    vadd.vv v8, v16, v8
3495; CHECK-NEXT:    vsrl.vi v16, v8, 4
3496; CHECK-NEXT:    vadd.vv v8, v8, v16
3497; CHECK-NEXT:    vand.vx v8, v8, a0
3498; CHECK-NEXT:    li a0, 257
3499; CHECK-NEXT:    vmul.vx v8, v8, a0
3500; CHECK-NEXT:    vsrl.vi v8, v8, 8
3501; CHECK-NEXT:    ret
3502;
3503; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
3504; CHECK-ZVBB:       # %bb.0:
3505; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3506; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3507; CHECK-ZVBB-NEXT:    ret
3508  %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> splat (i1 true), i32 %evl)
3509  ret <vscale x 32 x i16> %v
3510}
3511
3512
; Masked cttz-undef, e32: widen (x & -x) to f64 and extract the exponent
; (>>52) minus bias 1023 to get the trailing-zero count.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3513define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
3514; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32:
3515; CHECK:       # %bb.0:
3516; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
3517; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
3518; CHECK-NEXT:    li a0, 52
3519; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
3520; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8, v0.t
3521; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
3522; CHECK-NEXT:    vsrl.vx v8, v9, a0, v0.t
3523; CHECK-NEXT:    li a0, 1023
3524; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
3525; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
3526; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
3527; CHECK-NEXT:    ret
3528;
3529; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32:
3530; CHECK-ZVBB:       # %bb.0:
3531; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
3532; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3533; CHECK-ZVBB-NEXT:    ret
3534  %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
3535  ret <vscale x 1 x i32> %v
3536}
3537
; Unmasked e32/mf2 variant: vnsrl.wx narrows and shifts (>>52) in one op.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3538define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
3539; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
3540; CHECK:       # %bb.0:
3541; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
3542; CHECK-NEXT:    vrsub.vi v9, v8, 0
3543; CHECK-NEXT:    li a0, 52
3544; CHECK-NEXT:    vand.vv v8, v8, v9
3545; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
3546; CHECK-NEXT:    vnsrl.wx v8, v9, a0
3547; CHECK-NEXT:    li a0, 1023
3548; CHECK-NEXT:    vsub.vx v8, v8, a0
3549; CHECK-NEXT:    ret
3550;
3551; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
3552; CHECK-ZVBB:       # %bb.0:
3553; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
3554; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3555; CHECK-ZVBB-NEXT:    ret
3556  %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
3557  ret <vscale x 1 x i32> %v
3558}
3559
3560
; Masked e32/m1: f64 exponent trick (>>52, bias 1023).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3561define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
3562; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32:
3563; CHECK:       # %bb.0:
3564; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
3565; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
3566; CHECK-NEXT:    li a0, 52
3567; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
3568; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8, v0.t
3569; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
3570; CHECK-NEXT:    vsrl.vx v8, v10, a0, v0.t
3571; CHECK-NEXT:    li a0, 1023
3572; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3573; CHECK-NEXT:    vnsrl.wi v10, v8, 0, v0.t
3574; CHECK-NEXT:    vsub.vx v8, v10, a0, v0.t
3575; CHECK-NEXT:    ret
3576;
3577; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32:
3578; CHECK-ZVBB:       # %bb.0:
3579; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
3580; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3581; CHECK-ZVBB-NEXT:    ret
3582  %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
3583  ret <vscale x 2 x i32> %v
3584}
3585
; Unmasked e32/m1 f64 exponent-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3586define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
3587; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
3588; CHECK:       # %bb.0:
3589; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
3590; CHECK-NEXT:    vrsub.vi v9, v8, 0
3591; CHECK-NEXT:    li a0, 52
3592; CHECK-NEXT:    vand.vv v8, v8, v9
3593; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
3594; CHECK-NEXT:    vnsrl.wx v8, v10, a0
3595; CHECK-NEXT:    li a0, 1023
3596; CHECK-NEXT:    vsub.vx v8, v8, a0
3597; CHECK-NEXT:    ret
3598;
3599; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
3600; CHECK-ZVBB:       # %bb.0:
3601; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
3602; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3603; CHECK-ZVBB-NEXT:    ret
3604  %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl)
3605  ret <vscale x 2 x i32> %v
3606}
3607
3608
; Masked e32/m2: f64 exponent trick; widened temp is an m4 group (v12).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3609define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
3610; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32:
3611; CHECK:       # %bb.0:
3612; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
3613; CHECK-NEXT:    vrsub.vi v10, v8, 0, v0.t
3614; CHECK-NEXT:    li a0, 52
3615; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
3616; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8, v0.t
3617; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
3618; CHECK-NEXT:    vsrl.vx v8, v12, a0, v0.t
3619; CHECK-NEXT:    li a0, 1023
3620; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3621; CHECK-NEXT:    vnsrl.wi v12, v8, 0, v0.t
3622; CHECK-NEXT:    vsub.vx v8, v12, a0, v0.t
3623; CHECK-NEXT:    ret
3624;
3625; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32:
3626; CHECK-ZVBB:       # %bb.0:
3627; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
3628; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3629; CHECK-ZVBB-NEXT:    ret
3630  %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
3631  ret <vscale x 4 x i32> %v
3632}
3633
; Unmasked e32/m2 f64 exponent-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3634define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
3635; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
3636; CHECK:       # %bb.0:
3637; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
3638; CHECK-NEXT:    vrsub.vi v10, v8, 0
3639; CHECK-NEXT:    li a0, 52
3640; CHECK-NEXT:    vand.vv v8, v8, v10
3641; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8
3642; CHECK-NEXT:    vnsrl.wx v8, v12, a0
3643; CHECK-NEXT:    li a0, 1023
3644; CHECK-NEXT:    vsub.vx v8, v8, a0
3645; CHECK-NEXT:    ret
3646;
3647; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
3648; CHECK-ZVBB:       # %bb.0:
3649; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
3650; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3651; CHECK-ZVBB-NEXT:    ret
3652  %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl)
3653  ret <vscale x 4 x i32> %v
3654}
3655
3656
; Masked e32/m4: f64 exponent trick; widened temp fills an m8 group (v16).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3657define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
3658; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32:
3659; CHECK:       # %bb.0:
3660; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
3661; CHECK-NEXT:    vrsub.vi v12, v8, 0, v0.t
3662; CHECK-NEXT:    li a0, 52
3663; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
3664; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8, v0.t
3665; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
3666; CHECK-NEXT:    vsrl.vx v8, v16, a0, v0.t
3667; CHECK-NEXT:    li a0, 1023
3668; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
3669; CHECK-NEXT:    vnsrl.wi v16, v8, 0, v0.t
3670; CHECK-NEXT:    vsub.vx v8, v16, a0, v0.t
3671; CHECK-NEXT:    ret
3672;
3673; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32:
3674; CHECK-ZVBB:       # %bb.0:
3675; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
3676; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3677; CHECK-ZVBB-NEXT:    ret
3678  %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
3679  ret <vscale x 8 x i32> %v
3680}
3681
; Unmasked e32/m4 f64 exponent-trick variant.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3682define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
3683; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
3684; CHECK:       # %bb.0:
3685; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
3686; CHECK-NEXT:    vrsub.vi v12, v8, 0
3687; CHECK-NEXT:    li a0, 52
3688; CHECK-NEXT:    vand.vv v8, v8, v12
3689; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
3690; CHECK-NEXT:    vnsrl.wx v8, v16, a0
3691; CHECK-NEXT:    li a0, 1023
3692; CHECK-NEXT:    vsub.vx v8, v8, a0
3693; CHECK-NEXT:    ret
3694;
3695; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
3696; CHECK-ZVBB:       # %bb.0:
3697; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
3698; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3699; CHECK-ZVBB-NEXT:    ret
3700  %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl)
3701  ret <vscale x 8 x i32> %v
3702}
3703
3704
; Masked e32/m8: no room to widen, so convert in place to f32 and use its
; exponent (>>23, bias 127); fsrmi/fsrm save and restore frm around the
; rounding-mode-sensitive vfcvt (frm=1, RTZ).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3705define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
3706; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32:
3707; CHECK:       # %bb.0:
3708; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
3709; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
3710; CHECK-NEXT:    fsrmi a0, 1
3711; CHECK-NEXT:    li a1, 127
3712; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
3713; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
3714; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
3715; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
3716; CHECK-NEXT:    fsrm a0
3717; CHECK-NEXT:    ret
3718;
3719; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32:
3720; CHECK-ZVBB:       # %bb.0:
3721; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
3722; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3723; CHECK-ZVBB-NEXT:    ret
3724  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
3725  ret <vscale x 16 x i32> %v
3726}
3727
; Unmasked e32/m8 same-width f32 variant with frm save/restore.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3728define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
3729; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
3730; CHECK:       # %bb.0:
3731; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
3732; CHECK-NEXT:    vrsub.vi v16, v8, 0
3733; CHECK-NEXT:    fsrmi a0, 1
3734; CHECK-NEXT:    vand.vv v8, v8, v16
3735; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
3736; CHECK-NEXT:    vsrl.vi v8, v8, 23
3737; CHECK-NEXT:    li a1, 127
3738; CHECK-NEXT:    vsub.vx v8, v8, a1
3739; CHECK-NEXT:    fsrm a0
3740; CHECK-NEXT:    ret
3741;
3742; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
3743; CHECK-ZVBB:       # %bb.0:
3744; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
3745; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3746; CHECK-ZVBB-NEXT:    ret
3747  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl)
3748  ret <vscale x 16 x i32> %v
3749}
3750
3751
; Masked cttz-undef, e64: same-width f64 convert, exponent (>>52) minus bias
; 1023; frm saved/set to 1 (RTZ) by fsrmi and restored by fsrm.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3752define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
3753; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64:
3754; CHECK:       # %bb.0:
3755; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3756; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
3757; CHECK-NEXT:    fsrmi a0, 1
3758; CHECK-NEXT:    li a1, 52
3759; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
3760; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
3761; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
3762; CHECK-NEXT:    li a1, 1023
3763; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
3764; CHECK-NEXT:    fsrm a0
3765; CHECK-NEXT:    ret
3766;
3767; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64:
3768; CHECK-ZVBB:       # %bb.0:
3769; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3770; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3771; CHECK-ZVBB-NEXT:    ret
3772  %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
3773  ret <vscale x 1 x i64> %v
3774}
3775
; Unmasked e64/m1 same-width f64 variant with frm save/restore.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3776define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
3777; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
3778; CHECK:       # %bb.0:
3779; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3780; CHECK-NEXT:    vrsub.vi v9, v8, 0
3781; CHECK-NEXT:    fsrmi a0, 1
3782; CHECK-NEXT:    li a1, 52
3783; CHECK-NEXT:    vand.vv v8, v8, v9
3784; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
3785; CHECK-NEXT:    vsrl.vx v8, v8, a1
3786; CHECK-NEXT:    li a1, 1023
3787; CHECK-NEXT:    vsub.vx v8, v8, a1
3788; CHECK-NEXT:    fsrm a0
3789; CHECK-NEXT:    ret
3790;
3791; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
3792; CHECK-ZVBB:       # %bb.0:
3793; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
3794; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3795; CHECK-ZVBB-NEXT:    ret
3796  %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
3797  ret <vscale x 1 x i64> %v
3798}
3799
3800
; Masked e64/m2 same-width f64 exponent lowering with frm save/restore.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3801define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
3802; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64:
3803; CHECK:       # %bb.0:
3804; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3805; CHECK-NEXT:    vrsub.vi v10, v8, 0, v0.t
3806; CHECK-NEXT:    fsrmi a0, 1
3807; CHECK-NEXT:    li a1, 52
3808; CHECK-NEXT:    vand.vv v8, v8, v10, v0.t
3809; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
3810; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
3811; CHECK-NEXT:    li a1, 1023
3812; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
3813; CHECK-NEXT:    fsrm a0
3814; CHECK-NEXT:    ret
3815;
3816; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64:
3817; CHECK-ZVBB:       # %bb.0:
3818; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3819; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3820; CHECK-ZVBB-NEXT:    ret
3821  %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
3822  ret <vscale x 2 x i64> %v
3823}
3824
; Unmasked e64/m2 same-width f64 variant with frm save/restore.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3825define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
3826; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
3827; CHECK:       # %bb.0:
3828; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3829; CHECK-NEXT:    vrsub.vi v10, v8, 0
3830; CHECK-NEXT:    fsrmi a0, 1
3831; CHECK-NEXT:    li a1, 52
3832; CHECK-NEXT:    vand.vv v8, v8, v10
3833; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
3834; CHECK-NEXT:    vsrl.vx v8, v8, a1
3835; CHECK-NEXT:    li a1, 1023
3836; CHECK-NEXT:    vsub.vx v8, v8, a1
3837; CHECK-NEXT:    fsrm a0
3838; CHECK-NEXT:    ret
3839;
3840; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
3841; CHECK-ZVBB:       # %bb.0:
3842; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
3843; CHECK-ZVBB-NEXT:    vctz.v v8, v8
3844; CHECK-ZVBB-NEXT:    ret
3845  %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl)
3846  ret <vscale x 2 x i64> %v
3847}
3848
3849
; Masked e64/m4 same-width f64 exponent lowering with frm save/restore.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py - regenerate, do not hand-edit.
3850define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
3851; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64:
3852; CHECK:       # %bb.0:
3853; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
3854; CHECK-NEXT:    vrsub.vi v12, v8, 0, v0.t
3855; CHECK-NEXT:    fsrmi a0, 1
3856; CHECK-NEXT:    li a1, 52
3857; CHECK-NEXT:    vand.vv v8, v8, v12, v0.t
3858; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
3859; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
3860; CHECK-NEXT:    li a1, 1023
3861; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
3862; CHECK-NEXT:    fsrm a0
3863; CHECK-NEXT:    ret
3864;
3865; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64:
3866; CHECK-ZVBB:       # %bb.0:
3867; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
3868; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
3869; CHECK-ZVBB-NEXT:    ret
3870  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
3871  ret <vscale x 4 x i64> %v
3872}
3873
; Unmasked zero-undef vp.cttz, <vscale x 4 x i64> (LMUL=4).  All-true mask
; folds away: x & -x, unsigned->f64 convert, exponent extract (>>52, -1023)
; on the non-Zvbb path; single vctz.v with Zvbb.
define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vrsub.vi v12, v8, 0
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    li a1, 52
; CHECK-NEXT:    vand.vv v8, v8, v12
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    li a1, 1023
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i64> %v
}
3897
3898
; Masked zero-undef vp.cttz on a non-power-of-two element count,
; <vscale x 7 x i64>: legalized into an LMUL=8 (m8) register group and
; lowered identically to the nxv8i64 case — masked x & -x, f64 convert, and
; exponent extraction (>>52, -1023); Zvbb uses a single masked vctz.v.
define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    li a1, 52
; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
; CHECK-NEXT:    li a1, 1023
; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x i64> %v
}
3922
; Unmasked zero-undef vp.cttz, <vscale x 7 x i64>: legalized to m8 and
; lowered via x & -x, unsigned->f64 convert, and exponent extraction
; (>>52, -1023) without Zvbb; a single vctz.v with Zvbb.
define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v8, 0
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    li a1, 52
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    li a1, 1023
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x i64> %v
}
3946
3947
; Masked zero-undef vp.cttz, <vscale x 8 x i64> (LMUL=8, the largest single
; register group).  Non-Zvbb: masked x & -x, unsigned->f64 convert, exponent
; extraction (>>52, -1023).  Zvbb: single masked vctz.v.
define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    li a1, 52
; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
; CHECK-NEXT:    li a1, 1023
; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}
3971
; Unmasked zero-undef vp.cttz, <vscale x 8 x i64> (LMUL=8).  Same
; float-exponent lowering as the masked variant but with the all-true mask
; folded away; Zvbb emits a single vctz.v.
define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v8, 0
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    li a1, 52
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    li a1, 1023
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i64> %v
}
3995
; Masked zero-undef vp.cttz, <vscale x 16 x i64>: twice the largest legal
; m8 group, so codegen splits the operation into a high half (v16, masked by
; the mask slid down by vlenb/8 elements) and a low half (v8, original mask,
; EVL clamped to vlenb via the bltu/.LBB94_2 sequence).  The non-Zvbb path
; additionally needs the stack (16*vlenb bytes) to spill/reload the v8 input
; and the high-half result while each half runs the float-exponent cttz
; lowering (x & -x, f64 convert, >>52, -1023).  The Zvbb path needs no
; spills: one masked vctz.v per half.
define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v24, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    fsrmi a3, 1
; CHECK-NEXT:    srli a2, a1, 3
; CHECK-NEXT:    sub a4, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a4
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a4, a2, a4
; CHECK-NEXT:    li a2, 52
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v8, v16, 0, v0.t
; CHECK-NEXT:    vand.vv v8, v16, v8, v0.t
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
; CHECK-NEXT:    fsrm a3
; CHECK-NEXT:    vsrl.vx v8, v8, a2, v0.t
; CHECK-NEXT:    li a3, 1023
; CHECK-NEXT:    vsub.vx v8, v8, a3, v0.t
; CHECK-NEXT:    addi a4, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; CHECK-NEXT:    bltu a0, a1, .LBB94_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB94_2:
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vx v8, v8, a2, v0.t
; CHECK-NEXT:    vsub.vx v8, v8, a3, v0.t
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
; CHECK-ZVBB-NEXT:    csrr a1, vlenb
; CHECK-ZVBB-NEXT:    srli a2, a1, 3
; CHECK-ZVBB-NEXT:    sub a3, a0, a1
; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-ZVBB-NEXT:    sltu a2, a0, a3
; CHECK-ZVBB-NEXT:    addi a2, a2, -1
; CHECK-ZVBB-NEXT:    and a2, a2, a3
; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v16, v16, v0.t
; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB94_2
; CHECK-ZVBB-NEXT:  # %bb.1:
; CHECK-ZVBB-NEXT:    mv a0, a1
; CHECK-ZVBB-NEXT:  .LBB94_2:
; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i64> %v
}
4082
; Unmasked zero-undef vp.cttz, <vscale x 16 x i64>: split into two m8
; halves.  The high half (v16) runs first with EVL = max(evl - vlenb, 0)
; (the sltu/addi/and sequence clamps the subtraction at zero); the low half
; (v8) runs with EVL clamped to vlenb (bltu/.LBB95_2).  With no mask and a
; scratch group (v24) available, no stack spills are needed, unlike the
; masked variant.  Each half uses the float-exponent cttz lowering
; (x & -x, f64 convert, >>52, -1023); Zvbb emits one vctz.v per half.
define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    fsrmi a3, 1
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    sltu a4, a0, a2
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    and a4, a4, a2
; CHECK-NEXT:    li a2, 52
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v24, v16, 0
; CHECK-NEXT:    vand.vv v16, v16, v24
; CHECK-NEXT:    vfcvt.f.xu.v v16, v16
; CHECK-NEXT:    fsrm a3
; CHECK-NEXT:    vsrl.vx v16, v16, a2
; CHECK-NEXT:    li a3, 1023
; CHECK-NEXT:    vsub.vx v16, v16, a3
; CHECK-NEXT:    bltu a0, a1, .LBB95_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB95_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vrsub.vi v24, v8, 0
; CHECK-NEXT:    vand.vv v8, v8, v24
; CHECK-NEXT:    fsrmi a0, 1
; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
; CHECK-NEXT:    vsrl.vx v8, v8, a2
; CHECK-NEXT:    vsub.vx v8, v8, a3
; CHECK-NEXT:    fsrm a0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    csrr a1, vlenb
; CHECK-ZVBB-NEXT:    sub a2, a0, a1
; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
; CHECK-ZVBB-NEXT:    addi a3, a3, -1
; CHECK-ZVBB-NEXT:    and a2, a3, a2
; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v16, v16
; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB95_2
; CHECK-ZVBB-NEXT:  # %bb.1:
; CHECK-ZVBB-NEXT:    mv a0, a1
; CHECK-ZVBB-NEXT:  .LBB95_2:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i64> %v
}
4134
4135; Test promotion.
4136declare <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
; Promotion test: i9 elements are promoted to e16.  Because this call uses
; i1 false (cttz is defined at zero), codegen first ORs in 512 (bit 9 of the
; promoted value), which caps the trailing-zero count at 9 — the correct
; result for a zero i9 input.  The non-Zvbb path then uses the widening
; float trick: x & -x, vfwcvt to f32, shift right 23 to the exponent,
; narrow back (vnsrl.wi), and subtract the f32 bias 127.  Zvbb keeps the OR
; and uses vctz.v.
define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 512
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vor.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v8, v9, 23, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    li a1, 512
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vor.vx v8, v8, a1, v0.t
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i9> %v
}
; Promotion test, zero-undef flavor (i1 true): with a zero input being
; poison, no OR with 512 is needed to bound the count, so the non-Zvbb path
; is just the widening float trick on the promoted e16 value (x & -x,
; vfwcvt to f32, >>23, narrow, -127), and the Zvbb path is a bare masked
; vctz.v.
define <vscale x 1 x i9> @vp_zero_undef_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_zero_undef_cttz_nxv1i9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vand.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v8, v9, 23, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_zero_undef_cttz_nxv1i9:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i9> %v
}
4187