; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

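; cttz is a single native vctz.v for the ZVBB configurations; the others
; exercise two expansions. RVI uses the identity cttz(x) = ctpop(~x & (x - 1))
; and expands ctpop with the usual 0x55/0x33/0x0f mask-shift-add sequence.
; RVF/RVD isolate the lowest set bit as x & -x, convert it to unsigned float,
; and recover the bit index from the exponent field: shift right by 23 and
; subtract the bias 127 for f32 (bit 52, bias 1023 for f64). For example,
; x = 8: x & -x = 8, and 8.0f has biased exponent 130, so 130 - 127 = 3.
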
define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVF-NEXT:    vzext.vf2 v10, v9
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v10, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v9, v10, 0
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsub.vx v8, v9, a1
; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVD-NEXT:    vzext.vf2 v10, v9
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v10, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v9, v10, 0
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v9, a1
; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

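; For i16 and wider elements the RVI expansion sums the per-nibble counts
; horizontally with a multiply by the all-0x01 constant (0x0101 = 257 here)
; and a shift that moves the top byte's total down to bit 0.
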
define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

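; For i32 elements RVF converts at single width with vfcvt.f.xu.v under a
; scratch rounding mode (fsrmi a1, 1 saves the old frm and selects
; round-towards-zero; fsrm restores it), while RVD widens to f64 with
; vfwcvt.f.xu.v, where the conversion is exact, and narrows the exponent out
; at bit 52 with bias 1023.
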
define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    vfcvt.f.xu.v v9, v9
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsrl.vi v8, v9, 23
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v9
; RVD-NEXT:    vnsrl.wx v9, v10, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v9, a1
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)

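; For i64 elements RV32I cannot hold the 64-bit popcount masks in a scalar
; register, so each constant is splatted at e32 with vmv.v.x (both halves of
; every 64-bit element receive the same pattern) and the masking uses vand.vv;
; RV64I materializes the constants with lui/addiw/slli/add instead. RVF
; computes a 32-bit exponent result and widens while subtracting the bias via
; vwsubu.vx.
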
define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vx v10, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vand.vv v9, v10, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    vand.vv v9, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 1
; RV64I-NEXT:    vsub.vx v9, v8, a5
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v9, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v9, v8, v9
; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v10, v9
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    vwsubu.vx v9, v8, a1
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT:    vmerge.vxm v8, v9, a1, v0
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v9, v8, v9
; RVD-NEXT:    vfcvt.f.xu.v v9, v9
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v9, v9, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v9, a1
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

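; The cases below repeat the same lowerings at LMUL=2. An AVL of 32 exceeds
; the 5-bit immediate of vsetivli, so v32i8 materializes the vector length in
; a scalar register first (li a1, 32 followed by vsetvli zero, a1, ...).
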
define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVF-NEXT:    vzext.vf2 v12, v10
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v12, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v12, 0
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsub.vx v8, v10, a1
; RVF-NEXT:    vmerge.vim v8, v8, 8, v0
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVD-NEXT:    vzext.vf2 v12, v10
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v12, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v12, 0
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v10, a1
; RVD-NEXT:    vmerge.vim v8, v8, 8, v0
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)

define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)

define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    vfcvt.f.xu.v v10, v10
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wx v10, v12, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v10, a1
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)

define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vx v12, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vand.vv v10, v12, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    vand.vv v10, v8, v12
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 1
; RV64I-NEXT:    vsub.vx v10, v8, a5
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v10, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v10, v8, v10
; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v12, v10
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT:    vmseq.vi v0, v8, 0
; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT:    vsrl.vi v8, v12, 23
; RVF-NEXT:    vwsubu.vx v10, v8, a1
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT:    vmerge.vxm v8, v10, a1, v0
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v10, v8, v10
; RVD-NEXT:    vfcvt.f.xu.v v10, v10
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v10, v10, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vmseq.vi v0, v8, 0
; RVD-NEXT:    vsub.vx v8, v10, a1
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vmerge.vxm v8, v8, a1, v0
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)

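; The _zero_undef variants pass i1 true to llvm.cttz, making the result for a
; zero input poison, so the vmseq.vi/vmerge fixup that substitutes the element
; width is dropped from the RVF/RVD lowerings; the ZVBB lowering is unchanged.
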
define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVF-NEXT:    vzext.vf2 v10, v8
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v8, 0
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v10, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RVD-NEXT:    vzext.vf2 v10, v8
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v8, 0
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v10, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v9, v8, a1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wx v8, v10, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_zero_undef_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vx v10, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vand.vv v9, v10, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    vand.vv v9, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 1
; RV64I-NEXT:    vsub.vx v9, v8, a5
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v9, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v9, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v9
; RVF-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v9, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v9, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vwsubu.vx v9, v8, a1
; RVF-NEXT:    vse64.v v9, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vrsub.vi v9, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v9
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVF-NEXT:    vzext.vf2 v12, v8
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v8, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v12, v8, 0
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v12, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; RVD-NEXT:    vzext.vf2 v12, v8
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v8, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v12, v8, 0
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v12, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
  store <32 x i8> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    li a1, 127
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
  store <16 x i16> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    li a1, 1
; RVI-NEXT:    vsub.vx v10, v8, a1
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vand.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wx v8, v12, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
  store <8 x i32> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_zero_undef_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vx v12, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vand.vv v10, v12, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    vand.vv v10, v8, v12
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 1
; RV64I-NEXT:    vsub.vx v10, v8, a5
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v10, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: cttz_zero_undef_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vrsub.vi v10, v8, 0
; RVF-NEXT:    vand.vv v8, v8, v10
; RVF-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT:    vfncvt.f.xu.w v10, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    li a1, 127
; RVF-NEXT:    vwsubu.vx v10, v8, a1
; RVF-NEXT:    vse64.v v10, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: cttz_zero_undef_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vrsub.vi v10, v8, 0
; RVD-NEXT:    vand.vv v8, v8, v10
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1023
; RVD-NEXT:    vsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: cttz_zero_undef_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vctz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
  store <4 x i64> %c, ptr %x
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32D: {{.*}}
; RV32F: {{.*}}
; RV64D: {{.*}}
; RV64F: {{.*}}