; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

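; The checked lowerings fall into three strategies, in outline:
; - RVI (no vector FP): smear the leading set bit across the element with
;   shift/or, invert, then popcount the resulting mask with the classic
;   SWAR sequence (masks 0x55.., 0x33.., 0x0F..).
; - RVF/RVD: convert to float and read floor(log2 x) out of the biased
;   exponent, giving ctlz(x) = (Bias + EltBits - 1) - exponent; e.g.
;   134 = 127 + 7 for e8 via f32, 158 = 127 + 31 for e32 via f32, and
;   1086 = 1023 + 63 for e64 via f64. A trailing vminu clamps the result
;   to EltBits so that x == 0 still yields the defined answer.
; - ZVBB: a single native vclz.v.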
define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vzext.vf2 v10, v8
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v8, 0
; RVF-NEXT:    vrsub.vx v8, v10, a1
; RVF-NEXT:    li a1, 8
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vzext.vf2 v10, v8
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v8, 0
; RVD-NEXT:    vrsub.vx v8, v10, a1
; RVD-NEXT:    li a1, 8
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)

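; For e16 and wider, the RVI popcount tail folds the per-byte sums with a
; multiply by a repeated-0x01 constant (257 = 0x0101 for e16,
; 0x01010101 for e32) followed by a shift that leaves the total in the
; top byte.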
define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)

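; e32 does not convert exactly to f32, so the RVF path brackets the
; convert with fsrmi/fsrm to force round-toward-zero (frm = 1);
; otherwise a value just below a power of two could round up and
; overstate the exponent. The RVD path avoids this by widening to f64,
; which represents any u32 exactly.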
define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wx v8, v10, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

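; For e64, RV32 cannot materialize 64-bit scalar constants, so the SWAR
; masks are built by splatting the 32-bit pattern at e32 under a
; temporary vsetivli and reused at e64; RV64 builds them with
; addiw/slli/add instead. The RVF path narrows to f32 (vfncvt.f.xu.w)
; under round-toward-zero and widens the subtract with vwsubu.vv
; against a splat of 190 = 127 + 63.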
define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vx v10, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vand.vv v9, v10, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    vand.vv v9, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vx v9, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v9, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v9, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v10, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    vwsubu.vv v10, v9, v8
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT:    vminu.vx v8, v10, a1
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)

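; The 256-bit cases below repeat the same expansions at doubled LMUL
; (m2/m4 register groups); only the vsetvli configuration and register
; allocation differ from the 128-bit versions above.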
define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vzext.vf2 v12, v8
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v8, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v12, v8, 0
; RVF-NEXT:    vrsub.vx v8, v12, a1
; RVF-NEXT:    li a1, 8
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vzext.vf2 v12, v8
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v8, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v12, v8, 0
; RVD-NEXT:    vrsub.vx v8, v12, a1
; RVD-NEXT:    li a1, 8
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)

define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)

define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wx v8, v12, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)

define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vx v12, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vand.vv v10, v12, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    vand.vv v10, v8, v12
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vx v10, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v10, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v10, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v11, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v11, 23
; RVF-NEXT:    vwsubu.vv v12, v10, v8
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT:    vminu.vx v8, v12, a1
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)

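; The zero_undef variants pass i1 true, making the result poison for a
; zero input, so the trailing vminu.vx clamp to EltBits (and the li of
; the clamp constant) is dropped from the RVF/RVD expansions; the RVI
; and ZVBB sequences are unchanged apart from scheduling.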
define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vzext.vf2 v10, v8
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v8, 0
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vrsub.vx v8, v10, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vzext.vf2 v10, v8
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v8, 0
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vrsub.vx v8, v10, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wx v8, v10, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_zero_undef_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vx v10, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vand.vv v9, v10, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    vand.vv v9, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vx v9, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v9, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v9, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v10, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    vwsubu.vv v10, v9, v8
; RVF-NEXT:    vse64.v v10, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vzext.vf2 v12, v8
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v8, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v12, v8, 0
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vrsub.vx v8, v12, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vzext.vf2 v12, v8
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v8, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v12, v8, 0
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vrsub.vx v8, v12, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
  store <32 x i8> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
  store <16 x i16> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wx v8, v12, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true)
  store <8 x i32> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_zero_undef_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vx v12, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vand.vv v10, v12, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    vand.vv v10, v8, v12
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vx v10, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v10, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v10, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v11, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v11, 23
; RVF-NEXT:    vwsubu.vv v12, v10, v8
; RVF-NEXT:    vse64.v v12, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
  store <4 x i64> %c, ptr %x
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32D: {{.*}}
; RV32F: {{.*}}
; RV64D: {{.*}}
; RV64F: {{.*}}