xref: /llvm-project/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll (revision eb3f1aec6eff08ce1c76259bb0801f6457a55400)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d | FileCheck %s --check-prefixes=CHECK,NOZVBB
3; Vector ctpop exists only under ZVBB
4; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvbb | FileCheck %s --check-prefixes=CHECK,ZVBB
5
6define void @bswap() {
7; CHECK-LABEL: 'bswap'
8; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i16 @llvm.bswap.i16(i16 undef)
9; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
10; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
11; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
12; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
13; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
14; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
15; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
16; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
17; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
18; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call i32 @llvm.bswap.i32(i32 undef)
19; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %12 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
20; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %13 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
21; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
22; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
23; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
24; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
25; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
26; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %19 = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
27; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %20 = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
28; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call i64 @llvm.bswap.i64(i64 undef)
29; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
30; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
31; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
32; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
33; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
34; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %27 = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
35; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
36; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %29 = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
37; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %30 = call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
38; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
39;
40  call i16 @llvm.bswap.i16(i16 undef)
41  call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
42  call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
43  call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
44  call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
45  call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
46  call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
47  call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
48  call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
49  call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
50  call i32 @llvm.bswap.i32(i32 undef)
51  call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
52  call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
53  call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
54  call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
55  call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
56  call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
57  call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
58  call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
59  call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
60  call i64 @llvm.bswap.i64(i64 undef)
61  call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
62  call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
63  call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
64  call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
65  call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
66  call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
67  call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
68  call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
69  call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
70  ret void
71}
72
73define void @bitreverse() {
74; CHECK-LABEL: 'bitreverse'
75; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.bitreverse.i8(i8 undef)
76; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %2 = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
77; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %3 = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
78; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %4 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
79; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
80; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
81; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
82; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %8 = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
83; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %9 = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
84; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %10 = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
85; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call i16 @llvm.bitreverse.i16(i16 undef)
86; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %12 = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
87; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %13 = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
88; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %14 = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
89; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %15 = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
90; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %16 = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
91; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %17 = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
92; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %18 = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
93; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %19 = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
94; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %20 = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
95; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call i32 @llvm.bitreverse.i32(i32 undef)
96; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %22 = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
97; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %23 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
98; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %24 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
99; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %25 = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
100; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %26 = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
101; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %27 = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
102; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %28 = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
103; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %29 = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
104; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %30 = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
105; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %31 = call i64 @llvm.bitreverse.i64(i64 undef)
106; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %32 = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
107; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %33 = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
108; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %34 = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
109; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %35 = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
110; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %36 = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
111; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %37 = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
112; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %38 = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
113; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %39 = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
114; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %40 = call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
115; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
116;
117  call i8 @llvm.bitreverse.i8(i8 undef)
118  call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
119  call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
120  call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
121  call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
122  call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
123  call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
124  call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
125  call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
126  call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
127  call i16 @llvm.bitreverse.i16(i16 undef)
128  call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
129  call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
130  call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
131  call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
132  call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
133  call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
134  call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
135  call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
136  call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
137  call i32 @llvm.bitreverse.i32(i32 undef)
138  call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
139  call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
140  call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
141  call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
142  call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
143  call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
144  call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
145  call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
146  call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
147  call i64 @llvm.bitreverse.i64(i64 undef)
148  call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
149  call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
150  call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
151  call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
152  call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
153  call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
154  call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
155  call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
156  call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
157  ret void
158}
159
160define void @ctlz() {
161; NOZVBB-LABEL: 'ctlz'
162; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false)
163; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false)
164; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false)
165; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false)
166; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
167; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
168; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
169; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
170; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
171; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
172; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
173; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false)
174; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false)
175; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false)
176; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false)
177; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
178; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
179; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
180; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
181; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
182; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
183; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false)
184; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false)
185; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false)
186; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false)
187; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
188; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
189; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
190; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
191; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
192; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false)
193; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false)
194; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false)
195; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false)
196; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %35 = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
197; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %36 = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
198; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %37 = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
199; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %38 = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
200; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %39 = call <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
201; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
202;
203; ZVBB-LABEL: 'ctlz'
204; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false)
205; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false)
206; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false)
207; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false)
208; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
209; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
210; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
211; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
212; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
213; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
214; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
215; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false)
216; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false)
217; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false)
218; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false)
219; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
220; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
221; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
222; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
223; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
224; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
225; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false)
226; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false)
227; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false)
228; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false)
229; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
230; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
231; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
232; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
233; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
234; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false)
235; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false)
236; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false)
237; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false)
238; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
239; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
240; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
241; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %38 = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
242; ZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %39 = call <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
243; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
244;
245  call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false)
246  call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false)
247  call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false)
248  call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false)
249  call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
250  call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
251  call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
252  call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
253  call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
254  call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
255  call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
256  call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false)
257  call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false)
258  call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false)
259  call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false)
260  call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
261  call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
262  call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
263  call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
264  call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
265  call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
266  call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false)
267  call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false)
268  call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false)
269  call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false)
270  call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
271  call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
272  call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
273  call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
274  call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
275  call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false)
276  call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false)
277  call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false)
278  call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false)
279  call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
280  call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
281  call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
282  call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
283  call <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
284  ret void
285}
286
287define void @cttz() {
288; NOZVBB-LABEL: 'cttz'
289; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false)
290; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false)
291; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false)
292; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false)
293; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
294; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
295; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
296; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
297; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
298; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
299; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
300; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false)
301; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false)
302; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false)
303; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false)
304; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
305; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
306; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
307; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
308; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
309; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
310; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false)
311; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false)
312; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false)
313; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false)
314; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %26 = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
315; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %27 = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
316; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %28 = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
317; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %29 = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
318; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %30 = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
319; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false)
320; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false)
321; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false)
322; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false)
323; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %35 = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
324; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %36 = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
325; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %37 = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
326; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %38 = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
327; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %39 = call <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
328; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
329;
330; ZVBB-LABEL: 'cttz'
331; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false)
332; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false)
333; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false)
334; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false)
335; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
336; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
337; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
338; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
339; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
340; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
341; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
342; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false)
343; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false)
344; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false)
345; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false)
346; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
347; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
348; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
349; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
350; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
351; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
352; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false)
353; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false)
354; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false)
355; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false)
356; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
357; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
358; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
359; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
360; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %30 = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
361; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false)
362; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false)
363; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false)
364; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false)
365; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
366; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
367; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
368; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %38 = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
369; ZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %39 = call <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
370; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
371;
372  call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false)
373  call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false)
374  call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false)
375  call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false)
376  call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
377  call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
378  call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
379  call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
380  call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
381  call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
382  call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
383  call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false)
384  call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false)
385  call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false)
386  call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false)
387  call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
388  call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
389  call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
390  call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
391  call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
392  call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
393  call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false)
394  call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false)
395  call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false)
396  call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false)
397  call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
398  call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
399  call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
400  call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
401  call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
402  call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false)
403  call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false)
404  call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false)
405  call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false)
406  call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
407  call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
408  call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
409  call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
410  call <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
411  ret void
412}
413
414define void @ctpop() {
415; NOZVBB-LABEL: 'ctpop'
416; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
417; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
418; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
419; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
420; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
421; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
422; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
423; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
424; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
425; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
426; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
427; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
428; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
429; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
430; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
431; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
432; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
433; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
434; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
435; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
436; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
437; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
438; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
439; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
440; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
441; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
442; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
443; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
444; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
445; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
446; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
447; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
448; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
449; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
450; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
451; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
452; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
453; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
454; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
455; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
456; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
457;
458; ZVBB-LABEL: 'ctpop'
459; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
460; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
461; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
462; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
463; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
464; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
465; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
466; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
467; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
468; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
469; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
470; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
471; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
472; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
473; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
474; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
475; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
476; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
477; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
478; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
479; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
480; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
481; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
482; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
483; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
484; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
485; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
486; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
487; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
488; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
489; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
490; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
491; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
492; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
493; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
494; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
495; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
496; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
497; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
498; ZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
499; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
500;
501  call i8 @llvm.ctpop.i8(i8 undef)
502  call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
503  call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
504  call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
505  call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
506  call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
507  call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
508  call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
509  call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
510  call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
511  call i16 @llvm.ctpop.i16(i16 undef)
512  call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
513  call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
514  call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
515  call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
516  call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
517  call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
518  call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
519  call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
520  call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
521  call i32 @llvm.ctpop.i32(i32 undef)
522  call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
523  call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
524  call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
525  call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
526  call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
527  call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
528  call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
529  call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
530  call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
531  call i64 @llvm.ctpop.i64(i64 undef)
532  call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
533  call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
534  call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
535  call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
536  call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
537  call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
538  call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
539  call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
540  call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
541  ret void
542}
543
544define void @vp_bswap() {
545; CHECK-LABEL: 'vp_bswap'
546; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
547; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
548; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
549; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
550; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
551; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
552; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
553; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
554; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
555; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
556; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %11 = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
557; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %12 = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
558; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %13 = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
559; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
560; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
561; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
562; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
563; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
564; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %19 = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
565; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %20 = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
566; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %21 = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
567; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
568; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
569; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
570; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
571; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
572; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %27 = call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
573; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
574;
575  call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
576  call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
577  call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
578  call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
579  call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
580  call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
581  call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
582  call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
583  call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
584  call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
585  call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
586  call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
587  call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
588  call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
589  call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
590  call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
591  call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
592  call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
593  call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
594  call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
595  call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
596  call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
597  call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
598  call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
599  call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
600  call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
601  call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
602  ret void
603}
604
605define void @vp_ctpop() {
606; CHECK-LABEL: 'vp_ctpop'
607; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
608; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
609; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
610; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
611; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
612; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
613; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
614; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
615; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
616; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
617; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
618; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
619; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
620; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
621; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %15 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
622; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
623; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
624; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
625; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %19 = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
626; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %20 = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
627; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %21 = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
628; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
629; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
630; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
631; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %25 = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
632; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
633; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
634; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %28 = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
635; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %29 = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
636; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %30 = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
637; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %31 = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
638; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
639; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
640; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
641; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %35 = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
642; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %36 = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
643; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
644;
645  call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
646  call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
647  call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
648  call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
649  call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
650  call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
651  call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
652  call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
653  call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
654  call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
655  call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
656  call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
657  call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
658  call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
659  call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
660  call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
661  call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
662  call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
663  call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
664  call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
665  call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
666  call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
667  call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
668  call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
669  call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
670  call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
671  call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
672  call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
673  call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
674  call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
675  call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
676  call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
677  call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
678  call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
679  call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
680  call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
681  ret void
682}
683
684define void @vp_ctlz() {
685; CHECK-LABEL: 'vp_ctlz'
686; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
687; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
688; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
689; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
690; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
691; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
692; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
693; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
694; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
695; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
696; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
697; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
698; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
699; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
700; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
701; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
702; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
703; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
704; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
705; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
706; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
707; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %22 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
708; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %23 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
709; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %24 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
710; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %25 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
711; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
712; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
713; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
714; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
715; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
716; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
717; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %32 = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
718; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %33 = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
719; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %34 = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
720; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %35 = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
721; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
722; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
723; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
724; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
725; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
726; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %41 = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
727; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %42 = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
728; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %43 = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
729; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %44 = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
730; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
731; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
732; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
733; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
734; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
735; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
736;
737  call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
738  call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
739  call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
740  call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
741  call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
742  call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
743  call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
744  call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
745  call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
746  call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
747  call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
748  call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
749  call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
750  call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
751  call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
752  call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
753  call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
754  call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
755  call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
756  call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
757  call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
758  call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
759  call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
760  call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
761  call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
762  call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
763  call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
764  call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
765  call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
766  call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
767  call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
768  call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
769  call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
770  call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
771  call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
772  call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
773  call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
774  call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
775  call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
776  call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
777  call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
778  call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
779  call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
780  call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
781  call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
782  call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
783  call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
784  call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
785  call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
786  ret void
787}
788
789define void @vp_cttz() {
790; CHECK-LABEL: 'vp_cttz'
791; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
792; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
793; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
794; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
795; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
796; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
797; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
798; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
799; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
800; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
801; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
802; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
803; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
804; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
805; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
806; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
807; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
808; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
809; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
810; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
811; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
812; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %22 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
813; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %23 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
814; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %24 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
815; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %25 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
816; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
817; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
818; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
819; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
820; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
821; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
822; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %32 = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
823; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %33 = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
824; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %34 = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
825; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %35 = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
826; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
827; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
828; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
829; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
830; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
831; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %41 = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
832; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %42 = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
833; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %43 = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
834; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %44 = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
835; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
836; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
837; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
838; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
839; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
840; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
841;
842  call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
843  call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
844  call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
845  call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
846  call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
847  call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
848  call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
849  call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
850  call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
851  call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
852  call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
853  call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
854  call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
855  call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
856  call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
857  call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
858  call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
859  call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
860  call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
861  call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
862  call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
863  call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
864  call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
865  call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
866  call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
867  call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
868  call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
869  call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
870  call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
871  call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
872  call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
873  call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
874  call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
875  call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
876  call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
877  call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
878  call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
879  call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
880  call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
881  call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
882  call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
883  call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
884  call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
885  call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
886  call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
887  call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
888  call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
889  call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
890  call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
891  ret void
892}
893
894declare i16 @llvm.bswap.i16(i16)
895declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
896declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
897declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
898declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
899declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)
900declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)
901declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)
902declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)
903declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)
904declare i32 @llvm.bswap.i32(i32)
905declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
906declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
907declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
908declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
909declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
910declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
911declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
912declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
913declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)
914declare i64 @llvm.bswap.i64(i64)
915declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
916declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
917declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
918declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
919declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)
920declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)
921declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)
922declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)
923declare <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64>)
924
925declare i8 @llvm.bitreverse.i8(i8)
926declare <2 x i8> @llvm.bitreverse.v2i8(<2 x i8>)
927declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
928declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
929declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
930declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)
931declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)
932declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)
933declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)
934declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)
935declare i16 @llvm.bitreverse.i16(i16)
936declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
937declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
938declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
939declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
940declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
941declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
942declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
943declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
944declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
945declare i32 @llvm.bitreverse.i32(i32)
946declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
947declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
948declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
949declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
950declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
951declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
952declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
953declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
954declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)
955declare i64 @llvm.bitreverse.i64(i64)
956declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
957declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
958declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
959declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>)
960declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)
961declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)
962declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)
963declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
964declare <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64>)
965
966declare i8 @llvm.ctpop.i8(i8)
967declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
968declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
969declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
970declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
971declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)
972declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)
973declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)
974declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)
975declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
976declare i16 @llvm.ctpop.i16(i16)
977declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
978declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
979declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
980declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
981declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
982declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
983declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
984declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
985declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
986declare i32 @llvm.ctpop.i32(i32)
987declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
988declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
989declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
990declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
991declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
992declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
993declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
994declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
995declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
996declare i64 @llvm.ctpop.i64(i64)
997declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
998declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
999declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
1000declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
1001declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
1002declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
1003declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
1004declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)
1005declare <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64>)
1006
1007declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)
1008declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)
1009declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)
1010declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)
1011declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
1012declare <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
1013declare <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
1014declare <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1015declare <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1016declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
1017declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
1018declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
1019declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
1020declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1021declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1022declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1023declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1024declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1025declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)
1026declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)
1027declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)
1028declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)
1029declare <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1030declare <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1031declare <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1032declare <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1033declare <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
1034
1035declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1)
1036declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>, i1)
1037declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1)
1038declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
1039declare <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8>, i1)
1040declare <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8>, i1)
1041declare <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8>, i1)
1042declare <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8>, i1)
1043declare <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8>, i1)
1044declare <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8>, i1)
1045declare <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8>, i1)
1046declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>, i1)
1047declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1)
1048declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
1049declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
1050declare <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16>, i1)
1051declare <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16>, i1)
1052declare <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16>, i1)
1053declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>, i1)
1054declare <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16>, i1)
1055declare <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16>, i1)
1056declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
1057declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
1058declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
1059declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
1060declare <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32>, i1)
1061declare <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32>, i1)
1062declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
1063declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1)
1064declare <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32>, i1)
1065declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
1066declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
1067declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
1068declare <16 x i64> @llvm.ctlz.v16i64(<16 x i64>, i1)
1069declare <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64>, i1)
1070declare <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64>, i1)
1071declare <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64>, i1)
1072declare <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64>, i1)
1073declare <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64>, i1)
1074
1075declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
1076declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
1077declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
1078declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
1079declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)
1080declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)
1081declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)
1082declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)
1083declare <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8>, i1)
1084declare <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8>, i1)
1085declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)
1086declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
1087declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
1088declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
1089declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
1090declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)
1091declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)
1092declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)
1093declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)
1094declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)
1095declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)
1096declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
1097declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
1098declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
1099declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
1100declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)
1101declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)
1102declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
1103declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)
1104declare <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32>, i1)
1105declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
1106declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
1107declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
1108declare <16 x i64> @llvm.cttz.v16i64(<16 x i64>, i1)
1109declare <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64>, i1)
1110declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>, i1)
1111declare <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64>, i1)
1112declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1)
1113declare <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64>, i1)
1114
1115declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
1116declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
1117declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
1118declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
1119declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
1120declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
1121declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
1122declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
1123declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
1124declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
1125declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
1126declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
1127declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
1128declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
1129declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
1130declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
1131declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1132declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1133declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
1134declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
1135declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
1136declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
1137declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1138declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1139declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1140declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1141declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1142declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
1143declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
1144declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
1145declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
1146declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1147declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1148declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1149declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1150declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
1151
1152declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
1153declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
1154declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
1155declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
1156declare <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
1157declare <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
1158declare <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
1159declare <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
1160declare <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
1161declare <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
1162declare <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
1163declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
1164declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
1165declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
1166declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
1167declare <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
1168declare <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
1169declare <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
1170declare <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
1171declare <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
1172declare <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
1173declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
1174declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
1175declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
1176declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
1177declare <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
1178declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
1179declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
1180declare <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
1181declare <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
1182declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
1183declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
1184declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
1185declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
1186declare <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
1187declare <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
1188declare <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
1189declare <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
1190declare <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
1191
1192declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
1193declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
1194declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
1195declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
1196declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
1197declare <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
1198declare <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
1199declare <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
1200declare <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
1201declare <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
1202declare <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
1203declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
1204declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
1205declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
1206declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
1207declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
1208declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
1209declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
1210declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
1211declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
1212declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
1213declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
1214declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
1215declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
1216declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
1217declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
1218declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
1219declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
1220declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
1221declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
1222declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
1223declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
1224declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
1225declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
1226declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
1227declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
1228declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
1229declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
1230declare <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
1231