xref: /llvm-project/lldb/scripts/analyze-project-deps.py (revision 602e47c5f9fd2e14c7bfb6111e6558fa0d27c87f)
1#!/usr/bin/env python
2
3import argparse
4import itertools
5import os
6import re
7import sys
8from collections import defaultdict
9
10from use_lldb_suite import lldb_root
11
12parser = argparse.ArgumentParser(
13    description="Analyze LLDB project #include dependencies."
14)
15parser.add_argument(
16    "--show-counts",
17    default=False,
18    action="store_true",
19    help="When true, show the number of dependencies from each subproject",
20)
21parser.add_argument(
22    "--discover-cycles",
23    default=False,
24    action="store_true",
25    help="When true, find and display all project dependency cycles.  Note,"
26    "this option is very slow",
27)
28
29args = parser.parse_args()
30
31src_dir = os.path.join(lldb_root, "source")
32inc_dir = os.path.join(lldb_root, "include")
33
34src_map = {}
35
36include_regex = re.compile('#include "((lldb|Plugins|clang)(.*/)+).*"')
37
38
39def is_sublist(small, big):
40    it = iter(big)
41    return all(c in it for c in small)
42
43
44def normalize_host(str):
45    if str.startswith("lldb/Host"):
46        return "lldb/Host"
47    if str.startswith("Plugins"):
48        return "lldb/" + str
49    if str.startswith("lldb/../../source"):
50        return str.replace("lldb/../../source", "lldb")
51    return str
52
53
54def scan_deps(this_dir, file):
55    global src_map
56    deps = {}
57    this_dir = normalize_host(this_dir)
58    if this_dir in src_map:
59        deps = src_map[this_dir]
60
61    with open(file) as f:
62        for line in list(f):
63            m = include_regex.match(line)
64            if m is None:
65                continue
66            relative = m.groups()[0].rstrip("/")
67            if relative == this_dir:
68                continue
69            relative = normalize_host(relative)
70            if relative in deps:
71                deps[relative] += 1
72            elif relative != this_dir:
73                deps[relative] = 1
74    if this_dir not in src_map and len(deps) > 0:
75        src_map[this_dir] = deps
76
77
78for base, dirs, files in os.walk(inc_dir):
79    dir = os.path.basename(base)
80    relative = os.path.relpath(base, inc_dir)
81    inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
82    relative = relative.replace("\\", "/")
83    for inc in inc_files:
84        inc_path = os.path.join(base, inc)
85        scan_deps(relative, inc_path)
86
87for base, dirs, files in os.walk(src_dir):
88    dir = os.path.basename(base)
89    relative = os.path.relpath(base, src_dir)
90    src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
91    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
92    norm_base_path = norm_base_path.replace("\\", "/")
93    for src in src_files:
94        src_path = os.path.join(base, src)
95        scan_deps(norm_base_path, src_path)
96    pass
97
98
99def is_existing_cycle(path, cycles):
100    # If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
101    # then we don't just want to check for an occurrence of A -> B -> C in the
102    # list of known cycles, but every possible rotation of A -> B -> C.  For
103    # example, if we previously encountered B -> C -> A (with an implicit -> B
104    # at the end), then A -> B -> C is also a cycle.  This is an important
105    # optimization which reduces the search space by multiple orders of
106    # magnitude.
107    for i in range(0, len(path)):
108        if any(is_sublist(x, path) for x in cycles):
109            return True
110        path = [path[-1]] + path[0:-1]
111    return False
112
113
114def expand(path_queue, path_lengths, cycles, src_map):
115    # We do a breadth first search, to make sure we visit all paths in order
116    # of ascending length.  This is an important optimization to make sure that
117    # short cycles are discovered first, which will allow us to discard longer
118    # cycles which grow the search space exponentially the longer they get.
119    while len(path_queue) > 0:
120        cur_path = path_queue.pop(0)
121        if is_existing_cycle(cur_path, cycles):
122            continue
123
124        next_len = path_lengths.pop(0) + 1
125        last_component = cur_path[-1]
126
127        for item in src_map.get(last_component, []):
128            if item.startswith("clang"):
129                continue
130
131            if item in cur_path:
132                # This is a cycle.  Minimize it and then check if the result is
133                # already in the list of cycles.  Insert it (or not) and then
134                # exit.
135                new_index = cur_path.index(item)
136                cycle = cur_path[new_index:]
137                if not is_existing_cycle(cycle, cycles):
138                    cycles.append(cycle)
139                continue
140
141            path_lengths.append(next_len)
142            path_queue.append(cur_path + [item])
143    pass
144
145
146cycles = []
147
148path_queue = [[x] for x in iter(src_map)]
149path_lens = [1] * len(path_queue)
150
151items = list(src_map.items())
152items.sort(key=lambda A: A[0])
153
154for path, deps in items:
155    print(path + ":")
156    sorted_deps = list(deps.items())
157    if args.show_counts:
158        sorted_deps.sort(key=lambda A: (A[1], A[0]))
159        for dep in sorted_deps:
160            print("\t{} [{}]".format(dep[0], dep[1]))
161    else:
162        sorted_deps.sort(key=lambda A: A[0])
163        for dep in sorted_deps:
164            print("\t{}".format(dep[0]))
165
166
167def iter_cycles(cycles):
168    global src_map
169    for cycle in cycles:
170        cycle.append(cycle[0])
171        zipper = list(zip(cycle[0:-1], cycle[1:]))
172        result = [(x, src_map[x][y], y) for (x, y) in zipper]
173        total = 0
174        smallest = result[0][1]
175        for first, value, last in result:
176            total += value
177            smallest = min(smallest, value)
178        yield (total, smallest, result)
179
180
181if args.discover_cycles:
182    print("Analyzing cycles...")
183
184    expand(path_queue, path_lens, cycles, src_map)
185
186    average = sum([len(x) + 1 for x in cycles]) / len(cycles)
187
188    print("Found {} cycles.  Average cycle length = {}.".format(len(cycles), average))
189    counted = list(iter_cycles(cycles))
190    if args.show_counts:
191        counted.sort(key=lambda A: A[0])
192        for total, smallest, cycle in counted:
193            sys.stdout.write("{} deps to break: ".format(total))
194            sys.stdout.write(cycle[0][0])
195            for first, count, last in cycle:
196                sys.stdout.write(" [{}->] {}".format(count, last))
197            sys.stdout.write("\n")
198    else:
199        for cycle in cycles:
200            cycle.append(cycle[0])
201            print(" -> ".join(cycle))
202
203    print("Analyzing islands...")
204    islands = []
205    outgoing_counts = defaultdict(int)
206    incoming_counts = defaultdict(int)
207    for total, smallest, cycle in counted:
208        for first, count, last in cycle:
209            outgoing_counts[first] += count
210            incoming_counts[last] += count
211    for cycle in cycles:
212        this_cycle = set(cycle)
213        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
214        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
215        islands = disjoints + [set.union(this_cycle, *overlaps)]
216    print("Found {} disjoint cycle islands...".format(len(islands)))
217    for island in islands:
218        print("Island ({} elements)".format(len(island)))
219        sorted = []
220        for node in island:
221            sorted.append((node, incoming_counts[node], outgoing_counts[node]))
222        sorted.sort(key=lambda x: x[1] + x[2])
223        for node, inc, outg in sorted:
224            print("  {} [{} in, {} out]".format(node, inc, outg))
225    sys.stdout.flush()
226pass
227