xref: /openbsd-src/gnu/llvm/lldb/scripts/analyze-project-deps.py (revision be691f3bb6417f04a68938fadbcaee2d5795e764)
1061da546Spatrick#!/usr/bin/env python
2061da546Spatrick
3061da546Spatrickimport argparse
4061da546Spatrickimport itertools
5061da546Spatrickimport os
6061da546Spatrickimport re
7061da546Spatrickimport sys
8061da546Spatrickfrom collections import defaultdict
9061da546Spatrick
10061da546Spatrickfrom use_lldb_suite import lldb_root
11061da546Spatrick
12061da546Spatrickparser = argparse.ArgumentParser(
13061da546Spatrick    description='Analyze LLDB project #include dependencies.')
14061da546Spatrickparser.add_argument('--show-counts', default=False, action='store_true',
15061da546Spatrick    help='When true, show the number of dependencies from each subproject')
16061da546Spatrickparser.add_argument('--discover-cycles', default=False, action='store_true',
17061da546Spatrick    help='When true, find and display all project dependency cycles.  Note,'
18061da546Spatrick         'this option is very slow')
19061da546Spatrick
20061da546Spatrickargs = parser.parse_args()
21061da546Spatrick
22061da546Spatricksrc_dir = os.path.join(lldb_root, "source")
23061da546Spatrickinc_dir = os.path.join(lldb_root, "include")
24061da546Spatrick
25061da546Spatricksrc_map = {}
26061da546Spatrick
27061da546Spatrickinclude_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"')
28061da546Spatrick
29061da546Spatrickdef is_sublist(small, big):
30061da546Spatrick    it = iter(big)
31061da546Spatrick    return all(c in it for c in small)
32061da546Spatrick
33061da546Spatrickdef normalize_host(str):
34061da546Spatrick    if str.startswith("lldb/Host"):
35061da546Spatrick        return "lldb/Host"
36061da546Spatrick    if str.startswith("Plugins"):
37061da546Spatrick        return "lldb/" + str
38061da546Spatrick    if str.startswith("lldb/../../source"):
39061da546Spatrick        return str.replace("lldb/../../source", "lldb")
40061da546Spatrick    return str
41061da546Spatrick
42061da546Spatrickdef scan_deps(this_dir, file):
43061da546Spatrick    global src_map
44061da546Spatrick    deps = {}
45061da546Spatrick    this_dir = normalize_host(this_dir)
46061da546Spatrick    if this_dir in src_map:
47061da546Spatrick        deps = src_map[this_dir]
48061da546Spatrick
49061da546Spatrick    with open(file) as f:
50061da546Spatrick        for line in list(f):
51061da546Spatrick            m = include_regex.match(line)
52061da546Spatrick            if m is None:
53061da546Spatrick                continue
54061da546Spatrick            relative = m.groups()[0].rstrip("/")
55061da546Spatrick            if relative == this_dir:
56061da546Spatrick                continue
57061da546Spatrick            relative = normalize_host(relative)
58061da546Spatrick            if relative in deps:
59061da546Spatrick                deps[relative] += 1
60061da546Spatrick            elif relative != this_dir:
61061da546Spatrick                deps[relative] = 1
62061da546Spatrick    if this_dir not in src_map and len(deps) > 0:
63061da546Spatrick        src_map[this_dir] = deps
64061da546Spatrick
65061da546Spatrickfor (base, dirs, files) in os.walk(inc_dir):
66061da546Spatrick    dir = os.path.basename(base)
67061da546Spatrick    relative = os.path.relpath(base, inc_dir)
68061da546Spatrick    inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
69061da546Spatrick    relative = relative.replace("\\", "/")
70061da546Spatrick    for inc in inc_files:
71061da546Spatrick        inc_path = os.path.join(base, inc)
72061da546Spatrick        scan_deps(relative, inc_path)
73061da546Spatrick
74061da546Spatrickfor (base, dirs, files) in os.walk(src_dir):
75061da546Spatrick    dir = os.path.basename(base)
76061da546Spatrick    relative = os.path.relpath(base, src_dir)
77061da546Spatrick    src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
78061da546Spatrick    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
79061da546Spatrick    norm_base_path = norm_base_path.replace("\\", "/")
80061da546Spatrick    for src in src_files:
81061da546Spatrick        src_path = os.path.join(base, src)
82061da546Spatrick        scan_deps(norm_base_path, src_path)
83061da546Spatrick    pass
84061da546Spatrick
85061da546Spatrickdef is_existing_cycle(path, cycles):
86061da546Spatrick    # If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
87061da546Spatrick    # then we don't just want to check for an occurrence of A -> B -> C in the
88061da546Spatrick    # list of known cycles, but every possible rotation of A -> B -> C.  For
89061da546Spatrick    # example, if we previously encountered B -> C -> A (with an implicit -> B
90061da546Spatrick    # at the end), then A -> B -> C is also a cycle.  This is an important
91061da546Spatrick    # optimization which reduces the search space by multiple orders of
92061da546Spatrick    # magnitude.
93061da546Spatrick    for i in range(0,len(path)):
94061da546Spatrick        if any(is_sublist(x, path) for x in cycles):
95061da546Spatrick            return True
96061da546Spatrick        path = [path[-1]] + path[0:-1]
97061da546Spatrick    return False
98061da546Spatrick
99061da546Spatrickdef expand(path_queue, path_lengths, cycles, src_map):
100061da546Spatrick    # We do a breadth first search, to make sure we visit all paths in order
101061da546Spatrick    # of ascending length.  This is an important optimization to make sure that
102061da546Spatrick    # short cycles are discovered first, which will allow us to discard longer
103061da546Spatrick    # cycles which grow the search space exponentially the longer they get.
104061da546Spatrick    while len(path_queue) > 0:
105061da546Spatrick        cur_path = path_queue.pop(0)
106061da546Spatrick        if is_existing_cycle(cur_path, cycles):
107061da546Spatrick            continue
108061da546Spatrick
109061da546Spatrick        next_len = path_lengths.pop(0) + 1
110061da546Spatrick        last_component = cur_path[-1]
111061da546Spatrick
112*dda28197Spatrick        for item in src_map.get(last_component, []):
113061da546Spatrick            if item.startswith("clang"):
114061da546Spatrick                continue
115061da546Spatrick
116061da546Spatrick            if item in cur_path:
117061da546Spatrick                # This is a cycle.  Minimize it and then check if the result is
118061da546Spatrick                # already in the list of cycles.  Insert it (or not) and then
119061da546Spatrick                # exit.
120061da546Spatrick                new_index = cur_path.index(item)
121061da546Spatrick                cycle = cur_path[new_index:]
122061da546Spatrick                if not is_existing_cycle(cycle, cycles):
123061da546Spatrick                    cycles.append(cycle)
124061da546Spatrick                continue
125061da546Spatrick
126061da546Spatrick            path_lengths.append(next_len)
127061da546Spatrick            path_queue.append(cur_path + [item])
128061da546Spatrick    pass
129061da546Spatrick
130061da546Spatrickcycles = []
131061da546Spatrick
132061da546Spatrickpath_queue = [[x] for x in iter(src_map)]
133061da546Spatrickpath_lens = [1] * len(path_queue)
134061da546Spatrick
135061da546Spatrickitems = list(src_map.items())
136061da546Spatrickitems.sort(key = lambda A : A[0])
137061da546Spatrick
138061da546Spatrickfor (path, deps) in items:
139061da546Spatrick    print(path + ":")
140061da546Spatrick    sorted_deps = list(deps.items())
141061da546Spatrick    if args.show_counts:
142061da546Spatrick        sorted_deps.sort(key = lambda A: (A[1], A[0]))
143061da546Spatrick        for dep in sorted_deps:
144061da546Spatrick            print("\t{} [{}]".format(dep[0], dep[1]))
145061da546Spatrick    else:
146061da546Spatrick        sorted_deps.sort(key = lambda A: A[0])
147061da546Spatrick        for dep in sorted_deps:
148061da546Spatrick            print("\t{}".format(dep[0]))
149061da546Spatrick
150061da546Spatrickdef iter_cycles(cycles):
151061da546Spatrick    global src_map
152061da546Spatrick    for cycle in cycles:
153061da546Spatrick        cycle.append(cycle[0])
154061da546Spatrick        zipper = list(zip(cycle[0:-1], cycle[1:]))
155061da546Spatrick        result = [(x, src_map[x][y], y) for (x,y) in zipper]
156061da546Spatrick        total = 0
157061da546Spatrick        smallest = result[0][1]
158061da546Spatrick        for (first, value, last) in result:
159061da546Spatrick            total += value
160061da546Spatrick            smallest = min(smallest, value)
161061da546Spatrick        yield (total, smallest, result)
162061da546Spatrick
163061da546Spatrickif args.discover_cycles:
164061da546Spatrick    print("Analyzing cycles...")
165061da546Spatrick
166061da546Spatrick    expand(path_queue, path_lens, cycles, src_map)
167061da546Spatrick
168061da546Spatrick    average = sum([len(x)+1 for x in cycles]) / len(cycles)
169061da546Spatrick
170061da546Spatrick    print("Found {} cycles.  Average cycle length = {}.".format(len(cycles), average))
171061da546Spatrick    counted = list(iter_cycles(cycles))
172061da546Spatrick    if args.show_counts:
173061da546Spatrick        counted.sort(key = lambda A: A[0])
174061da546Spatrick        for (total, smallest, cycle) in counted:
175061da546Spatrick            sys.stdout.write("{} deps to break: ".format(total))
176061da546Spatrick            sys.stdout.write(cycle[0][0])
177061da546Spatrick            for (first, count, last) in cycle:
178061da546Spatrick                sys.stdout.write(" [{}->] {}".format(count, last))
179061da546Spatrick            sys.stdout.write("\n")
180061da546Spatrick    else:
181061da546Spatrick        for cycle in cycles:
182061da546Spatrick            cycle.append(cycle[0])
183061da546Spatrick            print(" -> ".join(cycle))
184061da546Spatrick
185061da546Spatrick    print("Analyzing islands...")
186061da546Spatrick    islands = []
187061da546Spatrick    outgoing_counts = defaultdict(int)
188061da546Spatrick    incoming_counts = defaultdict(int)
189061da546Spatrick    for (total, smallest, cycle) in counted:
190061da546Spatrick        for (first, count, last) in cycle:
191061da546Spatrick            outgoing_counts[first] += count
192061da546Spatrick            incoming_counts[last] += count
193061da546Spatrick    for cycle in cycles:
194061da546Spatrick        this_cycle = set(cycle)
195061da546Spatrick        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
196061da546Spatrick        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
197061da546Spatrick        islands = disjoints + [set.union(this_cycle, *overlaps)]
198061da546Spatrick    print("Found {} disjoint cycle islands...".format(len(islands)))
199061da546Spatrick    for island in islands:
200061da546Spatrick        print("Island ({} elements)".format(len(island)))
201061da546Spatrick        sorted = []
202061da546Spatrick        for node in island:
203061da546Spatrick            sorted.append((node, incoming_counts[node], outgoing_counts[node]))
204061da546Spatrick        sorted.sort(key = lambda x: x[1]+x[2])
205061da546Spatrick        for (node, inc, outg) in sorted:
206061da546Spatrick            print("  {} [{} in, {} out]".format(node, inc, outg))
207061da546Spatrick    sys.stdout.flush()
208061da546Spatrickpass
209