1061da546Spatrick#!/usr/bin/env python 2061da546Spatrick 3061da546Spatrickimport argparse 4061da546Spatrickimport itertools 5061da546Spatrickimport os 6061da546Spatrickimport re 7061da546Spatrickimport sys 8061da546Spatrickfrom collections import defaultdict 9061da546Spatrick 10061da546Spatrickfrom use_lldb_suite import lldb_root 11061da546Spatrick 12061da546Spatrickparser = argparse.ArgumentParser( 13061da546Spatrick description='Analyze LLDB project #include dependencies.') 14061da546Spatrickparser.add_argument('--show-counts', default=False, action='store_true', 15061da546Spatrick help='When true, show the number of dependencies from each subproject') 16061da546Spatrickparser.add_argument('--discover-cycles', default=False, action='store_true', 17061da546Spatrick help='When true, find and display all project dependency cycles. Note,' 18061da546Spatrick 'this option is very slow') 19061da546Spatrick 20061da546Spatrickargs = parser.parse_args() 21061da546Spatrick 22061da546Spatricksrc_dir = os.path.join(lldb_root, "source") 23061da546Spatrickinc_dir = os.path.join(lldb_root, "include") 24061da546Spatrick 25061da546Spatricksrc_map = {} 26061da546Spatrick 27061da546Spatrickinclude_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"') 28061da546Spatrick 29061da546Spatrickdef is_sublist(small, big): 30061da546Spatrick it = iter(big) 31061da546Spatrick return all(c in it for c in small) 32061da546Spatrick 33061da546Spatrickdef normalize_host(str): 34061da546Spatrick if str.startswith("lldb/Host"): 35061da546Spatrick return "lldb/Host" 36061da546Spatrick if str.startswith("Plugins"): 37061da546Spatrick return "lldb/" + str 38061da546Spatrick if str.startswith("lldb/../../source"): 39061da546Spatrick return str.replace("lldb/../../source", "lldb") 40061da546Spatrick return str 41061da546Spatrick 42061da546Spatrickdef scan_deps(this_dir, file): 43061da546Spatrick global src_map 44061da546Spatrick deps = {} 45061da546Spatrick this_dir = normalize_host(this_dir) 46061da546Spatrick if this_dir in src_map: 47061da546Spatrick deps = src_map[this_dir] 48061da546Spatrick 49061da546Spatrick with open(file) as f: 50061da546Spatrick for line in list(f): 51061da546Spatrick m = include_regex.match(line) 52061da546Spatrick if m is None: 53061da546Spatrick continue 54061da546Spatrick relative = m.groups()[0].rstrip("/") 55061da546Spatrick if relative == this_dir: 56061da546Spatrick continue 57061da546Spatrick relative = normalize_host(relative) 58061da546Spatrick if relative in deps: 59061da546Spatrick deps[relative] += 1 60061da546Spatrick elif relative != this_dir: 61061da546Spatrick deps[relative] = 1 62061da546Spatrick if this_dir not in src_map and len(deps) > 0: 63061da546Spatrick src_map[this_dir] = deps 64061da546Spatrick 65061da546Spatrickfor (base, dirs, files) in os.walk(inc_dir): 66061da546Spatrick dir = os.path.basename(base) 67061da546Spatrick relative = os.path.relpath(base, inc_dir) 68061da546Spatrick inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]] 69061da546Spatrick relative = relative.replace("\\", "/") 70061da546Spatrick for inc in inc_files: 71061da546Spatrick inc_path = os.path.join(base, inc) 72061da546Spatrick scan_deps(relative, inc_path) 73061da546Spatrick 74061da546Spatrickfor (base, dirs, files) in os.walk(src_dir): 75061da546Spatrick dir = os.path.basename(base) 76061da546Spatrick relative = os.path.relpath(base, src_dir) 77061da546Spatrick src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]] 78061da546Spatrick norm_base_path = os.path.normpath(os.path.join("lldb", relative)) 79061da546Spatrick norm_base_path = norm_base_path.replace("\\", "/") 80061da546Spatrick for src in src_files: 81061da546Spatrick src_path = os.path.join(base, src) 82061da546Spatrick scan_deps(norm_base_path, src_path) 83061da546Spatrick pass 84061da546Spatrick 85061da546Spatrickdef is_existing_cycle(path, cycles): 86061da546Spatrick # If we have a cycle like # A -> B -> C (with an implicit -> A at the end) 87061da546Spatrick # then we don't just want to check for an occurrence of A -> B -> C in the 88061da546Spatrick # list of known cycles, but every possible rotation of A -> B -> C. For 89061da546Spatrick # example, if we previously encountered B -> C -> A (with an implicit -> B 90061da546Spatrick # at the end), then A -> B -> C is also a cycle. This is an important 91061da546Spatrick # optimization which reduces the search space by multiple orders of 92061da546Spatrick # magnitude. 93061da546Spatrick for i in range(0,len(path)): 94061da546Spatrick if any(is_sublist(x, path) for x in cycles): 95061da546Spatrick return True 96061da546Spatrick path = [path[-1]] + path[0:-1] 97061da546Spatrick return False 98061da546Spatrick 99061da546Spatrickdef expand(path_queue, path_lengths, cycles, src_map): 100061da546Spatrick # We do a breadth first search, to make sure we visit all paths in order 101061da546Spatrick # of ascending length. This is an important optimization to make sure that 102061da546Spatrick # short cycles are discovered first, which will allow us to discard longer 103061da546Spatrick # cycles which grow the search space exponentially the longer they get. 104061da546Spatrick while len(path_queue) > 0: 105061da546Spatrick cur_path = path_queue.pop(0) 106061da546Spatrick if is_existing_cycle(cur_path, cycles): 107061da546Spatrick continue 108061da546Spatrick 109061da546Spatrick next_len = path_lengths.pop(0) + 1 110061da546Spatrick last_component = cur_path[-1] 111061da546Spatrick 112*dda28197Spatrick for item in src_map.get(last_component, []): 113061da546Spatrick if item.startswith("clang"): 114061da546Spatrick continue 115061da546Spatrick 116061da546Spatrick if item in cur_path: 117061da546Spatrick # This is a cycle. Minimize it and then check if the result is 118061da546Spatrick # already in the list of cycles. Insert it (or not) and then 119061da546Spatrick # exit. 120061da546Spatrick new_index = cur_path.index(item) 121061da546Spatrick cycle = cur_path[new_index:] 122061da546Spatrick if not is_existing_cycle(cycle, cycles): 123061da546Spatrick cycles.append(cycle) 124061da546Spatrick continue 125061da546Spatrick 126061da546Spatrick path_lengths.append(next_len) 127061da546Spatrick path_queue.append(cur_path + [item]) 128061da546Spatrick pass 129061da546Spatrick 130061da546Spatrickcycles = [] 131061da546Spatrick 132061da546Spatrickpath_queue = [[x] for x in iter(src_map)] 133061da546Spatrickpath_lens = [1] * len(path_queue) 134061da546Spatrick 135061da546Spatrickitems = list(src_map.items()) 136061da546Spatrickitems.sort(key = lambda A : A[0]) 137061da546Spatrick 138061da546Spatrickfor (path, deps) in items: 139061da546Spatrick print(path + ":") 140061da546Spatrick sorted_deps = list(deps.items()) 141061da546Spatrick if args.show_counts: 142061da546Spatrick sorted_deps.sort(key = lambda A: (A[1], A[0])) 143061da546Spatrick for dep in sorted_deps: 144061da546Spatrick print("\t{} [{}]".format(dep[0], dep[1])) 145061da546Spatrick else: 146061da546Spatrick sorted_deps.sort(key = lambda A: A[0]) 147061da546Spatrick for dep in sorted_deps: 148061da546Spatrick print("\t{}".format(dep[0])) 149061da546Spatrick 150061da546Spatrickdef iter_cycles(cycles): 151061da546Spatrick global src_map 152061da546Spatrick for cycle in cycles: 153061da546Spatrick cycle.append(cycle[0]) 154061da546Spatrick zipper = list(zip(cycle[0:-1], cycle[1:])) 155061da546Spatrick result = [(x, src_map[x][y], y) for (x,y) in zipper] 156061da546Spatrick total = 0 157061da546Spatrick smallest = result[0][1] 158061da546Spatrick for (first, value, last) in result: 159061da546Spatrick total += value 160061da546Spatrick smallest = min(smallest, value) 161061da546Spatrick yield (total, smallest, result) 162061da546Spatrick 163061da546Spatrickif args.discover_cycles: 164061da546Spatrick print("Analyzing cycles...") 165061da546Spatrick 166061da546Spatrick expand(path_queue, path_lens, cycles, src_map) 167061da546Spatrick 168061da546Spatrick average = sum([len(x)+1 for x in cycles]) / len(cycles) 169061da546Spatrick 170061da546Spatrick print("Found {} cycles. Average cycle length = {}.".format(len(cycles), average)) 171061da546Spatrick counted = list(iter_cycles(cycles)) 172061da546Spatrick if args.show_counts: 173061da546Spatrick counted.sort(key = lambda A: A[0]) 174061da546Spatrick for (total, smallest, cycle) in counted: 175061da546Spatrick sys.stdout.write("{} deps to break: ".format(total)) 176061da546Spatrick sys.stdout.write(cycle[0][0]) 177061da546Spatrick for (first, count, last) in cycle: 178061da546Spatrick sys.stdout.write(" [{}->] {}".format(count, last)) 179061da546Spatrick sys.stdout.write("\n") 180061da546Spatrick else: 181061da546Spatrick for cycle in cycles: 182061da546Spatrick cycle.append(cycle[0]) 183061da546Spatrick print(" -> ".join(cycle)) 184061da546Spatrick 185061da546Spatrick print("Analyzing islands...") 186061da546Spatrick islands = [] 187061da546Spatrick outgoing_counts = defaultdict(int) 188061da546Spatrick incoming_counts = defaultdict(int) 189061da546Spatrick for (total, smallest, cycle) in counted: 190061da546Spatrick for (first, count, last) in cycle: 191061da546Spatrick outgoing_counts[first] += count 192061da546Spatrick incoming_counts[last] += count 193061da546Spatrick for cycle in cycles: 194061da546Spatrick this_cycle = set(cycle) 195061da546Spatrick disjoints = [x for x in islands if this_cycle.isdisjoint(x)] 196061da546Spatrick overlaps = [x for x in islands if not this_cycle.isdisjoint(x)] 197061da546Spatrick islands = disjoints + [set.union(this_cycle, *overlaps)] 198061da546Spatrick print("Found {} disjoint cycle islands...".format(len(islands))) 199061da546Spatrick for island in islands: 200061da546Spatrick print("Island ({} elements)".format(len(island))) 201061da546Spatrick sorted = [] 202061da546Spatrick for node in island: 203061da546Spatrick sorted.append((node, incoming_counts[node], outgoing_counts[node])) 204061da546Spatrick sorted.sort(key = lambda x: x[1]+x[2]) 205061da546Spatrick for (node, inc, outg) in sorted: 206061da546Spatrick print(" {} [{} in, {} out]".format(node, inc, outg)) 207061da546Spatrick sys.stdout.flush() 208061da546Spatrickpass 209