#!/usr/bin/env python
"""Analyze #include dependencies between LLDB subprojects.

Scans every header under include/ and every source file under source/,
records which project directories each directory includes headers from,
and prints the dependency list.  Optionally shows per-dependency counts
and performs an exhaustive (slow) search for dependency cycles.
"""

import argparse
import itertools
import os
import re
import sys
from collections import defaultdict, deque

from use_lldb_suite import lldb_root

parser = argparse.ArgumentParser(
    description="Analyze LLDB project #include dependencies."
)
parser.add_argument(
    "--show-counts",
    default=False,
    action="store_true",
    help="When true, show the number of dependencies from each subproject",
)
parser.add_argument(
    "--discover-cycles",
    default=False,
    action="store_true",
    help="When true, find and display all project dependency cycles. Note,"
    "this option is very slow",
)

args = parser.parse_args()

src_dir = os.path.join(lldb_root, "source")
inc_dir = os.path.join(lldb_root, "include")

# Maps a normalized directory name (e.g. "lldb/Core") to a dict of
# {included directory: number of #include occurrences}.
src_map = {}

include_regex = re.compile('#include "((lldb|Plugins|clang)(.*/)+).*"')


def is_sublist(small, big):
    """Return True if the items of `small` occur in `big` in the same
    relative order (not necessarily contiguously)."""
    it = iter(big)
    return all(c in it for c in small)


def normalize_host(path):
    """Map equivalent include-path spellings to one canonical form."""
    # Everything under lldb/Host counts as a single subproject.
    if path.startswith("lldb/Host"):
        return "lldb/Host"
    # Plugin headers are included relative to the source root.
    if path.startswith("Plugins"):
        return "lldb/" + path
    # Collapse "lldb/../../source/Foo" back to "lldb/Foo".
    if path.startswith("lldb/../../source"):
        return path.replace("lldb/../../source", "lldb")
    return path


def scan_deps(this_dir, file):
    """Record in the global src_map every project directory that `file`
    includes a header from.

    `this_dir` is the directory the file belongs to (normalized here);
    self-dependencies are ignored.
    """
    deps = {}
    this_dir = normalize_host(this_dir)
    if this_dir in src_map:
        deps = src_map[this_dir]

    with open(file) as f:
        for line in f:
            m = include_regex.match(line)
            if m is None:
                continue
            relative = m.groups()[0].rstrip("/")
            if relative == this_dir:
                continue
            relative = normalize_host(relative)
            if relative in deps:
                deps[relative] += 1
            elif relative != this_dir:
                # Normalization may have mapped the path onto this_dir;
                # only record genuine cross-directory dependencies.
                deps[relative] = 1
    if this_dir not in src_map and len(deps) > 0:
        src_map[this_dir] = deps


# Scan public headers under include/.
for base, dirs, files in os.walk(inc_dir):
    relative = os.path.relpath(base, inc_dir).replace("\\", "/")
    inc_files = [x for x in files if os.path.splitext(x)[1] == ".h"]
    for inc in inc_files:
        scan_deps(relative, os.path.join(base, inc))

# Scan implementation files under source/.
for base, dirs, files in os.walk(src_dir):
    relative = os.path.relpath(base, src_dir)
    src_files = [x for x in files if os.path.splitext(x)[1] in (".cpp", ".h", ".mm")]
    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
    norm_base_path = norm_base_path.replace("\\", "/")
    for src in src_files:
        scan_deps(norm_base_path, os.path.join(base, src))


def is_existing_cycle(path, cycles):
    """Return True if `path` (or any rotation of it) is subsumed by a
    known cycle."""
    # If we have a cycle like A -> B -> C (with an implicit -> A at the end)
    # then we don't just want to check for an occurrence of A -> B -> C in the
    # list of known cycles, but every possible rotation of A -> B -> C. For
    # example, if we previously encountered B -> C -> A (with an implicit -> B
    # at the end), then A -> B -> C is also a cycle. This is an important
    # optimization which reduces the search space by multiple orders of
    # magnitude.
    for i in range(0, len(path)):
        if any(is_sublist(x, path) for x in cycles):
            return True
        path = [path[-1]] + path[0:-1]
    return False


def expand(path_queue, path_lengths, cycles, src_map):
    """Breadth-first search for dependency cycles; found cycles are
    appended to `cycles`.

    We do a breadth first search, to make sure we visit all paths in order
    of ascending length. This is an important optimization to make sure that
    short cycles are discovered first, which will allow us to discard longer
    cycles which grow the search space exponentially the longer they get.

    `path_queue` and `path_lengths` must be deques (popped from the left
    in lockstep, which keeps each dequeue O(1)).
    """
    while len(path_queue) > 0:
        cur_path = path_queue.popleft()
        # Pop the matching length unconditionally so the two queues stay
        # synchronized even when the path is discarded below.
        next_len = path_lengths.popleft() + 1
        if is_existing_cycle(cur_path, cycles):
            continue

        last_component = cur_path[-1]

        for item in src_map.get(last_component, []):
            # Dependencies on clang are outside the lldb project graph.
            if item.startswith("clang"):
                continue

            if item in cur_path:
                # This is a cycle.  Minimize it and then check if the result
                # is already in the list of cycles.  Insert it (or not) and
                # then exit.
                new_index = cur_path.index(item)
                cycle = cur_path[new_index:]
                if not is_existing_cycle(cycle, cycles):
                    cycles.append(cycle)
                continue

            path_lengths.append(next_len)
            path_queue.append(cur_path + [item])


cycles = []

# Seed the BFS with a single-element path per known directory.
path_queue = deque([x] for x in src_map)
path_lens = deque([1] * len(path_queue))

items = sorted(src_map.items(), key=lambda kv: kv[0])

for path, deps in items:
    print(path + ":")
    sorted_deps = list(deps.items())
    if args.show_counts:
        sorted_deps.sort(key=lambda A: (A[1], A[0]))
        for dep in sorted_deps:
            print("\t{} [{}]".format(dep[0], dep[1]))
    else:
        sorted_deps.sort(key=lambda A: A[0])
        for dep in sorted_deps:
            print("\t{}".format(dep[0]))


def iter_cycles(cycles):
    """Yield (total_deps, smallest_edge_count, [(src, count, dst), ...])
    for each cycle, without mutating the cycle lists themselves."""
    for cycle in cycles:
        # Close the loop on a copy; mutating the caller's list here
        # previously caused the closing node to be appended twice when
        # the cycles were printed later.
        closed = cycle + [cycle[0]]
        zipper = list(zip(closed[0:-1], closed[1:]))
        result = [(x, src_map[x][y], y) for (x, y) in zipper]
        counts = [value for _, value, _ in result]
        yield (sum(counts), min(counts), result)


if args.discover_cycles:
    print("Analyzing cycles...")

    expand(path_queue, path_lens, cycles, src_map)

    # Guard against division by zero when the project is cycle-free.
    if cycles:
        average = sum(len(x) + 1 for x in cycles) / len(cycles)
    else:
        average = 0

    print("Found {} cycles. Average cycle length = {}.".format(len(cycles), average))
    counted = list(iter_cycles(cycles))
    if args.show_counts:
        counted.sort(key=lambda A: A[0])
        for total, smallest, cycle in counted:
            sys.stdout.write("{} deps to break: ".format(total))
            sys.stdout.write(cycle[0][0])
            for first, count, last in cycle:
                sys.stdout.write(" [{}->] {}".format(count, last))
            sys.stdout.write("\n")
    else:
        for cycle in cycles:
            # Close the loop for display only; the stored cycle is not
            # modified.
            print(" -> ".join(cycle + [cycle[0]]))

    print("Analyzing islands...")
    islands = []
    outgoing_counts = defaultdict(int)
    incoming_counts = defaultdict(int)
    for total, smallest, cycle in counted:
        for first, count, last in cycle:
            outgoing_counts[first] += count
            incoming_counts[last] += count
    # Merge cycles into disjoint "islands" of mutually entangled nodes.
    for cycle in cycles:
        this_cycle = set(cycle)
        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
        islands = disjoints + [set.union(this_cycle, *overlaps)]
    print("Found {} disjoint cycle islands...".format(len(islands)))
    for island in islands:
        print("Island ({} elements)".format(len(island)))
        # Rank nodes by total traffic (in + out); `ranked` avoids
        # shadowing the builtin `sorted`.
        ranked = [
            (node, incoming_counts[node], outgoing_counts[node]) for node in island
        ]
        ranked.sort(key=lambda x: x[1] + x[2])
        for node, inc, outg in ranked:
            print(" {} [{} in, {} out]".format(node, inc, outg))
    sys.stdout.flush()