# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

# Parsed command-line arguments; populated in the `__main__` block below and
# read by go().
args = None


def parse_line(line):
    """Split one line of objdump output into (mangled, demangled).

    The mangled name is the text from the first "?" up to the first "(" that
    follows it; the demangled text is whatever sits between that "(" and the
    last ")" after it.  Both pieces are stripped of surrounding whitespace.

    Returns (None, None) when the line has no "?", no "(" after the "?", or
    no ")" after the "(".
    """
    question = line.find("?")
    if question == -1:
        return None, None

    open_paren = line.find("(", question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(")", open_paren)
    # The closing parenthesis must be checked here; without it a line that
    # lacks ")" would be sliced with close_paren == -1 and silently lose its
    # last character.
    if close_paren == -1:
        return None, None
    mangled = line[question:open_paren]
    demangled = line[open_paren + 1 : close_paren]
    return mangled.strip(), demangled.strip()


class Result(object):
    """Statistics gathered for one file, one directory, or the whole run."""

    def __init__(self):
        # Paths of files for which objdump exited with a non-zero status.
        self.crashed = []
        # The file or directory this result describes (None if unset).
        self.file = None
        # Number of symbols parsed from objdump output.
        self.nsymbols = 0
        # Mangled names whose demangled text reported an invalid name.
        self.errors = set()
        # Number of object files accounted for by this result.
        self.nfiles = 0


class MapContext(object):
    """Mutable state shared between successive chunks of work."""

    def __init__(self):
        # Partial Result of a directory whose files were split across chunks.
        self.rincomplete = None
        # Running total over every directory that has been fully processed.
        self.rcumulative = Result()
        # Queue of (directory, [object file paths]) still to be processed.
        self.pending_objs = []
        # Total number of object file paths held in pending_objs.
        self.npending = 0


def process_file(path, objdump):
    """Run `objdump -t -demangle` on `path` and collect its statistics.

    Returns a Result whose `file` is `path`.  If objdump exits with a non-zero
    status the file is recorded in `crashed` and no symbols are counted.
    Otherwise every line that parse_line() recognises counts as one symbol,
    and symbols whose demangled text contains "invalid mangled name" are added
    to `errors`.
    """
    r = Result()
    r.file = path

    popen_args = [objdump, "-t", "-demangle", path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Undecodable bytes in a symbol table should not abort the whole run, so
    # replace them instead of raising UnicodeDecodeError.
    output = stdout.decode("utf-8", errors="replace")

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r


def add_results(r1, r2):
    """Accumulate the statistics of `r2` into `r1` (modifies `r1` in place)."""
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles


def print_result_row(directory, result):
    """Print a one-line status summary of `result` for `directory`."""
    print(
        "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
            result.nfiles,
            len(result.crashed),
            len(result.errors),
            result.nsymbols,
            directory,
        )
    )


def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files through `pool`.

    Files are taken from the front of `context.pending_objs`, directory by
    directory.  Every directory that is fully consumed by this chunk is folded
    into `context.rcumulative` and gets a status row printed.  A directory that
    is only partially consumed has its partial Result stored in
    `context.rincomplete` so the next chunk can continue it.
    """
    objs = []
    # Directory owning each entry of `objs`, index for index.  Tracking this
    # explicitly avoids re-deriving the directory from the file path, which
    # does not round-trip for every root (e.g. normpath("./a.o") is "a.o",
    # whose dirname is "" rather than ".").
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)
        dir_result = Result()
        if context.rincomplete is not None:
            # Continue the directory that the previous chunk left unfinished.
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    # pool.map returns results in the same order as its input, so they can be
    # paired with obj_dirs positionally.
    mapped_results = list(pool.map(copier, objs))

    for result_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[result_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)


def process_pending_files(pool, chunk_size, objdump, context):
    """Process full chunks until fewer than `chunk_size` files are pending."""
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)


def _normalize_extensions(spec):
    """Turn a comma separated extension list into a list of ".ext" strings."""
    normalized = []
    for ext in spec.split(","):
        ext = ext.strip()
        if not ext:
            # Tolerate stray commas such as "o,,obj" or a trailing comma.
            continue
        normalized.append(ext if ext.startswith(".") else "." + ext)
    return normalized


def _ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
    if denominator == 0:
        return 0.0
    return float(numerator) / float(denominator)


def go(pool_size=48):
    """Walk `args.dir`, demangle every matching object file, print statistics.

    Reads the module-level `args` (dir, objdump, extensions).  `pool_size` is
    both the number of worker processes and the number of files dispatched per
    chunk.  Exceptions raised while processing are printed as a traceback
    rather than propagated; the pool is always closed and joined.
    """
    global args

    obj_dir = args.dir
    extensions = _normalize_extensions(args.extensions)
    # When --objdump is not given, let the OS resolve the tool through PATH,
    # as the option's help text promises.
    objdump = args.objdump or "llvm-objdump"

    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                _, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, objdump, context)

        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # An empty tree (no files, or no symbols) must still produce a summary
        # instead of dying on a division by zero.
        spct = _ratio(nsuccess, total.nsymbols)
        fpct = _ratio(nfailed, total.nsymbols)
        cpct = _ratio(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print(
            "{0}/{1} symbols successfully demangled ({2:.4%})".format(
                nsuccess, total.nsymbols, spct
            )
        )
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()


if __name__ == "__main__":
    def_obj = "obj" if sys.platform == "win32" else "o"

    parser = argparse.ArgumentParser(
        description="Demangle all symbols in a tree of object files, looking for failures."
    )
    parser.add_argument(
        "dir", type=str, help="the root directory at which to start crawling"
    )
    parser.add_argument(
        "--objdump",
        type=str,
        help="path to llvm-objdump.  If not specified "
        + "the tool is located as if by `which llvm-objdump`.",
    )
    parser.add_argument(
        "--extensions",
        type=str,
        default=def_obj,
        help="comma separated list of extensions to demangle (e.g. `o,obj`). "
        + "By default this will be `obj` on Windows and `o` otherwise.",
    )

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()