# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

# Parsed command-line arguments; populated by the `__main__` block below.
args = None


def parse_line(line):
    """Split one line of objdump output into (mangled, demangled).

    A line is treated as a symbol line when it contains a "?" followed by a
    parenthesised group; the text from the "?" up to the "(" is the mangled
    name and the text between that "(" and the last ")" is the demangled form.

    Returns:
        A `(mangled, demangled)` tuple of stripped strings, or `(None, None)`
        when the line does not have that shape.
    """
    question = line.find("?")
    if question == -1:
        return None, None

    open_paren = line.find("(", question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(")", open_paren)
    # An unterminated group is not a symbol line; without this check the
    # slice below would silently use -1 as its end index.
    if close_paren == -1:
        return None, None
    mangled = line[question:open_paren]
    demangled = line[open_paren + 1 : close_paren]
    return mangled.strip(), demangled.strip()


class Result(object):
    """Statistics for one object file, one directory, or the whole run."""

    def __init__(self):
        # Paths of files for which objdump exited with a non-zero status.
        self.crashed = []
        # File or directory this result describes (None for an aggregate).
        self.file = None
        # Number of symbol lines seen.
        self.nsymbols = 0
        # Distinct mangled names that could not be demangled.
        self.errors = set()
        # Number of object files accounted for.
        self.nfiles = 0


class MapContext(object):
    """Mutable state shared between successive calls to process_one_chunk."""

    def __init__(self):
        # Partial Result of a directory whose files were split across chunks.
        self.rincomplete = None
        # Running totals over every directory that has been fully processed.
        self.rcumulative = Result()
        # Queue of (directory, [object file paths]) still to be processed.
        self.pending_objs = []
        # Total number of object files across all entries of pending_objs.
        self.npending = 0


def process_file(path, objdump):
    """Run objdump on `path` and return a Result describing its symbols.

    If objdump exits with a non-zero status the file is recorded in
    `Result.crashed` and no symbols are counted.
    """
    r = Result()
    r.file = path

    popen_args = [objdump, "-t", "--demangle", path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Symbol tables may contain bytes that are not valid UTF-8; replace them
    # instead of letting a decode error abort the whole run.
    output = stdout.decode("utf-8", errors="replace")

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r


def add_results(r1, r2):
    """Accumulate the statistics of `r2` into `r1` (modified in place)."""
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles


def print_result_row(directory, result):
    """Print the one-line status summary for `directory`."""
    print(
        "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
            result.nfiles,
            len(result.crashed),
            len(result.errors),
            result.nsymbols,
            directory,
        )
    )


def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files on `pool`.

    Files are taken from the front of `context.pending_objs`.  Directories
    that are finished by this chunk are printed and folded into
    `context.rcumulative`; a directory that is only partially consumed has its
    partial Result parked in `context.rincomplete` for the next call.
    """
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        # Resume the partial result left over from the previous chunk, if any.
        # (Named `dir_result` rather than `re` to avoid shadowing the module.)
        dir_result = Result()
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)


def process_pending_files(pool, chunk_size, objdump, context):
    """Process full chunks until fewer than `chunk_size` files remain pending."""
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)


def _ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    return float(numerator) / float(denominator)


def go():
    """Walk `args.dir`, dump every matching object file and print statistics."""
    obj_dir = args.dir
    # Normalise "o" / ".o" to ".o"; empty entries (e.g. from a trailing comma)
    # are ignored rather than raising an IndexError.
    extensions = [
        x if x.startswith(".") else "." + x
        for x in (e.strip() for e in args.extensions.split(","))
        if x
    ]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        # NOTE: `errors` holds distinct mangled names while `nsymbols` counts
        # every symbol line, so a name that fails in several files is counted
        # as one failure.
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # A tree with no object files or no symbols must not divide by zero.
        spct = _ratio(nsuccess, total.nsymbols)
        fpct = _ratio(nfailed, total.nsymbols)
        cpct = _ratio(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print(
            "{0}/{1} symbols successfully demangled ({2:.4%})".format(
                nsuccess, total.nsymbols, spct
            )
        )
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        traceback.print_exc()
    finally:
        # Always release the worker processes, even if the run is interrupted.
        pool.close()
        pool.join()


if __name__ == "__main__":
    # Must run before anything else so that worker processes of a frozen
    # Windows executable do not try to parse the parent's command line.
    multiprocessing.freeze_support()

    def_obj = "obj" if sys.platform == "win32" else "o"

    parser = argparse.ArgumentParser(
        description="Demangle all symbols in a tree of object files, looking for failures."
    )
    parser.add_argument(
        "dir", type=str, help="the root directory at which to start crawling"
    )
    parser.add_argument(
        "--objdump",
        type=str,
        # A bare program name is resolved through PATH when the process is
        # spawned, which is what the help text promises.
        default="llvm-objdump",
        help="path to llvm-objdump.  If not specified "
        + "the tool is located as if by `which llvm-objdump`.",
    )
    parser.add_argument(
        "--extensions",
        type=str,
        default=def_obj,
        help="comma separated list of extensions to demangle (e.g. `o,obj`). "
        + "By default this will be `obj` on Windows and `o` otherwise.",
    )

    args = parser.parse_args()

    go()