xref: /llvm-project/llvm/utils/demangle_tree.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.
640d05cc1SZachary Turner
74a27478aSSerge Gueltonfrom __future__ import print_function
84a27478aSSerge Guelton
940d05cc1SZachary Turnerimport argparse
1040d05cc1SZachary Turnerimport functools
1140d05cc1SZachary Turnerimport os
1240d05cc1SZachary Turnerimport re
1340d05cc1SZachary Turnerimport sys
1440d05cc1SZachary Turnerimport subprocess
1540d05cc1SZachary Turnerimport traceback
1640d05cc1SZachary Turnerfrom multiprocessing import Pool
1740d05cc1SZachary Turnerimport multiprocessing
1840d05cc1SZachary Turner
1940d05cc1SZachary Turnerargs = None
2040d05cc1SZachary Turner
21*b71edfaaSTobias Hieta
def parse_line(line):
    """Extract a (mangled, demangled) pair from one line of objdump output.

    The mangled name is taken to start at the first "?" on the line and to run
    up to the first "(" that follows it.  The demangled text is whatever lies
    between that "(" and the last ")" on the line.

    Returns a tuple of two stripped strings, or (None, None) when the line
    lacks a "?", a "(" after it, or a ")" after that "(".
    """
    question = line.find("?")
    if question == -1:
        return None, None

    open_paren = line.find("(", question)
    if open_paren == -1:
        return None, None

    # Search from the right so that parentheses nested inside the demangled
    # text (e.g. a parameter list) stay part of the result.
    close_paren = line.rfind(")", open_paren)
    if close_paren == -1:
        # Without this check the slice below would silently use -1 as its end
        # index and drop the last character of the line.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1 : close_paren]
    return mangled.strip(), demangled.strip()
3640d05cc1SZachary Turner
37*b71edfaaSTobias Hieta
class Result(object):
    """Demangling statistics for one object file or one directory."""

    def __init__(self):
        # Path of the object file or directory these numbers describe.
        self.file = None
        # Plain counters: files examined and symbols seen.
        self.nfiles = self.nsymbols = 0
        # Paths of object files for which objdump exited with a non-zero code.
        self.crashed = []
        # Mangled names reported as invalid; a set, so duplicates collapse.
        self.errors = set()
4540d05cc1SZachary Turner
46*b71edfaaSTobias Hieta
class MapContext(object):
    """Bookkeeping carried across successive chunk-processing calls."""

    def __init__(self):
        # Queue of (directory, [object paths]) entries not yet sent to the pool.
        self.pending_objs = []
        # Number of object paths still waiting across all queued entries.
        self.npending = 0
        # Partial Result of a directory split across two chunks, if any.
        self.rincomplete = None
        # Running total over every directory that has been fully processed.
        self.rcumulative = Result()
5340d05cc1SZachary Turner
54*b71edfaaSTobias Hieta
def process_file(path, objdump):
    """Dump the symbol table of one object file and tally demangling failures.

    Runs `objdump -t --demangle path` and inspects every output line that
    parse_line() recognises as a (mangled, demangled) pair.

    Args:
        path: path of the object file to inspect.
        objdump: path (or name) of the llvm-objdump executable to run.

    Returns:
        A Result whose `file` is `path`.  If objdump exits with a non-zero
        status, `crashed` holds `path` and nothing else is filled in.
        Otherwise `nsymbols` counts the recognised lines and `errors` holds the
        mangled names whose demangled text contains "invalid mangled name".
    """
    r = Result()
    r.file = path

    # `--demangle` is the documented long spelling of this option.
    popen_args = [objdump, "-t", "--demangle", path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is piped so the tool's diagnostics stay off our own output; its
    # contents are not used.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Substitute undecodable bytes instead of raising, so that a single file
    # with non-UTF-8 output cannot abort the whole run.
    output = stdout.decode("utf-8", "replace")

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r
7640d05cc1SZachary Turner
77*b71edfaaSTobias Hieta
def add_results(r1, r2):
    """Fold the statistics of `r2` into `r1`; `r1` is modified in place."""
    # Scalar counters are summed.
    r1.nfiles = r1.nfiles + r2.nfiles
    r1.nsymbols = r1.nsymbols + r2.nsymbols
    # Collections are merged into r1's existing containers.
    r1.errors.update(r2.errors)
    r1.crashed.extend(r2.crashed)
8340d05cc1SZachary Turner
84*b71edfaaSTobias Hieta
def print_result_row(directory, result):
    """Print a one-line status row for `result`, labelled with `directory`."""
    row_template = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'"
    fields = (
        result.nfiles,
        len(result.crashed),
        len(result.errors),
        result.nsymbols,
        directory,
    )
    print(row_template.format(*fields))
95*b71edfaaSTobias Hieta
9640d05cc1SZachary Turner
def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files and report finished dirs.

    Object paths are taken from the front of `context.pending_objs`, run
    through process_file() on `pool`, and their results are folded into one
    Result per directory.  Every directory whose files have all been processed
    is added to `context.rcumulative` and printed.  A directory that only
    partly fit into this chunk is parked in `context.rincomplete` and resumed
    by the next call.

    Args:
        pool: object providing `map(func, iterable)`, e.g. multiprocessing.Pool.
        chunk_size: maximum number of object files to process in this call.
        objdump: path of the llvm-objdump executable passed to process_file().
        context: MapContext holding the pending queue and running totals.
    """
    objs = []
    # Directory each entry of `objs` was taken from, kept in lockstep with it.
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)

        # Resume the partially filled Result left over from the previous
        # chunk, if there is one; otherwise start fresh for this directory.
        dir_result = Result()
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            # Leave the unprocessed tail at the front of the queue.
            context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = pool.map(copier, objs)

    # Match results to directories by position rather than through
    # os.path.dirname(mr.file): the queued paths are normalized, so a file
    # directly under "." has an empty dirname that would not match the "."
    # key in dir_results.
    for obj_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[obj_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        completed = dir_results[c]
        add_results(context.rcumulative, completed)
        print_result_row(c, completed)
15040d05cc1SZachary Turner
151*b71edfaaSTobias Hieta
def process_pending_files(pool, chunk_size, objdump, context):
    """Process full chunks until fewer than `chunk_size` files remain pending."""
    while True:
        if context.npending < chunk_size:
            # Not enough work left for a full chunk; leave it queued.
            return
        process_one_chunk(pool, chunk_size, objdump, context)
15540d05cc1SZachary Turner
156*b71edfaaSTobias Hieta
def _ratio(part, whole):
    """Return part / whole as a float, or 0.0 when `whole` is zero."""
    if not whole:
        return 0.0
    return float(part) / float(whole)


def _print_summary(total, nfiles):
    """Print the failure and crash lists followed by the overall percentages."""
    nfailed = len(total.errors)
    nsuccess = total.nsymbols - nfailed
    ncrashed = len(total.crashed)

    if nfailed > 0:
        print("Failures:")
        for m in sorted(total.errors):
            print("  " + m)
    if ncrashed > 0:
        print("Crashes:")
        for f in sorted(total.crashed):
            print("  " + f)
    print("Summary:")
    # _ratio() keeps an empty tree (no symbols / no files) from dividing by 0.
    spct = _ratio(nsuccess, total.nsymbols)
    fpct = _ratio(nfailed, total.nsymbols)
    cpct = _ratio(ncrashed, nfiles)
    print("Processed {0} object files.".format(nfiles))
    print(
        "{0}/{1} symbols successfully demangled ({2:.4%})".format(
            nsuccess, total.nsymbols, spct
        )
    )
    print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
    print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))


def go():
    """Walk the tree named by the module-level `args` and print statistics.

    Reads `args.dir`, `args.extensions` and `args.objdump`.  Every file whose
    extension is in the requested list is queued and processed on a worker
    pool, one status row is printed per directory, and a summary is printed at
    the end.  Any exception raised while processing is reported with a
    traceback rather than propagated; the pool is always closed and joined.
    """
    obj_dir = args.dir
    # Ignore empty entries (e.g. from a trailing comma) and surrounding
    # whitespace, and make sure every extension carries its leading dot so it
    # can be compared with os.path.splitext() output.
    extensions = [x.strip() for x in args.extensions.split(",")]
    extensions = [x if x.startswith(".") else "." + x for x in extensions if x]

    # The help text promises a PATH lookup when --objdump is not given; a bare
    # program name makes subprocess do exactly that.
    objdump = args.objdump or "llvm-objdump"

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = [
                os.path.normpath(os.path.join(root, f))
                for f in files
                if os.path.splitext(f)[1] in extensions
            ]
            nfiles += len(pending)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, objdump, context)

        # Whatever is left is smaller than one chunk; flush it in one call.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, objdump, context)

        _print_summary(context.rcumulative, nfiles)

    except Exception:
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
23040d05cc1SZachary Turner
23140d05cc1SZachary Turner
if __name__ == "__main__":
    # The multiprocessing documentation requires this call straight after the
    # __main__ guard, so it has to come before the command line is parsed.
    multiprocessing.freeze_support()

    # Default object-file extension for the host platform.
    def_obj = "obj" if sys.platform == "win32" else "o"

    parser = argparse.ArgumentParser(
        description="Demangle all symbols in a tree of object files, looking for failures."
    )
    parser.add_argument(
        "dir", type=str, help="the root directory at which to start crawling"
    )
    parser.add_argument(
        "--objdump",
        type=str,
        help="path to llvm-objdump.  If not specified "
        + "the tool is located as if by `which llvm-objdump`.",
    )
    parser.add_argument(
        "--extensions",
        type=str,
        default=def_obj,
        help="comma separated list of extensions to demangle (e.g. `o,obj`).  "
        + "By default this will be `obj` on Windows and `o` otherwise.",
    )

    # go() reads its configuration from this module-level name.
    args = parser.parse_args()

    go()
259