# Given a path to llvm-objdump and a directory tree, walk the tree and run
# llvm-objdump on every object file encountered, with the options needed to
# demangle its symbols, collecting statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.
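#
# Example invocation (the paths and extension list are illustrative, not
# requirements of this script):
#
#   python demangle_tree.py /path/to/build/tree --objdump=/usr/bin/llvm-objdump --extensions=o,obj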

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None


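# parse_line() extracts a (mangled, demangled) pair from one line of
# `llvm-objdump -t -demangle` output: the text from the first '?' up to the
# next '(' is taken as the mangled name, and the text between that '(' and the
# last ')' on the line as the demangled form.  Illustratively (the exact
# llvm-objdump formatting may differ), a line ending in
# "?foo@@YAXXZ (void __cdecl foo(void))" yields that pair.  Lines that do not
# match this shape yield (None, None).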
def parse_line(line):
    question = line.find("?")
    if question == -1:
        return None, None

    open_paren = line.find("(", question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(")", open_paren)
    if close_paren == -1:
        return None, None
    mangled = line[question:open_paren]
    demangled = line[open_paren + 1 : close_paren]
    return mangled.strip(), demangled.strip()


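# Demangling statistics for a single object file; also reused as the
# per-directory and cumulative accumulator via add_results().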
class Result(object):
    def __init__(self):
        self.crashed = []
        self.file = None
        self.nsymbols = 0
        self.errors = set()
        self.nfiles = 0


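# Bookkeeping carried between chunks: the queue of (directory, [object files])
# pairs still to process, the partial Result for a directory whose files were
# split across a chunk boundary, and the running cumulative totals.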
class MapContext(object):
    def __init__(self):
        self.rincomplete = None
        self.rcumulative = Result()
        self.pending_objs = []
        self.npending = 0


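# Run llvm-objdump on a single object file and collect its statistics.  A
# nonzero exit code is recorded as a crash; demangler failures are recognized
# by "invalid mangled name" appearing in the demangled text.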
def process_file(path, objdump):
    r = Result()
    r.file = path

    popen_args = [objdump, "-t", "-demangle", path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    output = stdout.decode("utf-8")

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r


def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles


def print_result_row(directory, result):
    print(
        "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
            result.nfiles,
            len(result.crashed),
            len(result.errors),
            result.nsymbols,
            directory,
        )
    )


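# Take up to chunk_size object files from the front of the pending queue
# (possibly spanning several directories), demangle them in parallel, and fold
# the per-file results back into per-directory Results.  A directory whose
# files straddle the chunk boundary is carried over in context.rincomplete so
# its row is only printed once it completes.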
def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        dir_result = Result()
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    worker = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(worker, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # At most one directory can be left incomplete, and it has to be the last
    # one taken from the queue.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains only the directories which *did* complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)


def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)


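# Walk the tree rooted at args.dir, queue up object files with a matching
# extension, process them in chunks of pool_size (printing one status row per
# completed directory), and finally print the failure/crash lists and a
# summary.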
def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(",")
    extensions = [x if x[0] == "." else "." + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                _, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next directory.
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until fewer than
            # `pool_size` tasks remain.
            process_pending_files(pool, pool_size, args.objdump, context)

        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # Guard against division by zero when no files or symbols were found.
        spct = float(nsuccess) / float(max(total.nsymbols, 1))
        fpct = float(nfailed) / float(max(total.nsymbols, 1))
        cpct = float(ncrashed) / float(max(nfiles, 1))
        print("Processed {0} object files.".format(nfiles))
        print(
            "{0}/{1} symbols successfully demangled ({2:.4%})".format(
                nsuccess, total.nsymbols, spct
            )
        )
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except:
        traceback.print_exc()

    pool.close()
    pool.join()


if __name__ == "__main__":
    def_obj = "obj" if sys.platform == "win32" else "o"

    parser = argparse.ArgumentParser(
        description="Demangle all symbols in a tree of object files, looking for failures."
    )
    parser.add_argument(
        "dir", type=str, help="the root directory at which to start crawling"
    )
    parser.add_argument(
        "--objdump",
        type=str,
        default="llvm-objdump",
        help="path to llvm-objdump.  If not specified "
        + "the tool is located as if by `which llvm-objdump`.",
    )
    parser.add_argument(
        "--extensions",
        type=str,
        default=def_obj,
        help="comma-separated list of extensions to demangle (e.g. `o,obj`).  "
        + "By default this will be `obj` on Windows and `o` otherwise.",
    )

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()
259