xref: /llvm-project/llvm/tools/opt-viewer/optrecord.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5import io
6import yaml
7
8# Try to use the C parser.
9try:
10    from yaml import CLoader as Loader
11except ImportError:
12    print("For faster parsing, you may want to install libYAML for PyYAML")
13    from yaml import Loader
14
15import html
16from collections import defaultdict
17import fnmatch
18import functools
19from multiprocessing import Lock
20import os, os.path
21import subprocess
22
23try:
24    # The previously builtin function `intern()` was moved
25    # to the `sys` module in Python 3.
26    from sys import intern
27except:
28    pass
29
30import re
31
32import optpmap
33
# Dictionary iteration helpers that work on both Python 2 and Python 3.
if hasattr(dict, "iteritems"):
    # Python 2: dictionaries provide dedicated iterator methods.
    def itervalues(d):
        """Return an iterator over the values of dictionary *d*."""
        return d.itervalues()

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of dictionary *d*."""
        return d.iteritems()

else:
    # Python 3: wrap the views returned by values()/items() in an iterator.
    def itervalues(d):
        """Return an iterator over the values of dictionary *d*."""
        return iter(d.values())

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of dictionary *d*."""
        return iter(d.items())
51
52
def html_file_name(filename):
    """Return the HTML page name for *filename*.

    Every "/" and "#" in *filename* is replaced by "_" and ".html" is
    appended.
    """
    flattened = filename
    for separator in ("/", "#"):
        flattened = flattened.replace(separator, "_")
    return flattened + ".html"
55
56
def make_link(File, Line):
    """Return a double-quoted link target of the form "<page>#L<Line>".

    <page> is the name html_file_name() produces for *File*.  The
    surrounding double quotes are part of the returned string.
    """
    page = html_file_name(File)
    return '"{}#L{}"'.format(page, Line)
59
60
class Remark(yaml.YAMLObject):
    """Base class for an optimization remark loaded from a YAML document.

    The methods below read the attributes ``Pass``, ``Name``, ``Function``,
    ``DebugLoc`` and ``Args`` (plus the optional ``Hotness``, ``Added`` and
    ``max_hotness``); they are expected to be populated by the YAML loader or
    by the code that post-processes the remarks.

    Call canonicalize() on a freshly loaded remark before using ``key``,
    hashing or comparing it.
    """

    # Work-around for http://pyyaml.org/ticket/154.
    yaml_loader = Loader

    default_demangler = "c++filt -n"
    # Demangler subprocess and the lock serializing access to its pipes.
    # Both are created by set_demangler().
    demangler_proc = None
    demangler_lock = None

    @classmethod
    def set_demangler(cls, demangler):
        """Start *demangler* (a whitespace-separated command line) as a
        subprocess and create the lock guarding its stdin/stdout pipes."""
        cls.demangler_proc = subprocess.Popen(
            demangler.split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
        cls.demangler_lock = Lock()

    @classmethod
    def demangle(cls, name):
        """Return the demangler's answer for *name*.

        One line is written to the demangler subprocess and one line is read
        back.  If no demangler has been started yet, the default one is
        started first.
        """
        if cls.demangler_proc is None:
            # Start it on the base class so that all remark classes share a
            # single subprocess.
            Remark.set_demangler(cls.default_demangler)
        with cls.demangler_lock:
            cls.demangler_proc.stdin.write((name + "\n").encode("utf-8"))
            cls.demangler_proc.stdin.flush()
            return cls.demangler_proc.stdout.readline().rstrip().decode("utf-8")

    # Intern all strings since we have lot of duplication across filenames,
    # remark text.
    #
    # Change Args from a list of dicts to a tuple of tuples.  This saves
    # memory in two ways.  One, a small tuple is significantly smaller than a
    # small dict.  Two, using tuple instead of list allows Args to be directly
    # used as part of the key (in Python only immutable types are hashable).
    def _reduce_memory(self):
        """Intern Pass/Name/Function and convert Args to a tuple of tuples."""
        self.Pass = intern(self.Pass)
        self.Name = intern(self.Name)
        try:
            # Can't intern unicode strings.
            self.Function = intern(self.Function)
        except (TypeError, AttributeError):
            # Best effort only: leave Function untouched when it cannot be
            # interned (TypeError) or is absent (AttributeError).
            pass

        def _reduce_memory_dict(old_dict):
            new_dict = dict()
            for (k, v) in iteritems(old_dict):
                # The exact type checks are deliberate: intern() rejects
                # anything that is not a plain str.
                if type(k) is str:
                    k = intern(k)

                if type(v) is str:
                    v = intern(v)
                elif type(v) is dict:
                    # This handles [{'Caller': ..., 'DebugLoc': { 'File': ... }}]
                    v = _reduce_memory_dict(v)
                new_dict[k] = v
            return tuple(new_dict.items())

        self.Args = tuple([_reduce_memory_dict(arg_dict) for arg_dict in self.Args])

    # The inverse operation of the dictionary-related memory optimization in
    # _reduce_memory_dict.  E.g.
    #     (('DebugLoc', (('File', ...) ... ))) -> [{'DebugLoc': {'File': ...} ....}]
    def recover_yaml_structure(self):
        """Turn Args back into a list of (possibly nested) dictionaries."""

        def tuple_to_dict(t):
            d = dict()
            for (k, v) in t:
                if type(v) is tuple:
                    v = tuple_to_dict(v)
                d[k] = v
            return d

        self.Args = [tuple_to_dict(arg_tuple) for arg_tuple in self.Args]

    def canonicalize(self):
        """Fill in defaults for Hotness (0) and Args ([]) when absent, then
        apply the memory reduction of _reduce_memory()."""
        if not hasattr(self, "Hotness"):
            self.Hotness = 0
        if not hasattr(self, "Args"):
            self.Args = []
        self._reduce_memory()

    @property
    def File(self):
        """The "File" entry of DebugLoc."""
        return self.DebugLoc["File"]

    @property
    def Line(self):
        """The "Line" entry of DebugLoc, converted to int."""
        return int(self.DebugLoc["Line"])

    @property
    def Column(self):
        """The "Column" entry of DebugLoc, as stored."""
        return self.DebugLoc["Column"]

    @property
    def DebugLocString(self):
        """The location formatted as "File:Line:Column"."""
        return "{}:{}:{}".format(self.File, self.Line, self.Column)

    @property
    def DemangledFunctionName(self):
        """Function passed through the demangler."""
        return self.demangle(self.Function)

    @property
    def Link(self):
        """Quoted link target for this remark's File and Line."""
        return make_link(self.File, self.Line)

    def getArgString(self, mapping):
        """Return the text (possibly an HTML anchor) for one entry of Args.

        *mapping* is either an iterable of (key, value) pairs, which is the
        form produced by _reduce_memory(), or a dictionary.  Apart from an
        optional "DebugLoc" it must contain exactly one key.
        """
        mapping = dict(mapping)
        dl = mapping.pop("DebugLoc", None)

        assert len(mapping) == 1
        (key, value) = list(mapping.items())[0]

        if key in ("Caller", "Callee", "DirectCallee"):
            value = html.escape(self.demangle(value))

        if dl and key != "Caller":
            dl_dict = dict(dl)
            return "<a href={}>{}</a>".format(
                make_link(dl_dict["File"], dl_dict["Line"]), value
            )
        else:
            return value

    # Return a cached dictionary for the arguments.  The key for each entry is
    # the argument key (e.g. 'Callee' for inlining remarks).  The value is a
    # tuple containing the value (e.g. for 'Callee' the function) and
    # optionally a DebugLoc.
    def getArgDict(self):
        """Build (once) and return the per-argument dictionary.

        The result is stored on the instance as ``ArgDict``.  Args must be in
        the tuple-of-tuples form produced by _reduce_memory().
        """
        if hasattr(self, "ArgDict"):
            return self.ArgDict
        self.ArgDict = {}
        for arg in self.Args:
            if len(arg) == 2:
                # One of the two pairs is the DebugLoc; find out which one.
                if arg[0][0] == "DebugLoc":
                    dbgidx = 0
                else:
                    assert arg[1][0] == "DebugLoc"
                    dbgidx = 1

                key = arg[1 - dbgidx][0]
                entry = (arg[1 - dbgidx][1], arg[dbgidx][1])
            else:
                arg = arg[0]
                key = arg[0]
                entry = (arg[1],)

            self.ArgDict[key] = entry
        return self.ArgDict

    def getDiffPrefix(self):
        """Return "+" or "-" according to the truthiness of ``Added``, or ""
        when the remark has no ``Added`` attribute."""
        if hasattr(self, "Added"):
            if self.Added:
                return "+"
            else:
                return "-"
        return ""

    @property
    def PassWithDiffPrefix(self):
        """Pass preceded by the diff prefix."""
        return self.getDiffPrefix() + self.Pass

    @property
    def message(self):
        """Concatenation of the strings produced for every entry of Args."""
        # After canonicalize(), Args is a tuple with one tuple of
        # (key, value) pairs per argument.
        values = [self.getArgString(mapping) for mapping in self.Args]
        return "".join(values)

    @property
    def RelativeHotness(self):
        """Hotness as a percentage of max_hotness, formatted like "12.34%".

        Returns "" when max_hotness is unset or zero.
        """
        max_hotness = getattr(self, "max_hotness", 0)
        if max_hotness:
            return "{0:.2f}%".format(self.Hotness * 100.0 / max_hotness)
        else:
            return ""

    @property
    def key(self):
        """Tuple identifying this remark; used for hashing and equality."""
        return (
            self.__class__,
            self.PassWithDiffPrefix,
            self.Name,
            self.File,
            self.Line,
            self.Column,
            self.Function,
            self.Args,
        )

    def __hash__(self):
        return hash(self.key)

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on the missing `key` attribute.
        if not isinstance(other, Remark):
            return NotImplemented
        return self.key == other.key

    def __repr__(self):
        return str(self.key)
251
252
class Analysis(Remark):
    """Remark carrying the YAML tag ``!Analysis``."""

    yaml_tag = "!Analysis"

    # Read-only color name associated with this kind of remark.
    color = property(lambda self: "white")
259
260
class AnalysisFPCommute(Analysis):
    """Analysis remark carrying the YAML tag ``!AnalysisFPCommute``."""

    yaml_tag = "!AnalysisFPCommute"
263
264
class AnalysisAliasing(Analysis):
    """Analysis remark carrying the YAML tag ``!AnalysisAliasing``."""

    yaml_tag = "!AnalysisAliasing"
267
268
class Passed(Remark):
    """Remark carrying the YAML tag ``!Passed``."""

    yaml_tag = "!Passed"

    # Read-only color name associated with this kind of remark.
    color = property(lambda self: "green")
275
276
class Missed(Remark):
    """Remark carrying the YAML tag ``!Missed``."""

    yaml_tag = "!Missed"

    # Read-only color name associated with this kind of remark.
    color = property(lambda self: "red")
283
284
class Failure(Missed):
    """Missed remark carrying the YAML tag ``!Failure``."""

    yaml_tag = "!Failure"
287
288
def get_remarks(input_file, filter_=None):
    """Load the remarks stored in the YAML file *input_file*.

    Every document is canonicalized.  Remarks without a DebugLoc, remarks
    whose key was already seen in this file and, when *filter_* is given,
    remarks whose Pass is not matched by the regular expression *filter_*
    are dropped.

    Returns a tuple (max_hotness, all_remarks, file_remarks) where
    all_remarks maps remark.key to the remark and file_remarks maps
    file name -> line number -> list of remarks.
    """
    file_remarks = defaultdict(functools.partial(defaultdict, list))
    all_remarks = dict()
    max_hotness = 0

    with io.open(input_file, encoding="utf-8") as stream:
        # NOTE(review): Loader is PyYAML's full loader, which can construct
        # arbitrary Python objects -- only feed it trusted remark files.
        documents = yaml.load_all(stream, Loader=Loader)
        pass_re = re.compile(filter_) if filter_ else None

        for remark in documents:
            remark.canonicalize()

            # Skip remarks without a debug location.
            if not hasattr(remark, "DebugLoc"):
                continue

            # Skip duplicated remarks.
            remark_key = remark.key
            if remark_key in all_remarks:
                continue

            if pass_re is not None and not pass_re.search(remark.Pass):
                continue

            all_remarks[remark_key] = remark
            file_remarks[remark.File][remark.Line].append(remark)

            # If we're reading back a diff yaml file, max_hotness is already
            # captured on the remark (and may actually be less than the max
            # hotness found in the file); restart from that value.
            baseline = getattr(remark, "max_hotness", max_hotness)
            max_hotness = max(baseline, remark.Hotness)

    return max_hotness, all_remarks, file_remarks
321
322
def gather_results(filenames, num_jobs, should_print_progress, filter_=None):
    """Read all *filenames* with get_remarks() and merge the per-file results.

    The files are dispatched through optpmap.pmap() with *num_jobs*,
    *should_print_progress* and *filter_* passed along.  The default
    demangler is started first if none has been set.

    Returns a tuple (all_remarks, file_remarks, has_hotness):
    all_remarks maps remark.key to a remark, file_remarks maps
    file name -> line number -> list of remarks, and has_hotness is True
    when the maximum hotness over all files is non-zero.  An empty
    *filenames* yields empty containers and False.
    """
    if should_print_progress:
        print("Reading YAML files...")
    if not Remark.demangler_proc:
        Remark.set_demangler(Remark.default_demangler)
    job_results = optpmap.pmap(
        get_remarks, filenames, num_jobs, should_print_progress, filter_
    )
    # Fall back to [0] so that max() does not raise ValueError when there
    # are no results at all (e.g. no input files).
    max_hotness = max([entry[0] for entry in job_results] or [0])

    def merge_file_remarks(file_remarks_job, all_remarks, merged):
        for filename, per_line in iteritems(file_remarks_job):
            for line, line_remarks in iteritems(per_line):
                for remark in line_remarks:
                    # Bring max_hotness into the remarks so that
                    # RelativeHotness does not depend on an external global.
                    remark.max_hotness = max_hotness
                    # Only add remarks not already contributed by an
                    # earlier job.
                    if remark.key not in all_remarks:
                        merged[filename][line].append(remark)

    all_remarks = dict()
    file_remarks = defaultdict(functools.partial(defaultdict, list))
    for _, all_remarks_job, file_remarks_job in job_results:
        # Merge before updating all_remarks: the membership test above must
        # only see the remarks of previously processed jobs.
        merge_file_remarks(file_remarks_job, all_remarks, file_remarks)
        all_remarks.update(all_remarks_job)

    return all_remarks, file_remarks, max_hotness != 0
350
351
def find_opt_files(*dirs_or_files):
    """Collect optimization record files from the given paths.

    Each argument that is an existing regular file is taken as is.  Any
    other argument is walked as a directory tree, and every file whose
    name matches "*.opt.yaml*" is collected.  Returns the list of paths.
    """
    found = []
    for path in dirs_or_files:
        if os.path.isfile(path):
            found.append(path)
            continue

        for dirpath, dirnames, filenames in os.walk(path):
            # Exclude mounted directories and symlinks (os.walk default).
            # The list is pruned in place so os.walk skips what was removed.
            dirnames[:] = [
                name
                for name in dirnames
                if not os.path.ismount(os.path.join(dirpath, name))
            ]
            found.extend(
                os.path.join(dirpath, name)
                for name in fnmatch.filter(filenames, "*.opt.yaml*")
            )
    return found
367