xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision b9c94f946f2ca45734af666d6afba0234b21411b)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import json
17import logging
18import re
19
20
# Compare two dictionaries and report which keys differ between them.
# Returns a pair of lists (removed, added): 'removed' holds the keys of 'prev'
# that are missing from 'curr' or mapped to a different value there, and
# 'added' holds the keys of 'curr' that are missing from 'prev' or mapped to
# a different value there. A key whose value changed appears in both lists.
# Each list follows the iteration order of the dictionary it was built from.
def diff_dicts(curr, prev):
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return removed, added
26
27
# A deserialized source location: a line, a column and a file name.
class SourceLocation(object):
    # json_loc: mapping with 'line' and 'column' entries and an optional
    # 'filename' entry; '(main file)' is used when no file name is given.
    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        self.line = json_loc['line']
        self.col = json_loc['column']
        if 'filename' in json_loc:
            self.filename = json_loc['filename']
        else:
            self.filename = '(main file)'
36
37
# A deserialized program point. Every point has a kind and a tag; the
# remaining attributes exist only for the kinds that are handled below.
class ProgramPoint(object):
    # json_pp: mapping with 'kind' and 'tag' entries plus the entries that
    # are specific to the given kind.
    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']

        kind = self.kind
        if kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
        elif kind == 'Statement':
            self.stmt_kind = json_pp['stmt_kind']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            # A statement may come without a source location.
            json_loc = json_pp['location']
            if json_loc is None:
                self.loc = None
            else:
                self.loc = SourceLocation(json_loc)
        elif kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
55
56
# A single expression acting as a key in a deserialized Environment.
# Keys compare equal, and hash alike, exactly when their statement ids match;
# the pretty-printed text takes no part in the comparison.
class EnvironmentBindingKey(object):
    # json_ek: mapping that provides the 'stmt_id' and 'pretty' entries.
    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        self.stmt_id = json_ek['stmt_id']
        self.pretty = json_ek['pretty']

    # The value that defines the identity of this key.
    def _key(self):
        return self.stmt_id

    # Comparing against an object of another type yields NotImplemented, so
    # that Python falls back to its default handling instead of failing with
    # an AttributeError on the missing _key() method.
    def __eq__(self, other):
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive '!=' from __eq__, so spell it out explicitly.
    def __ne__(self, other):
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        return hash(self._key())
72
73
# Deserialized description of a location context.
# Contexts compare equal, and hash alike, exactly when their ids match.
class LocationContext(object):
    # json_frame: mapping that provides the 'lctx_id', 'location_context',
    # 'calling' and 'call_line' entries.
    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.line = json_frame['call_line']

    # The value that defines the identity of this context.
    def _key(self):
        return self.lctx_id

    # Comparing against an object of another type yields NotImplemented, so
    # that Python falls back to its default handling instead of failing with
    # an AttributeError on the missing _key() method.
    def __eq__(self, other):
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive '!=' from __eq__, so spell it out explicitly.
    def __ne__(self, other):
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        return hash(self._key())
91
92
# A group of deserialized Environment bindings that correspond to a specific
# location context. The bindings keep the order in which they were listed.
class EnvironmentFrame(object):
    # json_frame: mapping that describes the location context and carries an
    # 'items' entry, which is either None or a list of bindings that each
    # have a 'value' entry.
    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)

        bindings = collections.OrderedDict()
        json_items = json_frame['items']
        if json_items is not None:
            for json_binding in json_items:
                key = EnvironmentBindingKey(json_binding)
                bindings[key] = json_binding['value']
        self.bindings = bindings

    # Returns the (removed, added) key lists relative to the frame 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # Tells whether any binding differs from the ones in the frame 'prev'.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return bool(removed or added)
110
111
# A deserialized Environment: a list of frames, one per location context.
class Environment(object):
    # json_e: an iterable of JSON frames.
    def __init__(self, json_e):
        super(Environment, self).__init__()
        frames = []
        for json_frame in json_e:
            frames.append(EnvironmentFrame(json_frame))
        self.frames = frames

    # Returns the indices of the frames whose bindings differ from the frame
    # at the same position in 'prev', or None when the two environments
    # cannot be compared frame by frame.
    def diff_frames(self, prev):
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        pairs = zip(self.frames, prev.frames)
        for index, (frame, prev_frame) in enumerate(pairs):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    # Tells whether this environment differs from 'prev'; environments that
    # cannot be compared frame by frame count as different.
    def is_different(self, prev):
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0
141
142
# A single binding key in a deserialized RegionStore cluster.
# Keys compare equal, and hash alike, exactly when both their kind and their
# offset match.
class StoreBindingKey(object):
    # json_sk: mapping that provides the 'kind' and 'offset' entries.
    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    # The value that defines the identity of this key.
    def _key(self):
        return (self.kind, self.offset)

    # Comparing against an object of another type yields NotImplemented, so
    # that Python falls back to its default handling instead of failing with
    # an AttributeError on the missing _key() method.
    def __eq__(self, other):
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive '!=' from __eq__, so spell it out explicitly.
    def __ne__(self, other):
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        return hash(self._key())
158
159
# A single cluster of the deserialized RegionStore: a base region together
# with its bindings, kept in the order in which they were listed.
class StoreCluster(object):
    # json_sc: mapping with a 'cluster' entry and an 'items' entry that lists
    # bindings, each of which has a 'value' entry.
    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']

        bindings = collections.OrderedDict()
        for json_binding in json_sc['items']:
            key = StoreBindingKey(json_binding)
            bindings[key] = json_binding['value']
        self.bindings = bindings

    # Returns the (removed, added) key lists relative to the cluster 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # Tells whether any binding differs from the ones in the cluster 'prev'.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return bool(removed or added)
174
175
# A deserialized RegionStore: clusters indexed by their 'pointer' entry, kept
# in the order in which they were listed.
class Store(object):
    # json_s: an iterable of JSON clusters, each with a 'pointer' entry.
    def __init__(self, json_s):
        super(Store, self).__init__()
        clusters = collections.OrderedDict()
        for json_cluster in json_s:
            pointer = json_cluster['pointer']
            clusters[pointer] = StoreCluster(json_cluster)
        self.clusters = clusters

    # Returns three lists of cluster keys relative to the store 'prev':
    # clusters that exist only in 'prev', clusters that exist only in this
    # store, and clusters that exist in both but have different bindings.
    def diff_clusters(self, prev):
        removed = []
        updated = []
        for key in prev.clusters:
            if key not in self.clusters:
                removed.append(key)
            elif prev.clusters[key].is_different(self.clusters[key]):
                updated.append(key)

        added = []
        for key in self.clusters:
            if key not in prev.clusters:
                added.append(key)

        return (removed, added, updated)

    # Tells whether any cluster was removed, added or updated since 'prev'.
    def is_different(self, prev):
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed or added or updated)
193
194
# A deserialized program state: an identifier plus an optional Store and an
# optional Environment.
class ProgramState(object):
    # state_id: identifier of the state.
    # json_ps: mapping with 'store' and 'environment' entries, either of
    # which may be None.
    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        message = 'Adding ProgramState ' + str(state_id)
        logging.debug(message)

        self.state_id = state_id

        json_store = json_ps['store']
        if json_store is None:
            self.store = None
        else:
            self.store = Store(json_store)

        json_environment = json_ps['environment']
        if json_environment is None:
            self.environment = None
        else:
            self.environment = Environment(json_environment)
        # TODO: Objects under construction.
        # TODO: Constraint ranges.
        # TODO: Dynamic types of objects.
        # TODO: Checker messages.
210
211
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for predecessors and successors.
class ExplodedNode(object):
    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    # Fill in the contents of the node from its JSON description.
    # node_id: the name under which the node appears in the dump; it has to
    # match the name derived from the 'pointer' entry.
    # json_node: mapping with 'node_id', 'pointer', 'program_points' and
    # 'program_state' entries, plus 'state_id' when a state is present.
    def construct(self, node_id, json_node):
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']

        points = []
        for json_point in json_node['program_points']:
            points.append(ProgramPoint(json_point))
        self.points = points

        if json_node['program_state'] is None:
            self.state = None
        else:
            self.state = ProgramState(json_node['state_id'],
                                      json_node['program_state'])

        assert self.node_name() == node_id

    # The name of the node as it is spelled in the dump.
    def node_name(self):
        return 'Node' + self.ptr
234
235
# A deserialized ExplodedGraph. Constructed by consuming a .dot file
# line-by-line. Nodes are created on first reference, so an edge may mention
# a node before the line that describes it has been seen.
class ExplodedGraph(object):
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    # Consume one line of the dump. Lines that start with '//' are ignored,
    # edge lines link two nodes, node lines deserialize the node contents,
    # and everything else is skipped.
    def add_raw_line(self, raw_line):
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(';'):
            self.incomplete_line += raw_line
            return
        line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + line)
        edge_match = self.edge_re.match(line)
        if edge_match is not None:
            logging.debug('Classified as edge line.')
            pred, succ = edge_match.group(1, 2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(line)
        if node_match is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node_match.group(1)
        if len(self.nodes) == 0:
            self.root_id = node_id

        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # The substitutions are applied strictly in the order listed here.
        unescapes = (
            ('\\l', ''),
            (' ', ''),
            ('\\"', '"'),
            ('\\{', '{'),
            ('\\}', '}'),
            ('\\\\', '\\'),
            ('\\<', '\\\\<'),
            ('\\>', '\\\\>'),
        )
        node_label = node_match.group(2)
        for escaped, plain in unescapes:
            node_label = node_label.replace(escaped, plain)
        node_label = node_label.rstrip(',')

        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
296
297
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting. Everything is printed to the standard output.
class DotDumpVisitor(object):
    # do_diffs: when true, a node that has exactly one predecessor and at
    # least one successor is shown as a diff against that predecessor.
    def __init__(self, do_diffs):
        super(DotDumpVisitor, self).__init__()
        self._do_diffs = do_diffs

    # Print the string verbatim, without a trailing newline.
    @staticmethod
    def _dump_raw(s):
        print(s, end='')

    # Print the string, without a trailing newline, after applying the
    # substitutions below strictly in the order listed.
    @staticmethod
    def _dump(s):
        substitutions = (
            ('&', '&amp;'),
            ('{', '\\{'),
            ('}', '\\}'),
            ('\\<', '&lt;'),
            ('\\>', '&gt;'),
            ('\\l', '<br />'),
            ('|', ''),
        )
        text = s
        for old, new in substitutions:
            text = text.replace(old, new)
        print(text, end='')

    # Markup for the diff column: nothing when is_added is None, a green
    # plus for additions and a red minus for removals.
    @staticmethod
    def _diff_plus_minus(is_added):
        if is_added is None:
            return ''
        if is_added:
            color, sign = 'forestgreen', '+'
        else:
            color, sign = 'red', '-'
        return '<font color="%s">%s</font>' % (color, sign)

    # Remember the graph for predecessor lookups and open the digraph.
    def visit_begin_graph(self, graph):
        self._graph = graph
        self._dump_raw('digraph "ExplodedGraph" {\n'
                       'label="";\n')

    # Dump one table row that describes the program point.
    def visit_program_point(self, p):
        color_by_kinds = (
            (('Edge', 'BlockEntrance', 'BlockExit'), 'gold3'),
            (('PreStmtPurgeDeadSymbols',
              'PostStmtPurgeDeadSymbols'), 'red'),
            (('CallEnter', 'CallExitBegin', 'CallExitEnd'), 'blue'),
            (('Statement',), 'cyan3'),
        )
        color = 'forestgreen'
        for kinds, kind_color in color_by_kinds:
            if p.kind in kinds:
                color = kind_color
                break

        if p.kind == 'Statement':
            if p.loc is None:
                row = ('<tr><td align="left" width="0">'
                       '<i>Invalid Source Location</i>:</td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td>%s</td></tr>'
                       % (color, p.stmt_kind, p.pretty))
            else:
                row = ('<tr><td align="left" width="0">'
                       '%s:<b>%s</b>:<b>%s</b>:</td>'
                       '<td align="left" width="0"><font color="%s">'
                       '%s</font></td><td>%s</td></tr>'
                       % (p.loc.filename, p.loc.line,
                          p.loc.col, color, p.stmt_kind, p.pretty))
        elif p.kind == 'Edge':
            row = ('<tr><td width="0"></td>'
                   '<td align="left" width="0">'
                   '<font color="%s">%s</font></td><td align="left">'
                   '[B%d] -\\> [B%d]</td></tr>'
                   % (color, p.kind, p.src_id, p.dst_id))
        else:
            # TODO: Print more stuff for other kinds of points.
            row = ('<tr><td width="0"></td>'
                   '<td align="left" width="0" colspan="2">'
                   '<font color="%s">%s</font></td></tr>'
                   % (color, p.kind))
        self._dump(row)

    # Dump the environment as a table. When prev_e is given and a per-frame
    # diff yields updated frames, only the changed bindings of those frames
    # are shown; otherwise every frame is shown in full.
    def visit_environment(self, e, prev_e=None):
        self._dump('<table border="0">')

        def dump_location_context(lc, is_added=None):
            line_note = '' if lc.line is None else ('(line %s)' % lc.line)
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><b>%s</b></td>'
                       '<td align="left"><font color="grey60">%s </font>'
                       '%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          lc.caption, lc.decl, line_note))

        def dump_binding(frame, key, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><i>S%s</i></td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          key.stmt_id, key.pretty, frame.bindings[key]))

        frames_updated = None
        if prev_e is not None:
            frames_updated = e.diff_frames(prev_e)

        if not frames_updated:
            for frame in e.frames:
                dump_location_context(frame.location_context)
                for key in frame.bindings:
                    dump_binding(frame, key)
        else:
            for index in frames_updated:
                frame = e.frames[index]
                prev_frame = prev_e.frames[index]
                dump_location_context(frame.location_context)
                removed, added = frame.diff_bindings(prev_frame)
                for key in removed:
                    dump_binding(prev_frame, key, False)
                for key in added:
                    dump_binding(frame, key, True)

        self._dump('</table>')

    # Dump the environment section of the state s, as a diff against the
    # state prev_s when that state is given and has an environment.
    def visit_environment_in_state(self, s, prev_s=None):
        self._dump('<tr><td align="left">'
                   '<b>Environment: </b>')
        env = s.environment
        if env is None:
            self._dump('<i> Nothing!</i>')
        else:
            prev_env = None if prev_s is None else prev_s.environment
            if prev_env is None:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_environment(env)
            elif env.is_different(prev_env):
                self._dump('</td></tr><tr><td align="left">')
                self.visit_environment(env, prev_env)
            else:
                self._dump('<i> No changes!</i>')

        self._dump('</td></tr>')

    # Dump the store as a table. When prev_s is given, only the bindings of
    # removed, updated and added clusters are shown, in that order;
    # otherwise every binding is shown.
    def visit_store(self, s, prev_s=None):
        self._dump('<table border="0">')

        def dump_binding(store, cluster_id, key, is_added=None):
            cluster = store.clusters[cluster_id]
            default_note = '(<i>Default</i>)' if key.kind == 'Default' else ''
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          cluster.base_region, key.offset,
                          default_note, cluster.bindings[key]))

        if prev_s is None:
            for c in s.clusters:
                for key in s.clusters[c].bindings:
                    dump_binding(s, c, key)
        else:
            clusters_removed, clusters_added, clusters_updated = \
                s.diff_clusters(prev_s)
            for c in clusters_removed:
                for key in prev_s.clusters[c].bindings:
                    dump_binding(prev_s, c, key, False)
            for c in clusters_updated:
                removed, added = \
                    s.clusters[c].diff_bindings(prev_s.clusters[c])
                for key in removed:
                    dump_binding(prev_s, c, key, False)
                for key in added:
                    dump_binding(s, c, key, True)
            for c in clusters_added:
                for key in s.clusters[c].bindings:
                    dump_binding(s, c, key, True)

        self._dump('</table>')

    # Dump the store section of the state s, as a diff against the state
    # prev_s when that state is given and has a store.
    def visit_store_in_state(self, s, prev_s=None):
        self._dump('<tr><td align="left"><b>Store: </b>')
        store = s.store
        if store is None:
            self._dump('<i> Nothing!</i>')
        else:
            prev_store = None if prev_s is None else prev_s.store
            if prev_store is None:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_store(store)
            elif store.is_different(prev_store):
                self._dump('</td></tr><tr><td align="left">')
                self.visit_store(store, prev_store)
            else:
                self._dump('<i> No changes!</i>')
        self._dump('</td></tr><hr />')

    # Dump the store section followed by the environment section.
    def visit_state(self, s, prev_s):
        self.visit_store_in_state(s, prev_s)
        self.visit_environment_in_state(s, prev_s)

    # Dump a whole node: header, program points and, if present, the state.
    def visit_node(self, node):
        self._dump('%s [shape=record,label=<<table border="0">'
                   % node.node_name())

        if node.state is None:
            state_label = 'Unspecified'
        else:
            state_label = node.state.state_id
        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
                   'State %s</b></td></tr>'
                   % (node.node_id, node.ptr, state_label))

        self._dump('<tr><td align="left" width="0">')
        if len(node.points) > 1:
            heading = '<b>Program points:</b></td></tr>'
        else:
            heading = '<b>Program point:</b></td></tr>'
        self._dump(heading)

        self._dump('<tr><td align="left" width="0">'
                   '<table border="0" align="left" width="0">')
        for point in node.points:
            self.visit_program_point(point)
        self._dump('</table></td></tr>')

        if node.state is not None:
            self._dump('<hr />')
            # Do diffs only when we have a unique predecessor.
            # Don't do diffs on the leaf nodes because they're
            # the important ones.
            wants_diff = self._do_diffs and len(node.predecessors) == 1 \
                and len(node.successors) > 0
            prev_s = None
            if wants_diff:
                prev_s = self._graph.nodes[node.predecessors[0]].state
            self.visit_state(node.state, prev_s)
        self._dump_raw('</table>>];\n')

    # Dump the edge that leads from pred to succ.
    def visit_edge(self, pred, succ):
        edge_line = '%s -> %s;\n' % (pred.node_name(), succ.node_name())
        self._dump_raw(edge_line)

    # Close the digraph.
    def visit_end_of_graph(self):
        self._dump_raw('}\n')
524
525
# A class that encapsulates traversal of the ExplodedGraph. Different explorer
# kinds could potentially traverse specific sub-graphs.
class Explorer(object):
    def __init__(self):
        super(Explorer, self).__init__()

    # Walk the nodes in sorted order of their ids and report each node,
    # followed by its outgoing edges in sorted order of the successor ids,
    # to the visitor.
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            visitor.visit_node(graph.nodes[node_id])
            successor_ids = sorted(graph.nodes[node_id].successors)
            for succ_id in successor_ids:
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(graph.nodes[node_id], graph.nodes[succ_id])
        visitor.visit_end_of_graph()
541
542
# Entry point of the tool: parse the command line, read the dump file named
# there line by line into an ExplodedGraph, and print the rewritten graph.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    parser.add_argument('-v', '--verbose', dest='loglevel',
                        action='store_const', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    # 'store_true' is the const=True, default=False case of 'store_const'.
    parser.add_argument('-d', '--diff', dest='diff', action='store_true',
                        help='display differences between states')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            # Surrounding whitespace, including the newline, is dropped.
            graph.add_raw_line(raw_line.strip())

    visitor = DotDumpVisitor(args.diff)
    Explorer().explore(graph, visitor)
565
566
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
569