xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 02f91ddf1b353b28697b288edbe6a8a828705781)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import json
17import logging
18import re
19
20
21# A helper function for finding the difference between two dictionaries.
22def diff_dicts(curr, prev):
23    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
24    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
25    return (removed, added)
26
27
28# Represents any program state trait that is a dictionary of key-value pairs.
29class GenericMap(object):
30    def __init__(self, items):
31        self.generic_map = collections.OrderedDict(items)
32
33    def diff(self, prev):
34        return diff_dicts(self.generic_map, prev.generic_map)
35
36    def is_different(self, prev):
37        removed, added = self.diff(prev)
38        return len(removed) != 0 or len(added) != 0
39
40
41# A deserialized source location.
42class SourceLocation(object):
43    def __init__(self, json_loc):
44        super(SourceLocation, self).__init__()
45        self.line = json_loc['line']
46        self.col = json_loc['column']
47        self.filename = json_loc['filename'] \
48            if 'filename' in json_loc else '(main file)'
49
50
51# A deserialized program point.
52class ProgramPoint(object):
53    def __init__(self, json_pp):
54        super(ProgramPoint, self).__init__()
55        self.kind = json_pp['kind']
56        self.tag = json_pp['tag']
57        if self.kind == 'Edge':
58            self.src_id = json_pp['src_id']
59            self.dst_id = json_pp['dst_id']
60        elif self.kind == 'Statement':
61            self.stmt_kind = json_pp['stmt_kind']
62            self.pointer = json_pp['pointer']
63            self.pretty = json_pp['pretty']
64            self.loc = SourceLocation(json_pp['location']) \
65                if json_pp['location'] is not None else None
66        elif self.kind == 'BlockEntrance':
67            self.block_id = json_pp['block_id']
68
69
70# A single expression acting as a key in a deserialized Environment.
71class EnvironmentBindingKey(object):
72    def __init__(self, json_ek):
73        super(EnvironmentBindingKey, self).__init__()
74        self.stmt_id = json_ek['stmt_id']
75        self.pretty = json_ek['pretty']
76
77    def _key(self):
78        return self.stmt_id
79
80    def __eq__(self, other):
81        return self._key() == other._key()
82
83    def __hash__(self):
84        return hash(self._key())
85
86
87# Deserialized description of a location context.
88class LocationContext(object):
89    def __init__(self, json_frame):
90        super(LocationContext, self).__init__()
91        self.lctx_id = json_frame['lctx_id']
92        self.caption = json_frame['location_context']
93        self.decl = json_frame['calling']
94        self.line = json_frame['call_line']
95
96    def _key(self):
97        return self.lctx_id
98
99    def __eq__(self, other):
100        return self._key() == other._key()
101
102    def __hash__(self):
103        return hash(self._key())
104
105
106# A group of deserialized Environment bindings that correspond to a specific
107# location context.
108class EnvironmentFrame(object):
109    def __init__(self, json_frame):
110        super(EnvironmentFrame, self).__init__()
111        self.location_context = LocationContext(json_frame)
112        self.bindings = collections.OrderedDict(
113            [(EnvironmentBindingKey(b),
114              b['value']) for b in json_frame['items']]
115            if json_frame['items'] is not None else [])
116
117    def diff_bindings(self, prev):
118        return diff_dicts(self.bindings, prev.bindings)
119
120    def is_different(self, prev):
121        removed, added = self.diff_bindings(prev)
122        return len(removed) != 0 or len(added) != 0
123
124
125# A deserialized Environment.
126class Environment(object):
127    def __init__(self, json_e):
128        super(Environment, self).__init__()
129        self.ptr = json_e['pointer']
130        self.frames = [EnvironmentFrame(f) for f in json_e['items']]
131
132    def diff_frames(self, prev):
133        # TODO: It's difficult to display a good diff when frame numbers shift.
134        if len(self.frames) != len(prev.frames):
135            return None
136
137        updated = []
138        for i in range(len(self.frames)):
139            f = self.frames[i]
140            prev_f = prev.frames[i]
141            if f.location_context == prev_f.location_context:
142                if f.is_different(prev_f):
143                    updated.append(i)
144            else:
145                # We have the whole frame replaced with another frame.
146                # TODO: Produce a nice diff.
147                return None
148
149        # TODO: Add support for added/removed.
150        return updated
151
152    def is_different(self, prev):
153        updated = self.diff_frames(prev)
154        return updated is None or len(updated) > 0
155
156
157# A single binding key in a deserialized RegionStore cluster.
158class StoreBindingKey(object):
159    def __init__(self, json_sk):
160        super(StoreBindingKey, self).__init__()
161        self.kind = json_sk['kind']
162        self.offset = json_sk['offset']
163
164    def _key(self):
165        return (self.kind, self.offset)
166
167    def __eq__(self, other):
168        return self._key() == other._key()
169
170    def __hash__(self):
171        return hash(self._key())
172
173
174# A single cluster of the deserialized RegionStore.
175class StoreCluster(object):
176    def __init__(self, json_sc):
177        super(StoreCluster, self).__init__()
178        self.base_region = json_sc['cluster']
179        self.bindings = collections.OrderedDict(
180            [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
181
182    def diff_bindings(self, prev):
183        return diff_dicts(self.bindings, prev.bindings)
184
185    def is_different(self, prev):
186        removed, added = self.diff_bindings(prev)
187        return len(removed) != 0 or len(added) != 0
188
189
190# A deserialized RegionStore.
191class Store(object):
192    def __init__(self, json_s):
193        super(Store, self).__init__()
194        self.ptr = json_s['pointer']
195        self.clusters = collections.OrderedDict(
196            [(c['pointer'], StoreCluster(c)) for c in json_s['items']])
197
198    def diff_clusters(self, prev):
199        removed = [k for k in prev.clusters if k not in self.clusters]
200        added = [k for k in self.clusters if k not in prev.clusters]
201        updated = [k for k in prev.clusters if k in self.clusters
202                   and prev.clusters[k].is_different(self.clusters[k])]
203        return (removed, added, updated)
204
205    def is_different(self, prev):
206        removed, added, updated = self.diff_clusters(prev)
207        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
208
209
210# A deserialized program state.
211class ProgramState(object):
212    def __init__(self, state_id, json_ps):
213        super(ProgramState, self).__init__()
214        logging.debug('Adding ProgramState ' + str(state_id))
215
216        self.state_id = state_id
217        self.store = Store(json_ps['store']) \
218            if json_ps['store'] is not None else None
219        self.environment = Environment(json_ps['environment']) \
220            if json_ps['environment'] is not None else None
221        self.constraints = GenericMap([
222            (c['symbol'], c['range']) for c in json_ps['constraints']
223        ]) if json_ps['constraints'] is not None else None
224        self.dynamic_types = GenericMap([
225                (t['region'], '%s%s' % (t['dyn_type'],
226                                        ' (or a sub-class)'
227                                        if t['sub_classable'] else ''))
228                for t in json_ps['dynamic_types']]) \
229            if json_ps['dynamic_types'] is not None else None
230
231        # TODO: Objects under construction.
232        # TODO: Checker messages.
233
234
235# A deserialized exploded graph node. Has a default constructor because it
236# may be referenced as part of an edge before its contents are deserialized,
237# and in this moment we already need a room for predecessors and successors.
238class ExplodedNode(object):
239    def __init__(self):
240        super(ExplodedNode, self).__init__()
241        self.predecessors = []
242        self.successors = []
243
244    def construct(self, node_id, json_node):
245        logging.debug('Adding ' + node_id)
246        self.node_id = json_node['node_id']
247        self.ptr = json_node['pointer']
248        self.points = [ProgramPoint(p) for p in json_node['program_points']]
249        self.state = ProgramState(json_node['state_id'],
250                                  json_node['program_state']) \
251            if json_node['program_state'] is not None else None
252
253        assert self.node_name() == node_id
254
255    def node_name(self):
256        return 'Node' + self.ptr
257
258
259# A deserialized ExplodedGraph. Constructed by consuming a .dot file
260# line-by-line.
261class ExplodedGraph(object):
262    # Parse .dot files with regular expressions.
263    node_re = re.compile(
264        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
265    edge_re = re.compile(
266        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')
267
268    def __init__(self):
269        super(ExplodedGraph, self).__init__()
270        self.nodes = collections.defaultdict(ExplodedNode)
271        self.root_id = None
272        self.incomplete_line = ''
273
274    def add_raw_line(self, raw_line):
275        if raw_line.startswith('//'):
276            return
277
278        # Allow line breaks by waiting for ';'. This is not valid in
279        # a .dot file, but it is useful for writing tests.
280        if len(raw_line) > 0 and raw_line[-1] != ';':
281            self.incomplete_line += raw_line
282            return
283        raw_line = self.incomplete_line + raw_line
284        self.incomplete_line = ''
285
286        # Apply regexps one by one to see if it's a node or an edge
287        # and extract contents if necessary.
288        logging.debug('Line: ' + raw_line)
289        result = self.edge_re.match(raw_line)
290        if result is not None:
291            logging.debug('Classified as edge line.')
292            pred = result.group(1)
293            succ = result.group(2)
294            self.nodes[pred].successors.append(succ)
295            self.nodes[succ].predecessors.append(pred)
296            return
297        result = self.node_re.match(raw_line)
298        if result is not None:
299            logging.debug('Classified as node line.')
300            node_id = result.group(1)
301            if len(self.nodes) == 0:
302                self.root_id = node_id
303            # Note: when writing tests you don't need to escape everything,
304            # even though in a valid dot file everything is escaped.
305            node_label = result.group(2).replace('\\l', '') \
306                                        .replace(' ', '') \
307                                        .replace('\\"', '"') \
308                                        .replace('\\{', '{') \
309                                        .replace('\\}', '}') \
310                                        .replace('\\\\', '\\') \
311                                        .replace('\\|', '|') \
312                                        .replace('\\<', '\\\\<') \
313                                        .replace('\\>', '\\\\>') \
314                                        .rstrip(',')
315            logging.debug(node_label)
316            json_node = json.loads(node_label)
317            self.nodes[node_id].construct(node_id, json_node)
318            return
319        logging.debug('Skipping.')
320
321
322# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
323# syntax highlighing.
324class DotDumpVisitor(object):
325    def __init__(self, do_diffs):
326        super(DotDumpVisitor, self).__init__()
327        self._do_diffs = do_diffs
328
329    @staticmethod
330    def _dump_raw(s):
331        print(s, end='')
332
333    @staticmethod
334    def _dump(s):
335        print(s.replace('&', '&amp;')
336               .replace('{', '\\{')
337               .replace('}', '\\}')
338               .replace('\\<', '&lt;')
339               .replace('\\>', '&gt;')
340               .replace('\\l', '<br />')
341               .replace('|', '\\|'), end='')
342
343    @staticmethod
344    def _diff_plus_minus(is_added):
345        if is_added is None:
346            return ''
347        if is_added:
348            return '<font color="forestgreen">+</font>'
349        return '<font color="red">-</font>'
350
351    def visit_begin_graph(self, graph):
352        self._graph = graph
353        self._dump_raw('digraph "ExplodedGraph" {\n')
354        self._dump_raw('label="";\n')
355
356    def visit_program_point(self, p):
357        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
358            color = 'gold3'
359        elif p.kind in ['PreStmtPurgeDeadSymbols',
360                        'PostStmtPurgeDeadSymbols']:
361            color = 'red'
362        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
363            color = 'blue'
364        elif p.kind in ['Statement']:
365            color = 'cyan3'
366        else:
367            color = 'forestgreen'
368
369        if p.kind == 'Statement':
370            if p.loc is not None:
371                self._dump('<tr><td align="left" width="0">'
372                           '%s:<b>%s</b>:<b>%s</b>:</td>'
373                           '<td align="left" width="0"><font color="%s">'
374                           '%s</font></td><td>%s</td></tr>'
375                           % (p.loc.filename, p.loc.line,
376                              p.loc.col, color, p.stmt_kind, p.pretty))
377            else:
378                self._dump('<tr><td align="left" width="0">'
379                           '<i>Invalid Source Location</i>:</td>'
380                           '<td align="left" width="0">'
381                           '<font color="%s">%s</font></td><td>%s</td></tr>'
382                           % (color, p.stmt_kind, p.pretty))
383        elif p.kind == 'Edge':
384            self._dump('<tr><td width="0"></td>'
385                       '<td align="left" width="0">'
386                       '<font color="%s">%s</font></td><td align="left">'
387                       '[B%d] -\\> [B%d]</td></tr>'
388                       % (color, p.kind, p.src_id, p.dst_id))
389        else:
390            # TODO: Print more stuff for other kinds of points.
391            self._dump('<tr><td width="0"></td>'
392                       '<td align="left" width="0" colspan="2">'
393                       '<font color="%s">%s</font></td></tr>'
394                       % (color, p.kind))
395
396        if p.tag is not None:
397            self._dump('<tr><td width="0"></td>'
398                       '<td colspan="2" align="left">'
399                       '<b>Tag: </b> <font color="crimson">'
400                       '%s</font></td></tr>' % p.tag)
401
402    def visit_environment(self, e, prev_e=None):
403        self._dump('<table border="0">')
404
405        def dump_location_context(lc, is_added=None):
406            self._dump('<tr><td>%s</td>'
407                       '<td align="left"><b>%s</b></td>'
408                       '<td align="left" colspan="2">'
409                       '<font color="grey60">%s </font>'
410                       '%s</td></tr>'
411                       % (self._diff_plus_minus(is_added),
412                          lc.caption, lc.decl,
413                          ('(line %s)' % lc.line) if lc.line is not None
414                          else ''))
415
416        def dump_binding(f, b, is_added=None):
417            self._dump('<tr><td>%s</td>'
418                       '<td align="left"><i>S%s</i></td>'
419                       '<td align="left">%s</td>'
420                       '<td align="left">%s</td></tr>'
421                       % (self._diff_plus_minus(is_added),
422                          b.stmt_id, b.pretty, f.bindings[b]))
423
424        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
425        if frames_updated:
426            for i in frames_updated:
427                f = e.frames[i]
428                prev_f = prev_e.frames[i]
429                dump_location_context(f.location_context)
430                bindings_removed, bindings_added = f.diff_bindings(prev_f)
431                for b in bindings_removed:
432                    dump_binding(prev_f, b, False)
433                for b in bindings_added:
434                    dump_binding(f, b, True)
435        else:
436            for f in e.frames:
437                dump_location_context(f.location_context)
438                for b in f.bindings:
439                    dump_binding(f, b)
440
441        self._dump('</table>')
442
443    def visit_environment_in_state(self, s, prev_s=None):
444        self._dump('<hr /><tr><td align="left"><b>Environment: </b>')
445        if s.environment is None:
446            self._dump('<i> Nothing!</i>')
447        else:
448            if prev_s is not None and prev_s.environment is not None:
449                if s.environment.is_different(prev_s.environment):
450                    self._dump('</td></tr><tr><td align="left">')
451                    self.visit_environment(s.environment, prev_s.environment)
452                else:
453                    self._dump('<i> No changes!</i>')
454            else:
455                self._dump('</td></tr><tr><td align="left">')
456                self.visit_environment(s.environment)
457
458        self._dump('</td></tr>')
459
460    def visit_store(self, s, prev_s=None):
461        self._dump('<table border="0">')
462
463        def dump_binding(s, c, b, is_added=None):
464            self._dump('<tr><td>%s</td>'
465                       '<td align="left">%s</td>'
466                       '<td align="left">%s</td>'
467                       '<td align="left">%s</td>'
468                       '<td align="left">%s</td></tr>'
469                       % (self._diff_plus_minus(is_added),
470                          s.clusters[c].base_region, b.offset,
471                          '(<i>Default</i>)' if b.kind == 'Default'
472                          else '',
473                          s.clusters[c].bindings[b]))
474
475        if prev_s is not None:
476            clusters_removed, clusters_added, clusters_updated = \
477                s.diff_clusters(prev_s)
478            for c in clusters_removed:
479                for b in prev_s.clusters[c].bindings:
480                    dump_binding(prev_s, c, b, False)
481            for c in clusters_updated:
482                bindings_removed, bindings_added = \
483                    s.clusters[c].diff_bindings(prev_s.clusters[c])
484                for b in bindings_removed:
485                    dump_binding(prev_s, c, b, False)
486                for b in bindings_added:
487                    dump_binding(s, c, b, True)
488            for c in clusters_added:
489                for b in s.clusters[c].bindings:
490                    dump_binding(s, c, b, True)
491        else:
492            for c in s.clusters:
493                for b in s.clusters[c].bindings:
494                    dump_binding(s, c, b)
495
496        self._dump('</table>')
497
498    def visit_store_in_state(self, s, prev_s=None):
499        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
500        if s.store is None:
501            self._dump('<i> Nothing!</i>')
502        else:
503            if prev_s is not None and prev_s.store is not None:
504                if s.store.is_different(prev_s.store):
505                    self._dump('</td></tr><tr><td align="left">')
506                    self.visit_store(s.store, prev_s.store)
507                else:
508                    self._dump('<i> No changes!</i>')
509            else:
510                self._dump('</td></tr><tr><td align="left">')
511                self.visit_store(s.store)
512        self._dump('</td></tr>')
513
514    def visit_generic_map(self, m, prev_m=None):
515        self._dump('<table border="0">')
516
517        def dump_pair(m, k, is_added=None):
518            self._dump('<tr><td>%s</td>'
519                       '<td align="left">%s</td>'
520                       '<td align="left">%s</td></tr>'
521                       % (self._diff_plus_minus(is_added),
522                          k, m.generic_map[k]))
523
524        if prev_m is not None:
525            removed, added = m.diff(prev_m)
526            for k in removed:
527                dump_pair(prev_m, k, False)
528            for k in added:
529                dump_pair(m, k, True)
530        else:
531            for k in m.generic_map:
532                dump_pair(m, k, None)
533
534        self._dump('</table>')
535
536    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
537        m = getattr(s, selector)
538        prev_m = getattr(prev_s, selector) if prev_s is not None else None
539        if m is None and prev_m is None:
540            return
541
542        self._dump('<hr />')
543        self._dump('<tr><td align="left">'
544                   '<b>%s: </b>' % title)
545        if m is None:
546            self._dump('<i> Nothing!</i>')
547        else:
548            if prev_s is not None:
549                if prev_m is not None:
550                    if m.is_different(prev_m):
551                        self._dump('</td></tr><tr><td align="left">')
552                        self.visit_generic_map(m, prev_m)
553                    else:
554                        self._dump('<i> No changes!</i>')
555            if prev_m is None:
556                self._dump('</td></tr><tr><td align="left">')
557                self.visit_generic_map(m)
558        self._dump('</td></tr>')
559
560    def visit_state(self, s, prev_s):
561        self.visit_store_in_state(s, prev_s)
562        self.visit_environment_in_state(s, prev_s)
563        self.visit_generic_map_in_state('constraints', 'Ranges',
564                                        s, prev_s)
565        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
566                                        s, prev_s)
567
568    def visit_node(self, node):
569        self._dump('%s [shape=record,label=<<table border="0">'
570                   % (node.node_name()))
571
572        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
573                   'State %s</b></td></tr>'
574                   % (node.node_id, node.ptr, node.state.state_id
575                      if node.state is not None else 'Unspecified'))
576        self._dump('<tr><td align="left" width="0">')
577        if len(node.points) > 1:
578            self._dump('<b>Program points:</b></td></tr>')
579        else:
580            self._dump('<b>Program point:</b></td></tr>')
581        self._dump('<tr><td align="left" width="0">'
582                   '<table border="0" align="left" width="0">')
583        for p in node.points:
584            self.visit_program_point(p)
585        self._dump('</table></td></tr>')
586
587        if node.state is not None:
588            prev_s = None
589            # Do diffs only when we have a unique predecessor.
590            # Don't do diffs on the leaf nodes because they're
591            # the important ones.
592            if self._do_diffs and len(node.predecessors) == 1 \
593               and len(node.successors) > 0:
594                prev_s = self._graph.nodes[node.predecessors[0]].state
595            self.visit_state(node.state, prev_s)
596        self._dump_raw('</table>>];\n')
597
598    def visit_edge(self, pred, succ):
599        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))
600
601    def visit_end_of_graph(self):
602        self._dump_raw('}\n')
603
604
605# A class that encapsulates traversal of the ExplodedGraph. Different explorer
606# kinds could potentially traverse specific sub-graphs.
607class Explorer(object):
608    def __init__(self):
609        super(Explorer, self).__init__()
610
611    def explore(self, graph, visitor):
612        visitor.visit_begin_graph(graph)
613        for node in sorted(graph.nodes):
614            logging.debug('Visiting ' + node)
615            visitor.visit_node(graph.nodes[node])
616            for succ in sorted(graph.nodes[node].successors):
617                logging.debug('Visiting edge: %s -> %s ' % (node, succ))
618                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
619        visitor.visit_end_of_graph()
620
621
622def main():
623    parser = argparse.ArgumentParser()
624    parser.add_argument('filename', type=str)
625    parser.add_argument('-v', '--verbose', action='store_const',
626                        dest='loglevel', const=logging.DEBUG,
627                        default=logging.WARNING,
628                        help='enable info prints')
629    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
630                        const=True, default=False,
631                        help='display differences between states')
632    args = parser.parse_args()
633    logging.basicConfig(level=args.loglevel)
634
635    graph = ExplodedGraph()
636    with open(args.filename) as fd:
637        for raw_line in fd:
638            raw_line = raw_line.strip()
639            graph.add_raw_line(raw_line)
640
641    explorer = Explorer()
642    visitor = DotDumpVisitor(args.diff)
643    explorer.explore(graph, visitor)
644
645
646if __name__ == '__main__':
647    main()
648