xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 0a77d9192ab75b8fcf218747d6bcd213dce1f4ce)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import json
17import logging
18import re
19
20
# Computes the key-level difference between two dictionaries.
def diff_dicts(curr, prev):
    """Return a ``(removed, added)`` pair of key lists.

    A key of ``prev`` is reported as removed when it is absent from ``curr``
    or maps to a different value there. A key of ``curr`` is reported as
    added under the mirrored condition. A key whose value changed therefore
    shows up in both lists. Keys are listed in the iteration order of the
    dictionary they come from.
    """
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return (removed, added)
26
27
# Wraps a program state trait that is a plain key-value dictionary.
class GenericMap(object):
    def __init__(self, items):
        """Store ``items`` (key-value pairs) in insertion order."""
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return ``(removed, added)`` key lists relative to ``prev``."""
        return diff_dicts(self.generic_map, prev.generic_map)

    def is_different(self, prev):
        """Tell whether any key was removed, added or changed."""
        removed, added = self.diff(prev)
        return bool(removed or added)
39
40
# A source location read from its JSON description.
class SourceLocation(object):
    def __init__(self, json_loc):
        """Read 'line', 'column' and the optional 'filename' from json_loc.

        When no 'filename' key is present the placeholder '(main file)'
        is used instead.
        """
        super(SourceLocation, self).__init__()
        self.line = json_loc['line']
        self.col = json_loc['column']
        self.filename = json_loc.get('filename', '(main file)')
49
50
# A program point read from its JSON description.
class ProgramPoint(object):
    def __init__(self, json_pp):
        """Populate the point from json_pp.

        'kind' and 'tag' are always read. Further attributes depend on the
        kind: 'Edge' adds src_id/dst_id, 'Statement' adds stmt_kind,
        pointer, pretty and loc (None when the JSON location is null), and
        'BlockEntrance' adds block_id. Other kinds get no extra attributes.
        """
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']

        if self.kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
            return

        if self.kind == 'Statement':
            self.stmt_kind = json_pp['stmt_kind']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            json_loc = json_pp['location']
            if json_loc is None:
                self.loc = None
            else:
                self.loc = SourceLocation(json_loc)
            return

        if self.kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
68
69
# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey(object):
    def __init__(self, json_ek):
        """Read the key from json_ek.

        The identifier comes from 'stmt_id' when present and from 'init_id'
        otherwise; 'pretty' is mandatory and 'kind' is optional (None when
        absent).
        """
        super(EnvironmentBindingKey, self).__init__()
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        # Identity of the key: only the statement/initializer id matters.
        return self.stmt_id

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError on the missing _key() method.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so keep them in sync by hand.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        return hash(self._key())
88
89
# Deserialized description of a location context.
class LocationContext(object):
    def __init__(self, json_frame):
        """Read lctx_id, caption, decl and line from json_frame.

        The values come from the 'lctx_id', 'location_context', 'calling'
        and 'call_line' keys respectively.
        """
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.line = json_frame['call_line']

    def _key(self):
        # Two contexts are the same context when their ids match.
        return self.lctx_id

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError on the missing _key() method.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so keep them in sync by hand.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        return hash(self._key())
107
108
# The deserialized Environment bindings that belong to one specific
# location context.
class EnvironmentFrame(object):
    def __init__(self, json_frame):
        """Build the frame's location context and its ordered bindings.

        A null 'items' entry is treated as an empty list of bindings.
        """
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)
        json_items = json_frame['items']
        if json_items is None:
            json_items = []
        self.bindings = collections.OrderedDict(
            (EnvironmentBindingKey(entry), entry['value'])
            for entry in json_items)

    def diff_bindings(self, prev):
        """Return ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        removed, added = self.diff_bindings(prev)
        return bool(removed or added)
126
127
# A deserialized Environment. The same class also holds other entities that
# have the shape of an Environment, such as Objects Under Construction.
class GenericEnvironment(object):
    def __init__(self, json_e):
        """Build one EnvironmentFrame per entry of json_e."""
        super(GenericEnvironment, self).__init__()
        self.frames = [EnvironmentFrame(f) for f in json_e]

    def diff_frames(self, prev):
        """Return indices of frames whose bindings differ from ``prev``.

        Returns None when no frame-by-frame diff can be produced: either
        the number of frames differs, or a frame at some position belongs
        to a different location context.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for index, (frame, prev_frame) in enumerate(
                zip(self.frames, prev.frames)):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        """Tell whether the environment changed (or cannot be diffed)."""
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0
158
159
# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    def __init__(self, json_sk):
        """Read the binding 'kind' and 'offset' from json_sk."""
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        # A binding is identified by its kind together with its offset.
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError on the missing _key() method.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so keep them in sync by hand.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        return hash(self._key())
175
176
# One cluster of the deserialized RegionStore.
class StoreCluster(object):
    def __init__(self, json_sc):
        """Read the base region and the ordered bindings of the cluster."""
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        self.bindings = collections.OrderedDict(
            (StoreBindingKey(entry), entry['value'])
            for entry in json_sc['items'])

    def diff_bindings(self, prev):
        """Return ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        removed, added = self.diff_bindings(prev)
        return bool(removed or added)
191
192
# The deserialized RegionStore.
class Store(object):
    def __init__(self, json_s):
        """Read the store pointer and its clusters, keyed by cluster pointer."""
        super(Store, self).__init__()
        self.ptr = json_s['pointer']
        self.clusters = collections.OrderedDict(
            (entry['pointer'], StoreCluster(entry))
            for entry in json_s['items'])

    def diff_clusters(self, prev):
        """Return ``(removed, added, updated)`` cluster keys versus ``prev``.

        ``removed`` and ``updated`` follow the order of ``prev.clusters``;
        ``added`` follows the order of ``self.clusters``.
        """
        removed = []
        updated = []
        for key in prev.clusters:
            if key not in self.clusters:
                removed.append(key)
            elif prev.clusters[key].is_different(self.clusters[key]):
                updated.append(key)

        added = []
        for key in self.clusters:
            if key not in prev.clusters:
                added.append(key)

        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any cluster was removed, added or updated."""
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed or added or updated)
211
212
# A program state read from its JSON description.
class ProgramState(object):
    def __init__(self, state_id, json_ps):
        """Deserialize every known trait of the state.

        Each trait ('store', 'environment', 'constraints', 'dynamic_types',
        'constructing_objects') becomes None when its JSON value is null.
        """
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id

        json_store = json_ps['store']
        if json_store is None:
            self.store = None
        else:
            self.store = Store(json_store)

        json_env = json_ps['environment']
        if json_env is None:
            self.environment = None
        else:
            self.environment = GenericEnvironment(json_env['items'])

        json_constraints = json_ps['constraints']
        if json_constraints is None:
            self.constraints = None
        else:
            self.constraints = GenericMap(
                [(c['symbol'], c['range']) for c in json_constraints])

        json_dyn_types = json_ps['dynamic_types']
        if json_dyn_types is None:
            self.dynamic_types = None
        else:
            # The value is the type name, optionally followed by a note
            # that the region may hold a sub-class of that type.
            self.dynamic_types = GenericMap(
                [(t['region'],
                  '%s%s' % (t['dyn_type'],
                            ' (or a sub-class)' if t['sub_classable']
                            else ''))
                 for t in json_dyn_types])

        json_ctors = json_ps['constructing_objects']
        if json_ctors is None:
            self.constructing_objects = None
        else:
            self.constructing_objects = GenericEnvironment(json_ctors)

        # TODO: Checker messages.
244
245
# A deserialized exploded graph node. It is default-constructible because an
# edge may mention the node before the node's own contents are deserialized,
# and the predecessor/successor lists must already exist at that point.
class ExplodedNode(object):
    def __init__(self):
        """Create an empty node that only has its edge lists."""
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Fill in the node contents from json_node.

        ``node_id`` is the name the node had in the .dot file; it must
        match the name derived from the node's pointer.
        """
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.points = [ProgramPoint(p) for p in json_node['program_points']]

        if json_node['program_state'] is None:
            self.state = None
        else:
            self.state = ProgramState(json_node['state_id'],
                                      json_node['program_state'])

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node's name in the .dot file ('Node' + pointer)."""
        return 'Node' + self.ptr
268
269
# A deserialized ExplodedGraph. It is built by feeding it the lines of
# a .dot file one at a time.
class ExplodedGraph(object):
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    # Substitutions that turn a node label into JSON text. They are applied
    # in this exact order; later entries rely on the effect of earlier ones.
    # Note: when writing tests you don't need to escape everything,
    # even though in a valid dot file everything is escaped.
    _label_substitutions = (
        ('\\l', ''),
        (' ', ''),
        ('\\"', '"'),
        ('\\{', '{'),
        ('\\}', '}'),
        ('\\\\', '\\'),
        ('\\|', '|'),
        ('\\<', '\\\\<'),
        ('\\>', '\\\\>'),
    )

    def __init__(self):
        """Create an empty graph with no root and no pending partial line."""
        super(ExplodedGraph, self).__init__()
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    def add_raw_line(self, raw_line):
        """Consume one (already stripped) line of the .dot file.

        Lines starting with '//' are ignored. A non-empty line that does
        not end in ';' is buffered and joined with the following lines.
        A completed line is then classified as an edge, a node, or skipped.
        """
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(';'):
            self.incomplete_line += raw_line
            return
        line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Try the edge pattern first, then the node pattern.
        logging.debug('Line: ' + line)
        edge_match = self.edge_re.match(line)
        if edge_match is not None:
            logging.debug('Classified as edge line.')
            pred, succ = edge_match.group(1), edge_match.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(line)
        if node_match is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node_match.group(1)
        # The first node seen while the graph is still empty is the root.
        if len(self.nodes) == 0:
            self.root_id = node_id
        node_label = node_match.group(2)
        for old, new in self._label_substitutions:
            node_label = node_label.replace(old, new)
        node_label = node_label.rstrip(',')
        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
331
332
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
335class DotDumpVisitor(object):
336    def __init__(self, do_diffs):
337        super(DotDumpVisitor, self).__init__()
338        self._do_diffs = do_diffs
339
340    @staticmethod
341    def _dump_raw(s):
342        print(s, end='')
343
344    @staticmethod
345    def _dump(s):
346        print(s.replace('&', '&amp;')
347               .replace('{', '\\{')
348               .replace('}', '\\}')
349               .replace('\\<', '&lt;')
350               .replace('\\>', '&gt;')
351               .replace('\\l', '<br />')
352               .replace('|', '\\|'), end='')
353
354    @staticmethod
355    def _diff_plus_minus(is_added):
356        if is_added is None:
357            return ''
358        if is_added:
359            return '<font color="forestgreen">+</font>'
360        return '<font color="red">-</font>'
361
362    def visit_begin_graph(self, graph):
363        self._graph = graph
364        self._dump_raw('digraph "ExplodedGraph" {\n')
365        self._dump_raw('label="";\n')
366
367    def visit_program_point(self, p):
368        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
369            color = 'gold3'
370        elif p.kind in ['PreStmtPurgeDeadSymbols',
371                        'PostStmtPurgeDeadSymbols']:
372            color = 'red'
373        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
374            color = 'blue'
375        elif p.kind in ['Statement']:
376            color = 'cyan3'
377        else:
378            color = 'forestgreen'
379
380        if p.kind == 'Statement':
381            if p.loc is not None:
382                self._dump('<tr><td align="left" width="0">'
383                           '%s:<b>%s</b>:<b>%s</b>:</td>'
384                           '<td align="left" width="0"><font color="%s">'
385                           '%s</font></td><td>%s</td></tr>'
386                           % (p.loc.filename, p.loc.line,
387                              p.loc.col, color, p.stmt_kind, p.pretty))
388            else:
389                self._dump('<tr><td align="left" width="0">'
390                           '<i>Invalid Source Location</i>:</td>'
391                           '<td align="left" width="0">'
392                           '<font color="%s">%s</font></td><td>%s</td></tr>'
393                           % (color, p.stmt_kind, p.pretty))
394        elif p.kind == 'Edge':
395            self._dump('<tr><td width="0"></td>'
396                       '<td align="left" width="0">'
397                       '<font color="%s">%s</font></td><td align="left">'
398                       '[B%d] -\\> [B%d]</td></tr>'
399                       % (color, p.kind, p.src_id, p.dst_id))
400        else:
401            # TODO: Print more stuff for other kinds of points.
402            self._dump('<tr><td width="0"></td>'
403                       '<td align="left" width="0" colspan="2">'
404                       '<font color="%s">%s</font></td></tr>'
405                       % (color, p.kind))
406
407        if p.tag is not None:
408            self._dump('<tr><td width="0"></td>'
409                       '<td colspan="2" align="left">'
410                       '<b>Tag: </b> <font color="crimson">'
411                       '%s</font></td></tr>' % p.tag)
412
413    def visit_environment(self, e, prev_e=None):
414        self._dump('<table border="0">')
415
416        def dump_location_context(lc, is_added=None):
417            self._dump('<tr><td>%s</td>'
418                       '<td align="left"><b>%s</b></td>'
419                       '<td align="left" colspan="2">'
420                       '<font color="grey60">%s </font>'
421                       '%s</td></tr>'
422                       % (self._diff_plus_minus(is_added),
423                          lc.caption, lc.decl,
424                          ('(line %s)' % lc.line) if lc.line is not None
425                          else ''))
426
427        def dump_binding(f, b, is_added=None):
428            self._dump('<tr><td>%s</td>'
429                       '<td align="left"><i>S%s</i></td>'
430                       '%s'
431                       '<td align="left">%s</td>'
432                       '<td align="left">%s</td></tr>'
433                       % (self._diff_plus_minus(is_added),
434                          b.stmt_id,
435                          '<td align="left"><font color="darkgreen"><i>'
436                          '(%s)</i></font></td>' % b.kind
437                          if b.kind is not None else '',
438                          b.pretty, f.bindings[b]))
439
440        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
441        if frames_updated:
442            for i in frames_updated:
443                f = e.frames[i]
444                prev_f = prev_e.frames[i]
445                dump_location_context(f.location_context)
446                bindings_removed, bindings_added = f.diff_bindings(prev_f)
447                for b in bindings_removed:
448                    dump_binding(prev_f, b, False)
449                for b in bindings_added:
450                    dump_binding(f, b, True)
451        else:
452            for f in e.frames:
453                dump_location_context(f.location_context)
454                for b in f.bindings:
455                    dump_binding(f, b)
456
457        self._dump('</table>')
458
459    def visit_environment_in_state(self, selector, title, s, prev_s=None):
460        e = getattr(s, selector)
461        prev_e = getattr(prev_s, selector) if prev_s is not None else None
462        if e is None and prev_e is None:
463            return
464
465        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
466        if e is None:
467            self._dump('<i> Nothing!</i>')
468        else:
469            if prev_e is not None:
470                if e.is_different(prev_e):
471                    self._dump('</td></tr><tr><td align="left">')
472                    self.visit_environment(e, prev_e)
473                else:
474                    self._dump('<i> No changes!</i>')
475            else:
476                self._dump('</td></tr><tr><td align="left">')
477                self.visit_environment(e)
478
479        self._dump('</td></tr>')
480
481    def visit_store(self, s, prev_s=None):
482        self._dump('<table border="0">')
483
484        def dump_binding(s, c, b, is_added=None):
485            self._dump('<tr><td>%s</td>'
486                       '<td align="left">%s</td>'
487                       '<td align="left">%s</td>'
488                       '<td align="left">%s</td>'
489                       '<td align="left">%s</td></tr>'
490                       % (self._diff_plus_minus(is_added),
491                          s.clusters[c].base_region, b.offset,
492                          '(<i>Default</i>)' if b.kind == 'Default'
493                          else '',
494                          s.clusters[c].bindings[b]))
495
496        if prev_s is not None:
497            clusters_removed, clusters_added, clusters_updated = \
498                s.diff_clusters(prev_s)
499            for c in clusters_removed:
500                for b in prev_s.clusters[c].bindings:
501                    dump_binding(prev_s, c, b, False)
502            for c in clusters_updated:
503                bindings_removed, bindings_added = \
504                    s.clusters[c].diff_bindings(prev_s.clusters[c])
505                for b in bindings_removed:
506                    dump_binding(prev_s, c, b, False)
507                for b in bindings_added:
508                    dump_binding(s, c, b, True)
509            for c in clusters_added:
510                for b in s.clusters[c].bindings:
511                    dump_binding(s, c, b, True)
512        else:
513            for c in s.clusters:
514                for b in s.clusters[c].bindings:
515                    dump_binding(s, c, b)
516
517        self._dump('</table>')
518
519    def visit_store_in_state(self, s, prev_s=None):
520        st = s.store
521        prev_st = prev_s.store if prev_s is not None else None
522        if st is None and prev_st is None:
523            return
524
525        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
526        if st is None:
527            self._dump('<i> Nothing!</i>')
528        else:
529            if prev_st is not None:
530                if s.store.is_different(prev_st):
531                    self._dump('</td></tr><tr><td align="left">')
532                    self.visit_store(st, prev_st)
533                else:
534                    self._dump('<i> No changes!</i>')
535            else:
536                self._dump('</td></tr><tr><td align="left">')
537                self.visit_store(st)
538        self._dump('</td></tr>')
539
540    def visit_generic_map(self, m, prev_m=None):
541        self._dump('<table border="0">')
542
543        def dump_pair(m, k, is_added=None):
544            self._dump('<tr><td>%s</td>'
545                       '<td align="left">%s</td>'
546                       '<td align="left">%s</td></tr>'
547                       % (self._diff_plus_minus(is_added),
548                          k, m.generic_map[k]))
549
550        if prev_m is not None:
551            removed, added = m.diff(prev_m)
552            for k in removed:
553                dump_pair(prev_m, k, False)
554            for k in added:
555                dump_pair(m, k, True)
556        else:
557            for k in m.generic_map:
558                dump_pair(m, k, None)
559
560        self._dump('</table>')
561
562    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
563        m = getattr(s, selector)
564        prev_m = getattr(prev_s, selector) if prev_s is not None else None
565        if m is None and prev_m is None:
566            return
567
568        self._dump('<hr />')
569        self._dump('<tr><td align="left">'
570                   '<b>%s: </b>' % title)
571        if m is None:
572            self._dump('<i> Nothing!</i>')
573        else:
574            if prev_s is not None:
575                if prev_m is not None:
576                    if m.is_different(prev_m):
577                        self._dump('</td></tr><tr><td align="left">')
578                        self.visit_generic_map(m, prev_m)
579                    else:
580                        self._dump('<i> No changes!</i>')
581            if prev_m is None:
582                self._dump('</td></tr><tr><td align="left">')
583                self.visit_generic_map(m)
584        self._dump('</td></tr>')
585
586    def visit_state(self, s, prev_s):
587        self.visit_store_in_state(s, prev_s)
588        self.visit_environment_in_state('environment', 'Environment',
589                                        s, prev_s)
590        self.visit_generic_map_in_state('constraints', 'Ranges',
591                                        s, prev_s)
592        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
593                                        s, prev_s)
594        self.visit_environment_in_state('constructing_objects',
595                                        'Objects Under Construction',
596                                        s, prev_s)
597
598    def visit_node(self, node):
599        self._dump('%s [shape=record,label=<<table border="0">'
600                   % (node.node_name()))
601
602        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
603                   'State %s</b></td></tr>'
604                   % (node.node_id, node.ptr, node.state.state_id
605                      if node.state is not None else 'Unspecified'))
606        self._dump('<tr><td align="left" width="0">')
607        if len(node.points) > 1:
608            self._dump('<b>Program points:</b></td></tr>')
609        else:
610            self._dump('<b>Program point:</b></td></tr>')
611        self._dump('<tr><td align="left" width="0">'
612                   '<table border="0" align="left" width="0">')
613        for p in node.points:
614            self.visit_program_point(p)
615        self._dump('</table></td></tr>')
616
617        if node.state is not None:
618            prev_s = None
619            # Do diffs only when we have a unique predecessor.
620            # Don't do diffs on the leaf nodes because they're
621            # the important ones.
622            if self._do_diffs and len(node.predecessors) == 1 \
623               and len(node.successors) > 0:
624                prev_s = self._graph.nodes[node.predecessors[0]].state
625            self.visit_state(node.state, prev_s)
626        self._dump_raw('</table>>];\n')
627
628    def visit_edge(self, pred, succ):
629        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))
630
631    def visit_end_of_graph(self):
632        self._dump_raw('}\n')
633
634
# Encapsulates the traversal of the ExplodedGraph. Different explorer kinds
# could potentially traverse specific sub-graphs.
class Explorer(object):
    def __init__(self):
        super(Explorer, self).__init__()

    def explore(self, graph, visitor):
        """Walk ``graph`` and report everything to ``visitor``.

        Nodes are visited in sorted order of their ids; right after each
        node, its outgoing edges are visited in sorted order of the
        successor ids.
        """
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            node = graph.nodes[node_id]
            visitor.visit_node(node)
            for succ_id in sorted(node.successors):
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(node, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
650
651
def main():
    """Command-line entry point.

    Reads the .dot file named on the command line and prints the rewritten
    graph to standard output. '-v' enables debug logging; '-d' shows states
    as differences from their predecessor.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    parser.add_argument('-v', '--verbose', dest='loglevel',
                        action='store_const', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    parser.add_argument('-d', '--diff', dest='diff',
                        action='store_const', const=True, default=False,
                        help='display differences between states')
    options = parser.parse_args()
    logging.basicConfig(level=options.loglevel)

    # Feed the input to the graph one stripped line at a time.
    graph = ExplodedGraph()
    with open(options.filename) as dot_file:
        for line in dot_file:
            graph.add_raw_line(line.strip())

    Explorer().explore(graph, DotDumpVisitor(options.diff))
674
675
# Run the rewriter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
678