xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision c6921379f55ee566fb62ba5aa47b217cf7c5d960)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23#===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25#===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    """Compare two dictionaries key by key.

    Returns a ``(removed, added)`` tuple of key lists. ``removed`` holds the
    keys of ``prev`` that are missing from ``curr`` or mapped to a different
    value there; ``added`` holds the keys of ``curr`` that are missing from
    ``prev`` or mapped to a different value there. A key whose value changed
    therefore appears in both lists.
    """
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return removed, added
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap(object):
    """Ordered key-value container for a dictionary-like state trait."""

    def __init__(self, items):
        # The pairs are kept in the order in which they were supplied.
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return the ``(removed, added)`` key lists relative to ``prev``."""
        return diff_dicts(self.generic_map, prev.generic_map)

    def is_different(self, prev):
        """Return True when the diff against ``prev`` reports any key."""
        gone, new = self.diff(prev)
        return bool(gone) or bool(new)
46
47
48# A deserialized source location.
class SourceLocation(object):
    """Line/column position built from a JSON location object.

    ``filename`` is the base name of the ``file`` entry, or the placeholder
    ``'(main file)'`` when that entry is absent. ``spelling`` is a nested
    SourceLocation built from the ``spelling`` entry, or None without one.
    """

    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']

        if 'file' in json_loc:
            self.filename = os.path.basename(json_loc['file'])
        else:
            self.filename = '(main file)'

        if 'spelling' in json_loc:
            self.spelling = SourceLocation(json_loc['spelling'])
        else:
            self.spelling = None

    def is_macro(self):
        """Return True when a spelling location was recorded."""
        return not (self.spelling is None)
62
63
64# A deserialized program point.
class ProgramPoint(object):
    """One program point read from its JSON description.

    Every point carries ``kind``, ``tag``, ``node_id``, ``is_sink`` and
    ``has_report``. Points of kind 'Edge', 'Statement' and 'BlockEntrance'
    additionally receive the kind-specific attributes set by the readers
    below; other kinds get nothing more.
    """

    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']
        self.node_id = json_pp['node_id']
        self.is_sink = bool(json_pp['is_sink'])
        self.has_report = bool(json_pp['has_report'])

        kind = self.kind
        if kind == 'Edge':
            self._read_edge(json_pp)
        elif kind == 'Statement':
            self._read_statement(json_pp)
        elif kind == 'BlockEntrance':
            self._read_block_entrance(json_pp)

    def _read_edge(self, json_pp):
        # An edge records the ids of the blocks it connects.
        self.src_id = json_pp['src_id']
        self.dst_id = json_pp['dst_id']

    def _read_statement(self, json_pp):
        # A statement point records what the statement is and where it is.
        logging.debug(json_pp)
        self.stmt_kind = json_pp['stmt_kind']
        if 'cast_kind' in json_pp:
            self.cast_kind = json_pp['cast_kind']
        else:
            self.cast_kind = None
        self.stmt_point_kind = json_pp['stmt_point_kind']
        self.stmt_id = json_pp['stmt_id']
        self.pointer = json_pp['pointer']
        self.pretty = json_pp['pretty']
        if json_pp['location'] is not None:
            self.loc = SourceLocation(json_pp['location'])
        else:
            self.loc = None

    def _read_block_entrance(self, json_pp):
        # A block entrance only records the id of the entered block.
        self.block_id = json_pp['block_id']
89
90
91# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey(object):
    """Key of one binding in a deserialized Environment.

    Identity is the statement id alone: two keys with the same ``stmt_id``
    compare equal and hash alike, whatever their ``pretty`` or ``kind`` are.
    Comparing with an object of another type yields "not equal" instead of
    raising.
    """

    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # rather than failing on a missing _key() attribute.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())
109
110
111# Deserialized description of a location context.
class LocationContext(object):
    """Description of one location context read from a JSON frame.

    Identity is ``lctx_id`` alone: contexts with the same id compare equal
    and hash alike. Comparing with an object of another type yields
    "not equal" instead of raising.
    """

    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        # The location entry may be null, in which case no location is kept.
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # rather than failing on a missing _key() attribute.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())
129
130
131# A group of deserialized Environment bindings that correspond to a specific
132# location context.
class EnvironmentFrame(object):
    """Bindings of an Environment that belong to one location context.

    ``bindings`` maps an EnvironmentBindingKey to the ``value`` entry of the
    corresponding JSON item, in the order the items were listed. A null
    ``items`` entry produces an empty mapping.
    """

    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)

        pairs = []
        if json_frame['items'] is not None:
            for item in json_frame['items']:
                pairs.append((EnvironmentBindingKey(item), item['value']))
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Return True when any binding differs from ``prev``."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
148
149
150# A deserialized Environment. This class can also hold other entities that
151# are similar to Environment, such as Objects Under Construction.
class GenericEnvironment(object):
    """A list of EnvironmentFrame objects built from a JSON list of frames."""

    def __init__(self, json_e):
        super(GenericEnvironment, self).__init__()
        self.frames = [EnvironmentFrame(f) for f in json_e]

    def diff_frames(self, prev):
        """Return the indices of frames whose bindings differ from ``prev``.

        Returns None when no frame-by-frame diff can be produced: either the
        number of frames changed or a frame at some position now belongs to
        a different location context.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        changed = []
        for index, (frame, old_frame) in enumerate(
                zip(self.frames, prev.frames)):
            if not (frame.location_context == old_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(old_frame):
                changed.append(index)

        # TODO: Add support for added/removed.
        return changed

    def is_different(self, prev):
        """Return True unless ``diff_frames`` yields an empty list."""
        changed = self.diff_frames(prev)
        return changed is None or bool(changed)
180
181
182# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    """Key of one binding inside a deserialized RegionStore cluster.

    Identity is the ``(kind, offset)`` pair: keys with the same pair compare
    equal and hash alike. Comparing with an object of another type yields
    "not equal" instead of raising.
    """

    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # rather than failing on a missing _key() attribute.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())
197
198
199# A single cluster of the deserialized RegionStore.
class StoreCluster(object):
    """One cluster of the RegionStore: a base region plus its bindings.

    ``bindings`` maps a StoreBindingKey to the ``value`` entry of the
    corresponding JSON item, in the order the items were listed.
    """

    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']

        pairs = []
        for item in json_sc['items']:
            pairs.append((StoreBindingKey(item), item['value']))
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Return True when any binding differs from ``prev``."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
213
214
215# A deserialized RegionStore.
class Store(object):
    """A deserialized RegionStore: clusters indexed by their pointer."""

    def __init__(self, json_s):
        super(Store, self).__init__()
        self.ptr = json_s['pointer']

        pairs = []
        for json_cluster in json_s['items']:
            pairs.append((json_cluster['pointer'], StoreCluster(json_cluster)))
        self.clusters = collections.OrderedDict(pairs)

    def diff_clusters(self, prev):
        """Return ``(removed, added, updated)`` cluster keys versus ``prev``.

        ``removed`` and ``updated`` follow the key order of ``prev``;
        ``added`` follows the key order of this store.
        """
        mine = self.clusters
        theirs = prev.clusters

        removed = [ptr for ptr in theirs if ptr not in mine]
        added = [ptr for ptr in mine if ptr not in theirs]

        updated = []
        for ptr in theirs:
            if ptr in mine and theirs[ptr].is_different(mine[ptr]):
                updated.append(ptr)

        return (removed, added, updated)

    def is_different(self, prev):
        """Return True when any cluster was removed, added or updated."""
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed) or bool(added) or bool(updated)
233
234
235# Deserialized messages from a single checker in a single program state.
236# Basically a list of raw strings.
class CheckerLines(object):
    """Raw message lines of one checker in one program state."""

    def __init__(self, json_lines):
        super(CheckerLines, self).__init__()
        self.lines = json_lines

    def diff_lines(self, prev):
        """Return the stripped '+'/'-' entries of an ndiff against ``prev``.

        Entries of the ndiff output that start with anything else are
        dropped.
        """
        kept = []
        for entry in difflib.ndiff(prev.lines, self.lines):
            if entry.startswith(('+', '-')):
                kept.append(entry.strip())
        return kept

    def is_different(self, prev):
        """Return True when ``diff_lines`` reports at least one entry."""
        return bool(self.diff_lines(prev))
249
250
251# Deserialized messages of all checkers, separated by checker.
class CheckerMessages(object):
    """Messages of all checkers, as CheckerLines indexed by checker name."""

    def __init__(self, json_m):
        super(CheckerMessages, self).__init__()

        pairs = []
        for entry in json_m:
            pairs.append((entry['checker'], CheckerLines(entry['messages'])))
        self.items = collections.OrderedDict(pairs)

    def diff_messages(self, prev):
        """Return ``(removed, added, updated)`` checker names versus ``prev``.

        ``removed`` and ``updated`` follow the key order of ``prev``;
        ``added`` follows the key order of this object.
        """
        mine = self.items
        theirs = prev.items

        removed = [name for name in theirs if name not in mine]
        added = [name for name in mine if name not in theirs]

        updated = []
        for name in theirs:
            if name in mine and theirs[name].is_different(mine[name]):
                updated.append(name)

        return (removed, added, updated)

    def is_different(self, prev):
        """Return True when any checker was removed, added or updated."""
        removed, added, updated = self.diff_messages(prev)
        return bool(removed) or bool(added) or bool(updated)
268
269
270# A deserialized program state.
class ProgramState(object):
    """A program state assembled from its JSON description.

    Each trait (``store``, ``environment``, ``constraints``,
    ``dynamic_types``, ``constructing_objects``, ``checker_messages``) is
    None when its JSON entry is null. Passing None as ``json_ps`` is treated
    as a description in which every trait is null.
    """

    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        if json_ps is None:
            json_ps = dict.fromkeys([
                'store',
                'environment',
                'constraints',
                'dynamic_types',
                'constructing_objects',
                'checker_messages',
            ])

        self.state_id = state_id

        if json_ps['store'] is None:
            self.store = None
        else:
            self.store = Store(json_ps['store'])

        if json_ps['environment'] is None:
            self.environment = None
        else:
            # The environment frames live under the 'items' entry.
            self.environment = \
                GenericEnvironment(json_ps['environment']['items'])

        if json_ps['constraints'] is None:
            self.constraints = None
        else:
            ranges = []
            for constraint in json_ps['constraints']:
                ranges.append((constraint['symbol'], constraint['range']))
            self.constraints = GenericMap(ranges)

        if json_ps['dynamic_types'] is None:
            self.dynamic_types = None
        else:
            types = []
            for entry in json_ps['dynamic_types']:
                region = entry['region']
                dyn_type = entry['dyn_type']
                suffix = ' (or a sub-class)' if entry['sub_classable'] else ''
                types.append((region, '%s%s' % (dyn_type, suffix)))
            self.dynamic_types = GenericMap(types)

        if json_ps['constructing_objects'] is None:
            self.constructing_objects = None
        else:
            self.constructing_objects = \
                GenericEnvironment(json_ps['constructing_objects'])

        if json_ps['checker_messages'] is None:
            self.checker_messages = None
        else:
            self.checker_messages = \
                CheckerMessages(json_ps['checker_messages'])
312
313
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for predecessors and successors.
class ExplodedNode(object):
    """A node of the exploded graph.

    The constructor only creates the empty predecessor and successor lists;
    ``construct`` fills in the remaining attributes later.
    """

    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Populate the node from its name and its JSON description."""
        logging.debug('Adding ' + node_id)
        # Everything after the first four characters of the name is kept
        # as the pointer; node_name() puts the 'Node' prefix back.
        self.ptr = node_id[4:]

        points = []
        for json_point in json_node['program_points']:
            points.append(ProgramPoint(json_point))
        self.points = points

        # The node takes its id from the last program point.
        self.node_id = points[-1].node_id
        self.state = ProgramState(json_node['state_id'],
                                  json_node['program_state'])

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node name: 'Node' followed by the stored pointer."""
        return 'Node' + self.ptr
336
337
338# A deserialized ExplodedGraph. Constructed by consuming a .dot file
339# line-by-line.
class ExplodedGraph(object):
    """An exploded graph assembled from the lines of a .dot file.

    ``nodes`` maps node names to ExplodedNode objects, ``root_id`` is the
    name of the node line seen while ``nodes`` was still empty, and
    ``incomplete_line`` buffers text that has not yet reached a ';'.
    """

    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    def add_raw_line(self, raw_line):
        """Consume one line of the .dot file.

        Lines starting with '//' are ignored. Edge lines link the two named
        nodes; node lines have their label unescaped, parsed as JSON and
        handed to the node's ``construct``. Anything else is skipped.
        """
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)

        edge = self.edge_re.match(raw_line)
        if edge is not None:
            logging.debug('Classified as edge line.')
            pred, succ = edge.group(1), edge.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node = self.node_re.match(raw_line)
        if node is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node.group(1)
        if len(self.nodes) == 0:
            self.root_id = node_id

        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # The substitutions are applied strictly in this order.
        unescape_steps = (
            ('\\l', ''),
            (' ', ''),
            ('\\"', '"'),
            ('\\{', '{'),
            ('\\}', '}'),
            ('\\\\', '\\'),
            ('\\|', '|'),
            ('\\<', '\\\\<'),
            ('\\>', '\\\\>'),
        )
        node_label = node.group(2)
        for escaped, plain in unescape_steps:
            node_label = node_label.replace(escaped, plain)
        node_label = node_label.rstrip(',')

        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
399
400
401#===-----------------------------------------------------------------------===#
402# Visitors traverse a deserialized ExplodedGraph and do different things
403# with every node and edge.
404#===-----------------------------------------------------------------------===#
405
406
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
409class DotDumpVisitor(object):
410    def __init__(self, do_diffs, dark_mode, gray_mode,
411                 topo_mode, dump_dot_only):
412        super(DotDumpVisitor, self).__init__()
413        self._do_diffs = do_diffs
414        self._dark_mode = dark_mode
415        self._gray_mode = gray_mode
416        self._topo_mode = topo_mode
417        self._dump_dot_only = dump_dot_only
418        self._output = []
419
420    def _dump_raw(self, s):
421        if self._dump_dot_only:
422            print(s, end='')
423        else:
424            self._output.append(s)
425
426    def output(self):
427        assert not self._dump_dot_only
428        return ''.join(self._output)
429
430    def _dump(self, s):
431        s = s.replace('&', '&amp;') \
432             .replace('{', '\\{') \
433             .replace('}', '\\}') \
434             .replace('\\<', '&lt;') \
435             .replace('\\>', '&gt;') \
436             .replace('\\l', '<br />') \
437             .replace('|', '\\|')
438        if self._gray_mode:
439            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
440            s = re.sub(r'</font>', '', s)
441        self._dump_raw(s)
442
443    @staticmethod
444    def _diff_plus_minus(is_added):
445        if is_added is None:
446            return ''
447        if is_added:
448            return '<font color="forestgreen">+</font>'
449        return '<font color="red">-</font>'
450
451    @staticmethod
452    def _short_pretty(s):
453        if s is None:
454            return None
455        if len(s) < 20:
456            return s
457        left = s.find('{')
458        right = s.rfind('}')
459        if left == -1 or right == -1 or left >= right:
460            return s
461        candidate = s[0:left + 1] + ' ... ' + s[right:]
462        if len(candidate) >= len(s):
463            return s
464        return candidate
465
466    @staticmethod
467    def _make_sloc(loc):
468        if loc is None:
469            return '<i>Invalid Source Location</i>'
470
471        def make_plain_loc(loc):
472            return '%s:<b>%s</b>:<b>%s</b>' \
473                % (loc.filename, loc.line, loc.col)
474
475        if loc.is_macro():
476            return '%s <font color="royalblue1">' \
477                   '(<i>spelling at </i> %s)</font>' \
478                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
479
480        return make_plain_loc(loc)
481
482    def visit_begin_graph(self, graph):
483        self._graph = graph
484        self._dump_raw('digraph "ExplodedGraph" {\n')
485        if self._dark_mode:
486            self._dump_raw('bgcolor="gray10";\n')
487        self._dump_raw('label="";\n')
488
489    def visit_program_point(self, p):
490        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
491            color = 'gold3'
492        elif p.kind in ['PreStmtPurgeDeadSymbols',
493                        'PostStmtPurgeDeadSymbols']:
494            color = 'red'
495        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
496            color = 'dodgerblue' if self._dark_mode else 'blue'
497        elif p.kind in ['Statement']:
498            color = 'cyan4'
499        else:
500            color = 'forestgreen'
501
502        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
503
504        if p.kind == 'Statement':
505            # This avoids pretty-printing huge statements such as CompoundStmt.
506            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
507            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
508            stmt_color = 'cyan3'
509            self._dump('<td align="left" width="0">%s:</td>'
510                       '<td align="left" width="0"><font color="%s">'
511                       '%s</font> </td>'
512                       '<td align="left"><i>S%s</i></td>'
513                       '<td align="left"><font color="%s">%s</font></td>'
514                       '<td align="left">%s</td></tr>'
515                       % (self._make_sloc(p.loc), color,
516                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
517                          if p.cast_kind is not None else p.stmt_kind,
518                          p.stmt_id, stmt_color, p.stmt_point_kind,
519                          self._short_pretty(p.pretty)
520                          if not skip_pretty else ''))
521        elif p.kind == 'Edge':
522            self._dump('<td width="0"></td>'
523                       '<td align="left" width="0">'
524                       '<font color="%s">%s</font></td><td align="left">'
525                       '[B%d] -\\> [B%d]</td></tr>'
526                       % (color, 'BlockEdge', p.src_id, p.dst_id))
527        elif p.kind == 'BlockEntrance':
528            self._dump('<td width="0"></td>'
529                       '<td align="left" width="0">'
530                       '<font color="%s">%s</font></td>'
531                       '<td align="left">[B%d]</td></tr>'
532                       % (color, p.kind, p.block_id))
533        else:
534            # TODO: Print more stuff for other kinds of points.
535            self._dump('<td width="0"></td>'
536                       '<td align="left" width="0" colspan="2">'
537                       '<font color="%s">%s</font></td></tr>'
538                       % (color, p.kind))
539
540        if p.tag is not None:
541            self._dump('<tr><td width="0"></td><td width="0"></td>'
542                       '<td colspan="3" align="left">'
543                       '<b>Tag: </b> <font color="crimson">'
544                       '%s</font></td></tr>' % p.tag)
545
546        if p.has_report:
547            self._dump('<tr><td width="0"></td><td width="0"></td>'
548                       '<td colspan="3" align="left">'
549                       '<font color="red"><b>Bug Report Attached'
550                       '</b></font></td></tr>')
551        if p.is_sink:
552            self._dump('<tr><td width="0"></td><td width="0"></td>'
553                       '<td colspan="3" align="left">'
554                       '<font color="cornflowerblue"><b>Sink Node'
555                       '</b></font></td></tr>')
556
557    def visit_environment(self, e, prev_e=None):
558        self._dump('<table border="0">')
559
560        def dump_location_context(lc, is_added=None):
561            self._dump('<tr><td>%s</td>'
562                       '<td align="left"><b>%s</b></td>'
563                       '<td align="left" colspan="2">'
564                       '<font color="gray60">%s </font>'
565                       '%s</td></tr>'
566                       % (self._diff_plus_minus(is_added),
567                          lc.caption, lc.decl,
568                          ('(%s)' % self._make_sloc(lc.loc))
569                          if lc.loc is not None else ''))
570
571        def dump_binding(f, b, is_added=None):
572            self._dump('<tr><td>%s</td>'
573                       '<td align="left"><i>S%s</i></td>'
574                       '%s'
575                       '<td align="left">%s</td>'
576                       '<td align="left">%s</td></tr>'
577                       % (self._diff_plus_minus(is_added),
578                          b.stmt_id,
579                          '<td align="left"><font color="%s"><i>'
580                          '%s</i></font></td>' % (
581                              'lavender' if self._dark_mode else 'darkgreen',
582                              ('(%s)' % b.kind) if b.kind is not None else ' '
583                          ),
584                          self._short_pretty(b.pretty), f.bindings[b]))
585
586        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
587        if frames_updated:
588            for i in frames_updated:
589                f = e.frames[i]
590                prev_f = prev_e.frames[i]
591                dump_location_context(f.location_context)
592                bindings_removed, bindings_added = f.diff_bindings(prev_f)
593                for b in bindings_removed:
594                    dump_binding(prev_f, b, False)
595                for b in bindings_added:
596                    dump_binding(f, b, True)
597        else:
598            for f in e.frames:
599                dump_location_context(f.location_context)
600                for b in f.bindings:
601                    dump_binding(f, b)
602
603        self._dump('</table>')
604
605    def visit_environment_in_state(self, selector, title, s, prev_s=None):
606        e = getattr(s, selector)
607        prev_e = getattr(prev_s, selector) if prev_s is not None else None
608        if e is None and prev_e is None:
609            return
610
611        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
612        if e is None:
613            self._dump('<i> Nothing!</i>')
614        else:
615            if prev_e is not None:
616                if e.is_different(prev_e):
617                    self._dump('</td></tr><tr><td align="left">')
618                    self.visit_environment(e, prev_e)
619                else:
620                    self._dump('<i> No changes!</i>')
621            else:
622                self._dump('</td></tr><tr><td align="left">')
623                self.visit_environment(e)
624
625        self._dump('</td></tr>')
626
627    def visit_store(self, s, prev_s=None):
628        self._dump('<table border="0">')
629
630        def dump_binding(s, c, b, is_added=None):
631            self._dump('<tr><td>%s</td>'
632                       '<td align="left">%s</td>'
633                       '<td align="left">%s</td>'
634                       '<td align="left">%s</td>'
635                       '<td align="left">%s</td></tr>'
636                       % (self._diff_plus_minus(is_added),
637                          s.clusters[c].base_region, b.offset,
638                          '(<i>Default</i>)' if b.kind == 'Default'
639                          else '',
640                          s.clusters[c].bindings[b]))
641
642        if prev_s is not None:
643            clusters_removed, clusters_added, clusters_updated = \
644                s.diff_clusters(prev_s)
645            for c in clusters_removed:
646                for b in prev_s.clusters[c].bindings:
647                    dump_binding(prev_s, c, b, False)
648            for c in clusters_updated:
649                bindings_removed, bindings_added = \
650                    s.clusters[c].diff_bindings(prev_s.clusters[c])
651                for b in bindings_removed:
652                    dump_binding(prev_s, c, b, False)
653                for b in bindings_added:
654                    dump_binding(s, c, b, True)
655            for c in clusters_added:
656                for b in s.clusters[c].bindings:
657                    dump_binding(s, c, b, True)
658        else:
659            for c in s.clusters:
660                for b in s.clusters[c].bindings:
661                    dump_binding(s, c, b)
662
663        self._dump('</table>')
664
665    def visit_store_in_state(self, s, prev_s=None):
666        st = s.store
667        prev_st = prev_s.store if prev_s is not None else None
668        if st is None and prev_st is None:
669            return
670
671        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
672        if st is None:
673            self._dump('<i> Nothing!</i>')
674        else:
675            if self._dark_mode:
676                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
677            else:
678                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
679            if prev_st is not None:
680                if s.store.is_different(prev_st):
681                    self._dump('</td></tr><tr><td align="left">')
682                    self.visit_store(st, prev_st)
683                else:
684                    self._dump('<i> No changes!</i>')
685            else:
686                self._dump('</td></tr><tr><td align="left">')
687                self.visit_store(st)
688        self._dump('</td></tr>')
689
690    def visit_generic_map(self, m, prev_m=None):
691        self._dump('<table border="0">')
692
693        def dump_pair(m, k, is_added=None):
694            self._dump('<tr><td>%s</td>'
695                       '<td align="left">%s</td>'
696                       '<td align="left">%s</td></tr>'
697                       % (self._diff_plus_minus(is_added),
698                          k, m.generic_map[k]))
699
700        if prev_m is not None:
701            removed, added = m.diff(prev_m)
702            for k in removed:
703                dump_pair(prev_m, k, False)
704            for k in added:
705                dump_pair(m, k, True)
706        else:
707            for k in m.generic_map:
708                dump_pair(m, k, None)
709
710        self._dump('</table>')
711
712    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
713        m = getattr(s, selector)
714        prev_m = getattr(prev_s, selector) if prev_s is not None else None
715        if m is None and prev_m is None:
716            return
717
718        self._dump('<hr />')
719        self._dump('<tr><td align="left">'
720                   '<b>%s: </b>' % title)
721        if m is None:
722            self._dump('<i> Nothing!</i>')
723        else:
724            if prev_m is not None:
725                if m.is_different(prev_m):
726                    self._dump('</td></tr><tr><td align="left">')
727                    self.visit_generic_map(m, prev_m)
728                else:
729                    self._dump('<i> No changes!</i>')
730            else:
731                self._dump('</td></tr><tr><td align="left">')
732                self.visit_generic_map(m)
733
734        self._dump('</td></tr>')
735
736    def visit_checker_messages(self, m, prev_m=None):
737        self._dump('<table border="0">')
738
739        def dump_line(l, is_added=None):
740            self._dump('<tr><td>%s</td>'
741                       '<td align="left">%s</td></tr>'
742                       % (self._diff_plus_minus(is_added), l))
743
744        def dump_chk(chk, is_added=None):
745            dump_line('<i>%s</i>:' % chk, is_added)
746
747        if prev_m is not None:
748            removed, added, updated = m.diff_messages(prev_m)
749            for chk in removed:
750                dump_chk(chk, False)
751                for l in prev_m.items[chk].lines:
752                    dump_line(l, False)
753            for chk in updated:
754                dump_chk(chk)
755                for l in m.items[chk].diff_lines(prev_m.items[chk]):
756                    dump_line(l[1:], l.startswith('+'))
757            for chk in added:
758                dump_chk(chk, True)
759                for l in m.items[chk].lines:
760                    dump_line(l, True)
761        else:
762            for chk in m.items:
763                dump_chk(chk)
764                for l in m.items[chk].lines:
765                    dump_line(l)
766
767        self._dump('</table>')
768
769    def visit_checker_messages_in_state(self, s, prev_s=None):
770        m = s.checker_messages
771        prev_m = prev_s.checker_messages if prev_s is not None else None
772        if m is None and prev_m is None:
773            return
774
775        self._dump('<hr />')
776        self._dump('<tr><td align="left">'
777                   '<b>Checker State: </b>')
778        if m is None:
779            self._dump('<i> Nothing!</i>')
780        else:
781            if prev_m is not None:
782                if m.is_different(prev_m):
783                    self._dump('</td></tr><tr><td align="left">')
784                    self.visit_checker_messages(m, prev_m)
785                else:
786                    self._dump('<i> No changes!</i>')
787            else:
788                self._dump('</td></tr><tr><td align="left">')
789                self.visit_checker_messages(m)
790
791        self._dump('</td></tr>')
792
793    def visit_state(self, s, prev_s):
794        self.visit_store_in_state(s, prev_s)
795        self.visit_environment_in_state('environment', 'Expressions',
796                                        s, prev_s)
797        self.visit_generic_map_in_state('constraints', 'Ranges',
798                                        s, prev_s)
799        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
800                                        s, prev_s)
801        self.visit_environment_in_state('constructing_objects',
802                                        'Objects Under Construction',
803                                        s, prev_s)
804        self.visit_checker_messages_in_state(s, prev_s)
805
806    def visit_node(self, node):
807        self._dump('%s [shape=record,'
808                   % (node.node_name()))
809        if self._dark_mode:
810            self._dump('color="white",fontcolor="gray80",')
811        self._dump('label=<<table border="0">')
812
813        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
814                   % ("gray20" if self._dark_mode else "gray70",
815                      node.state.state_id
816                      if node.state is not None else 'Unspecified'))
817        if not self._topo_mode:
818            self._dump('<tr><td align="left" width="0">')
819            if len(node.points) > 1:
820                self._dump('<b>Program points:</b></td></tr>')
821            else:
822                self._dump('<b>Program point:</b></td></tr>')
823        self._dump('<tr><td align="left" width="0">'
824                   '<table border="0" align="left" width="0">')
825        for p in node.points:
826            self.visit_program_point(p)
827        self._dump('</table></td></tr>')
828
829        if node.state is not None and not self._topo_mode:
830            prev_s = None
831            # Do diffs only when we have a unique predecessor.
832            # Don't do diffs on the leaf nodes because they're
833            # the important ones.
834            if self._do_diffs and len(node.predecessors) == 1 \
835               and len(node.successors) > 0:
836                prev_s = self._graph.nodes[node.predecessors[0]].state
837            self.visit_state(node.state, prev_s)
838        self._dump_raw('</table>>];\n')
839
840    def visit_edge(self, pred, succ):
841        self._dump_raw('%s -> %s%s;\n' % (
842            pred.node_name(), succ.node_name(),
843            ' [color="white"]' if self._dark_mode else ''
844        ))
845
846    def visit_end_of_graph(self):
847        self._dump_raw('}\n')
848
849        if not self._dump_dot_only:
850            import sys
851            import tempfile
852
853            def write_temp_file(suffix, data):
854                fd, filename = tempfile.mkstemp(suffix=suffix)
855                print('Writing "%s"...' % filename)
856                with os.fdopen(fd, 'w') as fp:
857                    fp.write(data)
858                print('Done! Please remember to remove the file.')
859                return filename
860
861            try:
862                import graphviz
863            except ImportError:
864                # The fallback behavior if graphviz is not installed!
865                print('Python graphviz not found. Please invoke')
866                print('  $ pip install graphviz')
867                print('in order to enable automatic conversion to HTML.')
868                print()
869                print('You may also convert DOT to SVG manually via')
870                print('  $ dot -Tsvg input.dot -o output.svg')
871                print()
872                write_temp_file('.dot', self.output())
873                return
874
875            svg = graphviz.pipe('dot', 'svg', self.output())
876
877            filename = write_temp_file(
878                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
879                             '#1a1a1a' if self._dark_mode else 'white', svg))
880            if sys.platform == 'win32':
881                os.startfile(filename)
882            elif sys.platform == 'darwin':
883                os.system('open "%s"' % filename)
884            else:
885                os.system('xdg-open "%s"' % filename)
886
887
888#===-----------------------------------------------------------------------===#
889# Explorers know how to traverse the ExplodedGraph in a certain order.
890# They would invoke a Visitor on every node or edge they encounter.
891#===-----------------------------------------------------------------------===#
892
893
# BasicExplorer explores the whole graph without following its structure:
# nodes are visited in the sorted order of their keys, and the outgoing
# edges of each node are visited right after the node itself.
class BasicExplorer(object):
    def __init__(self):
        super(BasicExplorer, self).__init__()

    # Drives the visitor over every node and every edge of the graph,
    # bracketed by visit_begin_graph() and visit_end_of_graph().
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            node = graph.nodes[node_id]
            visitor.visit_node(node)
            for succ_id in sorted(node.successors):
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(node, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
908
909
910#===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph in order to focus on other parts.
912# Trimmers can be combined together by applying them sequentially.
913#===-----------------------------------------------------------------------===#
914
915
# SinglePathTrimmer reduces the graph to one path: starting at the root it
# always follows the first successor. Handy when an already trimmed graph
# is still too large.
class SinglePathTrimmer(object):
    def __init__(self):
        super(SinglePathTrimmer, self).__init__()

    # Rewrites graph.nodes in place so that only the nodes on the chosen
    # path remain, each linked to exactly one successor.
    def trim(self, graph):
        path_ids = set()
        current_id = graph.root_id
        while True:
            path_ids.add(current_id)
            current = graph.nodes[current_id]
            if len(current.successors) == 0:
                # Reached a leaf: the path is complete.
                break
            next_id = current.successors[0]
            following = graph.nodes[next_id]
            current.successors = [next_id]
            following.predecessors = [current_id]
            if next_id in path_ids:
                # The path loops back onto itself; stop walking.
                break
            current_id = next_id
        graph.nodes = dict((kept_id, graph.nodes[kept_id])
                           for kept_id in path_ids)
940
941
# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer(object):
    # target_nodes is a list of keys of graph.nodes, such as the list
    # produced by parse_target_nodes().
    def __init__(self, target_nodes):
        super(TargetedTrimmer, self).__init__()
        self._target_nodes = target_nodes

    # Translates one user-supplied node specifier into a key of graph.nodes.
    # A specifier that starts with '0x' is treated as a pointer and must
    # match a key of the form 'Node0x...'; any other specifier is parsed as
    # an integer and compared against the node_id attribute of every node.
    # Raises ValueError if the specifier is malformed or matches no node.
    @staticmethod
    def parse_target_node(node, graph):
        if node.startswith('0x'):
            ret = 'Node' + node
            # Validate explicitly: an assert would vanish under "python -O".
            if ret not in graph.nodes:
                raise ValueError('Unknown node pointer: %s' % node)
            return ret

        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Fail here with a clear message instead of returning None, which
        # would only surface later as a KeyError inside trim().
        raise ValueError('Unknown node ID: %s' % node)

    # Translates a comma-separated list of node specifiers into a list of
    # keys of graph.nodes; see parse_target_node() for the accepted forms.
    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        return [TargetedTrimmer.parse_target_node(node, graph)
                for node in target_nodes.split(',')]

    # Rewrites graph.nodes in place, keeping only the target nodes and the
    # nodes from which a target can be reached, and drops every edge that
    # pointed to or from a removed node.
    def trim(self, graph):
        # Work on a copy so that the list handed to the constructor stays
        # intact and trim() may be applied more than once.
        queue = list(self._target_nodes)
        # Marking nodes when they are queued, rather than when they are
        # popped, keeps every node from being queued more than once.
        visited_nodes = set(queue)

        while len(queue) > 0:
            node_id = queue.pop()
            for pred_id in graph.nodes[node_id].predecessors:
                if pred_id not in visited_nodes:
                    visited_nodes.add(pred_id)
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        for node_id in graph.nodes:
            node = graph.nodes[node_id]
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [pred_id for pred_id in node.predecessors
                                 if pred_id in visited_nodes]
986
987
988#===-----------------------------------------------------------------------===#
989# The entry point to the script.
990#===-----------------------------------------------------------------------===#
991
992
# Parses the command line, loads the exploded graph dump line by line,
# applies the requested trimmers and hands the graph to the dot visitor.
def main():
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')
    parser.add_argument(
        'filename', type=str,
        help='the .dot file produced by the Static Analyzer')
    parser.add_argument(
        '-v', '--verbose', dest='loglevel', action='store_const',
        const=logging.DEBUG, default=logging.WARNING,
        help='enable info prints')
    parser.add_argument(
        '-d', '--diff', dest='diff', action='store_true',
        help='display differences between states')
    parser.add_argument(
        '-t', '--topology', dest='topology', action='store_true',
        help='only display program points, omit states')
    parser.add_argument(
        '-s', '--single-path', dest='single_path', action='store_true',
        help='only display the leftmost path in the graph '
             '(useful for trimmed graphs that still '
             'branch too much)')
    parser.add_argument(
        '--to', type=str, default=None,
        help='only display execution paths from the root '
             'to the given comma-separated list of nodes '
             'identified by a pointer or a stable ID; '
             'compatible with --single-path')
    parser.add_argument(
        '--dark', dest='dark', action='store_true',
        help='dark mode')
    parser.add_argument(
        '--gray', dest='gray', action='store_true',
        help='black-and-white mode')
    parser.add_argument(
        '--dump-dot-only', dest='dump_dot_only', action='store_true',
        help='instead of writing an HTML file and immediately '
             'displaying it, dump the rewritten dot file '
             'to stdout')
    options = parser.parse_args()
    logging.basicConfig(level=options.loglevel)

    graph = ExplodedGraph()
    with open(options.filename) as dot_file:
        for line in dot_file:
            graph.add_raw_line(line.strip())

    # The targeted trimmer goes first so that --single-path then picks
    # a path within the already reduced graph.
    trimmers = []
    if options.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(options.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if options.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()
    visitor = DotDumpVisitor(options.diff, options.dark, options.gray,
                             options.topology, options.dump_dot_only)

    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)
1054
1055
# Run the tool only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == '__main__':
    main()
1058