xref: /openbsd-src/gnu/llvm/clang/utils/analyzer/exploded-graph-rewriter.py (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21import sys
22
23
24#===-----------------------------------------------------------------------===#
25# These data structures represent a deserialized ExplodedGraph.
26#===-----------------------------------------------------------------------===#
27
28
29# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    """Report which keys differ between two dictionaries.

    Returns a ``(removed, added)`` pair of key lists. ``removed`` collects
    the keys of ``prev`` that are missing from ``curr`` or map to a
    different value there; ``added`` collects the keys of ``curr`` that are
    missing from ``prev`` or map to a different value there. A key whose
    value merely changed therefore appears in both lists.
    """
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return (removed, added)
34
35
36# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap(object):
    """A program state trait stored as an ordered key-value mapping."""

    def __init__(self, items):
        # 'items' is anything OrderedDict accepts, e.g. a list of pairs.
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return the (removed, added) key lists relative to 'prev'."""
        current_map = self.generic_map
        previous_map = prev.generic_map
        return diff_dicts(current_map, previous_map)

    def is_different(self, prev):
        """Tell whether any key was removed, added or changed since 'prev'."""
        gone, new = self.diff(prev)
        return bool(gone) or bool(new)
47
48
49# A deserialized source location.
class SourceLocation(object):
    """A source location read from its JSON description.

    Reads the 'line' and 'column' entries, the optional 'file' entry (only
    its base name is kept; '(main file)' is used when it is absent) and the
    optional nested 'spelling' location.
    """

    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']

        if 'file' in json_loc:
            self.filename = os.path.basename(json_loc['file'])
        else:
            self.filename = '(main file)'

        # A nested 'spelling' entry is itself a full source location.
        if 'spelling' in json_loc:
            self.spelling = SourceLocation(json_loc['spelling'])
        else:
            self.spelling = None

    def is_macro(self):
        """Return True when this location carries a spelling location."""
        return self.spelling is not None
63
64
65# A deserialized program point.
class ProgramPoint(object):
    """A program point read from its JSON description.

    Every point has 'kind', 'tag', 'node_id', 'is_sink' and 'has_report'.
    Further attributes are only set for the kinds 'Edge', 'Statement' and
    'BlockEntrance'; other kinds carry nothing extra.
    """

    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']
        self.node_id = json_pp['node_id']
        self.is_sink = bool(json_pp['is_sink'])
        self.has_report = bool(json_pp['has_report'])

        if self.kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
            return

        if self.kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
            return

        if self.kind != 'Statement':
            return

        logging.debug(json_pp)
        self.stmt_kind = json_pp['stmt_kind']
        # 'cast_kind' is optional in the JSON.
        if 'cast_kind' in json_pp:
            self.cast_kind = json_pp['cast_kind']
        else:
            self.cast_kind = None
        self.stmt_point_kind = json_pp['stmt_point_kind']
        self.stmt_id = json_pp['stmt_id']
        self.pointer = json_pp['pointer']
        self.pretty = json_pp['pretty']
        # 'location' must be present but may be null.
        json_location = json_pp['location']
        if json_location is not None:
            self.loc = SourceLocation(json_location)
        else:
            self.loc = None
90
91
92# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey(object):
    """A single expression acting as a key in a deserialized Environment.

    Two keys are equal, and hash alike, when their 'stmt_id' matches;
    'pretty' and 'kind' do not take part in the comparison.
    """

    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Defer to the other operand instead of crashing with an
        # AttributeError when compared against an unrelated object.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
110
111
112# Deserialized description of a location context.
class LocationContext(object):
    """Deserialized description of a location context.

    Reads 'lctx_id', 'location_context' (stored as the caption), 'calling'
    (stored as the declaration) and 'location' (which may be null) from the
    JSON frame. Contexts compare and hash by 'lctx_id' only.
    """

    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Defer to the other operand instead of crashing with an
        # AttributeError when compared against an unrelated object.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
130
131
132# A group of deserialized Environment bindings that correspond to a specific
133# location context.
class EnvironmentFrame(object):
    """Environment bindings that belong to one location context.

    The JSON frame describes the location context itself and carries the
    bindings under 'items' (which may be null, meaning no bindings).
    """

    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)

        json_items = json_frame['items']
        if json_items is None:
            json_items = []
        # Insertion order of the bindings follows the order in the JSON.
        bindings = collections.OrderedDict()
        for json_binding in json_items:
            key = EnvironmentBindingKey(json_binding)
            bindings[key] = json_binding['value']
        self.bindings = bindings

    def diff_bindings(self, prev):
        """Return the (removed, added) binding keys relative to 'prev'."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
149
150
151# A deserialized Environment. This class can also hold other entities that
152# are similar to Environment, such as Objects Under Construction.
class GenericEnvironment(object):
    """A deserialized Environment: a list of per-location-context frames.

    Also used for entities shaped like an Environment, such as Objects
    Under Construction.
    """

    def __init__(self, json_e):
        super(GenericEnvironment, self).__init__()
        self.frames = [EnvironmentFrame(json_frame) for json_frame in json_e]

    def diff_frames(self, prev):
        """Return indices of frames whose bindings differ from 'prev'.

        Returns None when no frame-by-frame diff can be produced: the two
        environments have a different number of frames, or a frame was
        replaced by one with another location context.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        changed = []
        pairs = zip(self.frames, prev.frames)
        for index, (frame, prev_frame) in enumerate(pairs):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                changed.append(index)

        # TODO: Add support for added/removed.
        return changed

    def is_different(self, prev):
        """Tell whether anything differs from 'prev' (or cannot be diffed)."""
        changed = self.diff_frames(prev)
        if changed is None:
            return True
        return len(changed) > 0
181
182
183# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    """A single binding key in a deserialized RegionStore cluster.

    Keys compare and hash by the pair ('kind', 'offset').
    """

    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Defer to the other operand instead of crashing with an
        # AttributeError when compared against an unrelated object.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
198
199
200# A single cluster of the deserialized RegionStore.
class StoreCluster(object):
    """One cluster of the deserialized RegionStore.

    Reads the base region from 'cluster' and the bindings from 'items';
    each item doubles as the JSON of its key and supplies the 'value'.
    """

    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        bindings = collections.OrderedDict()
        for json_binding in json_sc['items']:
            key = StoreBindingKey(json_binding)
            bindings[key] = json_binding['value']
        self.bindings = bindings

    def diff_bindings(self, prev):
        """Return the (removed, added) binding keys relative to 'prev'."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
214
215
216# A deserialized RegionStore.
class Store(object):
    """A deserialized RegionStore: clusters keyed by their 'pointer'."""

    def __init__(self, json_s):
        super(Store, self).__init__()
        self.ptr = json_s['pointer']
        clusters = collections.OrderedDict()
        for json_cluster in json_s['items']:
            pointer = json_cluster['pointer']
            clusters[pointer] = StoreCluster(json_cluster)
        self.clusters = clusters

    def diff_clusters(self, prev):
        """Return (removed, added, updated) cluster keys relative to 'prev'.

        'updated' lists clusters present on both sides whose bindings
        differ; it follows the key order of 'prev'.
        """
        mine = self.clusters
        theirs = prev.clusters

        removed = []
        updated = []
        for key in theirs:
            if key not in mine:
                removed.append(key)
            elif theirs[key].is_different(mine[key]):
                updated.append(key)

        added = [key for key in mine if key not in theirs]
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any cluster was removed, added or updated."""
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed) or bool(added) or bool(updated)
234
235
236# Deserialized messages from a single checker in a single program state.
237# Basically a list of raw strings.
class CheckerLines(object):
    """Messages from a single checker in a single program state.

    Basically a list of raw strings.
    """

    def __init__(self, json_lines):
        super(CheckerLines, self).__init__()
        self.lines = json_lines

    def diff_lines(self, prev):
        """Return the stripped '+'/'-' lines of an ndiff from 'prev' to us.

        Unchanged lines and ndiff's '?' hint lines are dropped.
        """
        changed = []
        for delta in difflib.ndiff(prev.lines, self.lines):
            if delta.startswith(('+', '-')):
                changed.append(delta.strip())
        return changed

    def is_different(self, prev):
        """Tell whether the diff against 'prev' is non-empty."""
        return len(self.diff_lines(prev)) > 0
250
251
252# Deserialized messages of all checkers, separated by checker.
class CheckerMessages(object):
    """Messages of all checkers, keyed by the 'checker' name."""

    def __init__(self, json_m):
        super(CheckerMessages, self).__init__()
        items = collections.OrderedDict()
        for json_entry in json_m:
            checker = json_entry['checker']
            items[checker] = CheckerLines(json_entry['messages'])
        self.items = items

    def diff_messages(self, prev):
        """Return (removed, added, updated) checker names relative to 'prev'.

        'updated' lists checkers present on both sides whose lines differ;
        it follows the key order of 'prev'.
        """
        mine = self.items
        theirs = prev.items

        removed = []
        updated = []
        for checker in theirs:
            if checker not in mine:
                removed.append(checker)
            elif theirs[checker].is_different(mine[checker]):
                updated.append(checker)

        added = [checker for checker in mine if checker not in theirs]
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any checker was removed, added or updated."""
        removed, added, updated = self.diff_messages(prev)
        return bool(removed) or bool(added) or bool(updated)
269
270
271# A deserialized program state.
class ProgramState(object):
    """A deserialized program state.

    'json_ps' may be None, in which case every trait of the state is None.
    Otherwise each of the keys 'store', 'environment', 'constraints',
    'dynamic_types', 'constructing_objects' and 'checker_messages' must be
    present, and a null value leaves the matching attribute as None.
    """

    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        if json_ps is None:
            json_ps = dict.fromkeys((
                'store',
                'environment',
                'constraints',
                'dynamic_types',
                'constructing_objects',
                'checker_messages',
            ))

        self.state_id = state_id

        json_store = json_ps['store']
        if json_store is None:
            self.store = None
        else:
            self.store = Store(json_store)

        json_env = json_ps['environment']
        if json_env is None:
            self.environment = None
        else:
            self.environment = GenericEnvironment(json_env['items'])

        json_constraints = json_ps['constraints']
        if json_constraints is None:
            self.constraints = None
        else:
            self.constraints = GenericMap(
                [(c['symbol'], c['range']) for c in json_constraints])

        json_types = json_ps['dynamic_types']
        if json_types is None:
            self.dynamic_types = None
        else:
            type_pairs = []
            for t in json_types:
                region = t['region']
                dyn_type = t['dyn_type']
                suffix = ' (or a sub-class)' if t['sub_classable'] else ''
                type_pairs.append((region, '%s%s' % (dyn_type, suffix)))
            self.dynamic_types = GenericMap(type_pairs)

        json_objects = json_ps['constructing_objects']
        if json_objects is None:
            self.constructing_objects = None
        else:
            self.constructing_objects = GenericEnvironment(json_objects)

        json_messages = json_ps['checker_messages']
        if json_messages is None:
            self.checker_messages = None
        else:
            self.checker_messages = CheckerMessages(json_messages)
313
314
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for predecessors and successors.
class ExplodedNode(object):
    """A deserialized exploded graph node.

    A fresh node only has empty predecessor and successor lists; the
    remaining attributes are filled in later by construct().
    """

    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Fill in the node from its name in the dot file and its JSON."""
        logging.debug('Adding ' + node_id)
        # Drop the leading four characters of the name; node_name() puts
        # 'Node' back in front.
        self.ptr = node_id[4:]
        self.points = [ProgramPoint(json_point)
                       for json_point in json_node['program_points']]
        # The node takes its id from the last program point.
        self.node_id = self.points[-1].node_id
        state_id = json_node['state_id']
        # 'program_state' may be null; ProgramState accepts None.
        json_state = json_node['program_state']
        self.state = ProgramState(state_id, json_state)

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node's name as used in the dot file."""
        return 'Node' + self.ptr
337
338
339# A deserialized ExplodedGraph. Constructed by consuming a .dot file
340# line-by-line.
class ExplodedGraph(object):
    """A deserialized ExplodedGraph.

    Constructed by consuming a .dot file line-by-line through
    add_raw_line().
    """

    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        # Nodes spring into existence on first access, so an edge may
        # mention a node before its own line has been seen.
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    def add_raw_line(self, raw_line):
        """Consume one line of the dot file.

        Lines starting with '//' are ignored. A non-empty line that does
        not end in ';' is buffered and joined with the lines that follow.
        A complete line is recorded as an edge or a node when it matches
        the corresponding regular expression and is skipped otherwise.
        """
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if len(raw_line) > 0 and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + line)

        edge_match = self.edge_re.match(line)
        if edge_match is not None:
            logging.debug('Classified as edge line.')
            pred = edge_match.group(1)
            succ = edge_match.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(line)
        if node_match is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node_match.group(1)
        # The first node line seen while the graph is still empty names
        # the root.
        if len(self.nodes) == 0:
            self.root_id = node_id

        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # The substitutions are applied strictly in this order.
        substitutions = (
            ('\\l', ''),
            (' ', ''),
            ('\\"', '"'),
            ('\\{', '{'),
            ('\\}', '}'),
            ('\\\\', '\\'),
            ('\\|', '|'),
            ('\\<', '\\\\<'),
            ('\\>', '\\\\>'),
        )
        node_label = node_match.group(2)
        for old, new in substitutions:
            node_label = node_label.replace(old, new)
        node_label = node_label.rstrip(',')

        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
400
401
402#===-----------------------------------------------------------------------===#
403# Visitors traverse a deserialized ExplodedGraph and do different things
404# with every node and edge.
405#===-----------------------------------------------------------------------===#
406
407
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
410class DotDumpVisitor(object):
411    def __init__(self, do_diffs, dark_mode, gray_mode,
412                 topo_mode, dump_dot_only):
413        super(DotDumpVisitor, self).__init__()
414        self._do_diffs = do_diffs
415        self._dark_mode = dark_mode
416        self._gray_mode = gray_mode
417        self._topo_mode = topo_mode
418        self._dump_dot_only = dump_dot_only
419        self._output = []
420
421    def _dump_raw(self, s):
422        if self._dump_dot_only:
423            print(s, end='')
424        else:
425            self._output.append(s)
426
427    def output(self):
428        assert not self._dump_dot_only
429        if sys.version_info[0] > 2 and sys.version_info[1] >= 5:
430            return ''.join(self._output).encode()
431        else:
432            return ''.join(self._output)
433
434    def _dump(self, s):
435        s = s.replace('&', '&amp;') \
436             .replace('{', '\\{') \
437             .replace('}', '\\}') \
438             .replace('\\<', '&lt;') \
439             .replace('\\>', '&gt;') \
440             .replace('\\l', '<br />') \
441             .replace('|', '\\|')
442        if self._gray_mode:
443            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
444            s = re.sub(r'</font>', '', s)
445        self._dump_raw(s)
446
447    @staticmethod
448    def _diff_plus_minus(is_added):
449        if is_added is None:
450            return ''
451        if is_added:
452            return '<font color="forestgreen">+</font>'
453        return '<font color="red">-</font>'
454
455    @staticmethod
456    def _short_pretty(s):
457        if s is None:
458            return None
459        if len(s) < 20:
460            return s
461        left = s.find('{')
462        right = s.rfind('}')
463        if left == -1 or right == -1 or left >= right:
464            return s
465        candidate = s[0:left + 1] + ' ... ' + s[right:]
466        if len(candidate) >= len(s):
467            return s
468        return candidate
469
470    @staticmethod
471    def _make_sloc(loc):
472        if loc is None:
473            return '<i>Invalid Source Location</i>'
474
475        def make_plain_loc(loc):
476            return '%s:<b>%s</b>:<b>%s</b>' \
477                % (loc.filename, loc.line, loc.col)
478
479        if loc.is_macro():
480            return '%s <font color="royalblue1">' \
481                   '(<i>spelling at </i> %s)</font>' \
482                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
483
484        return make_plain_loc(loc)
485
486    def visit_begin_graph(self, graph):
487        self._graph = graph
488        self._dump_raw('digraph "ExplodedGraph" {\n')
489        if self._dark_mode:
490            self._dump_raw('bgcolor="gray10";\n')
491        self._dump_raw('label="";\n')
492
493    def visit_program_point(self, p):
494        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
495            color = 'gold3'
496        elif p.kind in ['PreStmtPurgeDeadSymbols',
497                        'PostStmtPurgeDeadSymbols']:
498            color = 'red'
499        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
500            color = 'dodgerblue' if self._dark_mode else 'blue'
501        elif p.kind in ['Statement']:
502            color = 'cyan4'
503        else:
504            color = 'forestgreen'
505
506        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
507
508        if p.kind == 'Statement':
509            # This avoids pretty-printing huge statements such as CompoundStmt.
510            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
511            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
512            stmt_color = 'cyan3'
513            self._dump('<td align="left" width="0">%s:</td>'
514                       '<td align="left" width="0"><font color="%s">'
515                       '%s</font> </td>'
516                       '<td align="left"><i>S%s</i></td>'
517                       '<td align="left"><font color="%s">%s</font></td>'
518                       '<td align="left">%s</td></tr>'
519                       % (self._make_sloc(p.loc), color,
520                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
521                          if p.cast_kind is not None else p.stmt_kind,
522                          p.stmt_id, stmt_color, p.stmt_point_kind,
523                          self._short_pretty(p.pretty)
524                          if not skip_pretty else ''))
525        elif p.kind == 'Edge':
526            self._dump('<td width="0"></td>'
527                       '<td align="left" width="0">'
528                       '<font color="%s">%s</font></td><td align="left">'
529                       '[B%d] -\\> [B%d]</td></tr>'
530                       % (color, 'BlockEdge', p.src_id, p.dst_id))
531        elif p.kind == 'BlockEntrance':
532            self._dump('<td width="0"></td>'
533                       '<td align="left" width="0">'
534                       '<font color="%s">%s</font></td>'
535                       '<td align="left">[B%d]</td></tr>'
536                       % (color, p.kind, p.block_id))
537        else:
538            # TODO: Print more stuff for other kinds of points.
539            self._dump('<td width="0"></td>'
540                       '<td align="left" width="0" colspan="2">'
541                       '<font color="%s">%s</font></td></tr>'
542                       % (color, p.kind))
543
544        if p.tag is not None:
545            self._dump('<tr><td width="0"></td><td width="0"></td>'
546                       '<td colspan="3" align="left">'
547                       '<b>Tag: </b> <font color="crimson">'
548                       '%s</font></td></tr>' % p.tag)
549
550        if p.has_report:
551            self._dump('<tr><td width="0"></td><td width="0"></td>'
552                       '<td colspan="3" align="left">'
553                       '<font color="red"><b>Bug Report Attached'
554                       '</b></font></td></tr>')
555        if p.is_sink:
556            self._dump('<tr><td width="0"></td><td width="0"></td>'
557                       '<td colspan="3" align="left">'
558                       '<font color="cornflowerblue"><b>Sink Node'
559                       '</b></font></td></tr>')
560
561    def visit_environment(self, e, prev_e=None):
562        self._dump('<table border="0">')
563
564        def dump_location_context(lc, is_added=None):
565            self._dump('<tr><td>%s</td>'
566                       '<td align="left"><b>%s</b></td>'
567                       '<td align="left" colspan="2">'
568                       '<font color="gray60">%s </font>'
569                       '%s</td></tr>'
570                       % (self._diff_plus_minus(is_added),
571                          lc.caption, lc.decl,
572                          ('(%s)' % self._make_sloc(lc.loc))
573                          if lc.loc is not None else ''))
574
575        def dump_binding(f, b, is_added=None):
576            self._dump('<tr><td>%s</td>'
577                       '<td align="left"><i>S%s</i></td>'
578                       '%s'
579                       '<td align="left">%s</td>'
580                       '<td align="left">%s</td></tr>'
581                       % (self._diff_plus_minus(is_added),
582                          b.stmt_id,
583                          '<td align="left"><font color="%s"><i>'
584                          '%s</i></font></td>' % (
585                              'lavender' if self._dark_mode else 'darkgreen',
586                              ('(%s)' % b.kind) if b.kind is not None else ' '
587                          ),
588                          self._short_pretty(b.pretty), f.bindings[b]))
589
590        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
591        if frames_updated:
592            for i in frames_updated:
593                f = e.frames[i]
594                prev_f = prev_e.frames[i]
595                dump_location_context(f.location_context)
596                bindings_removed, bindings_added = f.diff_bindings(prev_f)
597                for b in bindings_removed:
598                    dump_binding(prev_f, b, False)
599                for b in bindings_added:
600                    dump_binding(f, b, True)
601        else:
602            for f in e.frames:
603                dump_location_context(f.location_context)
604                for b in f.bindings:
605                    dump_binding(f, b)
606
607        self._dump('</table>')
608
609    def visit_environment_in_state(self, selector, title, s, prev_s=None):
610        e = getattr(s, selector)
611        prev_e = getattr(prev_s, selector) if prev_s is not None else None
612        if e is None and prev_e is None:
613            return
614
615        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
616        if e is None:
617            self._dump('<i> Nothing!</i>')
618        else:
619            if prev_e is not None:
620                if e.is_different(prev_e):
621                    self._dump('</td></tr><tr><td align="left">')
622                    self.visit_environment(e, prev_e)
623                else:
624                    self._dump('<i> No changes!</i>')
625            else:
626                self._dump('</td></tr><tr><td align="left">')
627                self.visit_environment(e)
628
629        self._dump('</td></tr>')
630
631    def visit_store(self, s, prev_s=None):
632        self._dump('<table border="0">')
633
634        def dump_binding(s, c, b, is_added=None):
635            self._dump('<tr><td>%s</td>'
636                       '<td align="left">%s</td>'
637                       '<td align="left">%s</td>'
638                       '<td align="left">%s</td>'
639                       '<td align="left">%s</td></tr>'
640                       % (self._diff_plus_minus(is_added),
641                          s.clusters[c].base_region, b.offset,
642                          '(<i>Default</i>)' if b.kind == 'Default'
643                          else '',
644                          s.clusters[c].bindings[b]))
645
646        if prev_s is not None:
647            clusters_removed, clusters_added, clusters_updated = \
648                s.diff_clusters(prev_s)
649            for c in clusters_removed:
650                for b in prev_s.clusters[c].bindings:
651                    dump_binding(prev_s, c, b, False)
652            for c in clusters_updated:
653                bindings_removed, bindings_added = \
654                    s.clusters[c].diff_bindings(prev_s.clusters[c])
655                for b in bindings_removed:
656                    dump_binding(prev_s, c, b, False)
657                for b in bindings_added:
658                    dump_binding(s, c, b, True)
659            for c in clusters_added:
660                for b in s.clusters[c].bindings:
661                    dump_binding(s, c, b, True)
662        else:
663            for c in s.clusters:
664                for b in s.clusters[c].bindings:
665                    dump_binding(s, c, b)
666
667        self._dump('</table>')
668
669    def visit_store_in_state(self, s, prev_s=None):
670        st = s.store
671        prev_st = prev_s.store if prev_s is not None else None
672        if st is None and prev_st is None:
673            return
674
675        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
676        if st is None:
677            self._dump('<i> Nothing!</i>')
678        else:
679            if self._dark_mode:
680                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
681            else:
682                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
683            if prev_st is not None:
684                if s.store.is_different(prev_st):
685                    self._dump('</td></tr><tr><td align="left">')
686                    self.visit_store(st, prev_st)
687                else:
688                    self._dump('<i> No changes!</i>')
689            else:
690                self._dump('</td></tr><tr><td align="left">')
691                self.visit_store(st)
692        self._dump('</td></tr>')
693
694    def visit_generic_map(self, m, prev_m=None):
695        self._dump('<table border="0">')
696
697        def dump_pair(m, k, is_added=None):
698            self._dump('<tr><td>%s</td>'
699                       '<td align="left">%s</td>'
700                       '<td align="left">%s</td></tr>'
701                       % (self._diff_plus_minus(is_added),
702                          k, m.generic_map[k]))
703
704        if prev_m is not None:
705            removed, added = m.diff(prev_m)
706            for k in removed:
707                dump_pair(prev_m, k, False)
708            for k in added:
709                dump_pair(m, k, True)
710        else:
711            for k in m.generic_map:
712                dump_pair(m, k, None)
713
714        self._dump('</table>')
715
716    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
717        m = getattr(s, selector)
718        prev_m = getattr(prev_s, selector) if prev_s is not None else None
719        if m is None and prev_m is None:
720            return
721
722        self._dump('<hr />')
723        self._dump('<tr><td align="left">'
724                   '<b>%s: </b>' % title)
725        if m is None:
726            self._dump('<i> Nothing!</i>')
727        else:
728            if prev_m is not None:
729                if m.is_different(prev_m):
730                    self._dump('</td></tr><tr><td align="left">')
731                    self.visit_generic_map(m, prev_m)
732                else:
733                    self._dump('<i> No changes!</i>')
734            else:
735                self._dump('</td></tr><tr><td align="left">')
736                self.visit_generic_map(m)
737
738        self._dump('</td></tr>')
739
740    def visit_checker_messages(self, m, prev_m=None):
741        self._dump('<table border="0">')
742
743        def dump_line(l, is_added=None):
744            self._dump('<tr><td>%s</td>'
745                       '<td align="left">%s</td></tr>'
746                       % (self._diff_plus_minus(is_added), l))
747
748        def dump_chk(chk, is_added=None):
749            dump_line('<i>%s</i>:' % chk, is_added)
750
751        if prev_m is not None:
752            removed, added, updated = m.diff_messages(prev_m)
753            for chk in removed:
754                dump_chk(chk, False)
755                for l in prev_m.items[chk].lines:
756                    dump_line(l, False)
757            for chk in updated:
758                dump_chk(chk)
759                for l in m.items[chk].diff_lines(prev_m.items[chk]):
760                    dump_line(l[1:], l.startswith('+'))
761            for chk in added:
762                dump_chk(chk, True)
763                for l in m.items[chk].lines:
764                    dump_line(l, True)
765        else:
766            for chk in m.items:
767                dump_chk(chk)
768                for l in m.items[chk].lines:
769                    dump_line(l)
770
771        self._dump('</table>')
772
773    def visit_checker_messages_in_state(self, s, prev_s=None):
774        m = s.checker_messages
775        prev_m = prev_s.checker_messages if prev_s is not None else None
776        if m is None and prev_m is None:
777            return
778
779        self._dump('<hr />')
780        self._dump('<tr><td align="left">'
781                   '<b>Checker State: </b>')
782        if m is None:
783            self._dump('<i> Nothing!</i>')
784        else:
785            if prev_m is not None:
786                if m.is_different(prev_m):
787                    self._dump('</td></tr><tr><td align="left">')
788                    self.visit_checker_messages(m, prev_m)
789                else:
790                    self._dump('<i> No changes!</i>')
791            else:
792                self._dump('</td></tr><tr><td align="left">')
793                self.visit_checker_messages(m)
794
795        self._dump('</td></tr>')
796
797    def visit_state(self, s, prev_s):
798        self.visit_store_in_state(s, prev_s)
799        self.visit_environment_in_state('environment', 'Expressions',
800                                        s, prev_s)
801        self.visit_generic_map_in_state('constraints', 'Ranges',
802                                        s, prev_s)
803        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
804                                        s, prev_s)
805        self.visit_environment_in_state('constructing_objects',
806                                        'Objects Under Construction',
807                                        s, prev_s)
808        self.visit_checker_messages_in_state(s, prev_s)
809
810    def visit_node(self, node):
811        self._dump('%s [shape=record,'
812                   % (node.node_name()))
813        if self._dark_mode:
814            self._dump('color="white",fontcolor="gray80",')
815        self._dump('label=<<table border="0">')
816
817        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
818                   % ("gray20" if self._dark_mode else "gray70",
819                      node.state.state_id
820                      if node.state is not None else 'Unspecified'))
821        if not self._topo_mode:
822            self._dump('<tr><td align="left" width="0">')
823            if len(node.points) > 1:
824                self._dump('<b>Program points:</b></td></tr>')
825            else:
826                self._dump('<b>Program point:</b></td></tr>')
827        self._dump('<tr><td align="left" width="0">'
828                   '<table border="0" align="left" width="0">')
829        for p in node.points:
830            self.visit_program_point(p)
831        self._dump('</table></td></tr>')
832
833        if node.state is not None and not self._topo_mode:
834            prev_s = None
835            # Do diffs only when we have a unique predecessor.
836            # Don't do diffs on the leaf nodes because they're
837            # the important ones.
838            if self._do_diffs and len(node.predecessors) == 1 \
839               and len(node.successors) > 0:
840                prev_s = self._graph.nodes[node.predecessors[0]].state
841            self.visit_state(node.state, prev_s)
842        self._dump_raw('</table>>];\n')
843
844    def visit_edge(self, pred, succ):
845        self._dump_raw('%s -> %s%s;\n' % (
846            pred.node_name(), succ.node_name(),
847            ' [color="white"]' if self._dark_mode else ''
848        ))
849
850    def visit_end_of_graph(self):
851        self._dump_raw('}\n')
852
853        if not self._dump_dot_only:
854            import sys
855            import tempfile
856
857            def write_temp_file(suffix, data):
858                fd, filename = tempfile.mkstemp(suffix=suffix)
859                print('Writing "%s"...' % filename)
860                with os.fdopen(fd, 'w') as fp:
861                    fp.write(data)
862                print('Done! Please remember to remove the file.')
863                return filename
864
865            try:
866                import graphviz
867            except ImportError:
868                # The fallback behavior if graphviz is not installed!
869                print('Python graphviz not found. Please invoke')
870                print('  $ pip install graphviz')
871                print('in order to enable automatic conversion to HTML.')
872                print()
873                print('You may also convert DOT to SVG manually via')
874                print('  $ dot -Tsvg input.dot -o output.svg')
875                print()
876                write_temp_file('.dot', self.output())
877                return
878
879            svg = graphviz.pipe('dot', 'svg', self.output())
880
881            filename = write_temp_file(
882                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
883                             '#1a1a1a' if self._dark_mode else 'white', svg))
884            if sys.platform == 'win32':
885                os.startfile(filename)
886            elif sys.platform == 'darwin':
887                os.system('open "%s"' % filename)
888            else:
889                os.system('xdg-open "%s"' % filename)
890
891
892#===-----------------------------------------------------------------------===#
893# Explorers know how to traverse the ExplodedGraph in a certain order.
894# They would invoke a Visitor on every node or edge they encounter.
895#===-----------------------------------------------------------------------===#
896
897
# BasicExplorer explores the whole graph: every node is visited in sorted
# order of its key, each followed by its outgoing edges in sorted order of
# the successor keys.
class BasicExplorer(object):
    def __init__(self):
        super(BasicExplorer, self).__init__()

    def explore(self, graph, visitor):
        # Drives 'visitor' over the whole of 'graph', bracketed by the
        # begin-graph and end-of-graph callbacks.
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            node = graph.nodes[node_id]
            visitor.visit_node(node)
            for succ_id in sorted(node.successors):
                logging.debug('Visiting edge: %s -> %s '
                              % (node_id, succ_id))
                visitor.visit_edge(node, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
912
913
914#===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph so as to focus on other parts.
916# Trimmers can be combined together by applying them sequentially.
917#===-----------------------------------------------------------------------===#
918
919
# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
# Useful when the trimmed graph is still too large.
class SinglePathTrimmer(object):
    def __init__(self):
        super(SinglePathTrimmer, self).__init__()

    def trim(self, graph):
        # Walks from the root, always following the first successor, and
        # rewires every node on the way to have exactly that one edge. The
        # walk stops at a node without successors or when it arrives at a
        # node it has already kept. All other nodes are dropped.
        kept_ids = set()
        current_id = graph.root_id
        while current_id not in kept_ids:
            kept_ids.add(current_id)
            current = graph.nodes[current_id]
            if not len(current.successors) > 0:
                break
            next_id = current.successors[0]
            successor = graph.nodes[next_id]
            current.successors = [next_id]
            successor.predecessors = [current_id]
            current_id = next_id
        graph.nodes = {kept_id: graph.nodes[kept_id]
                       for kept_id in kept_ids}
944
945
# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer(object):
    def __init__(self, target_nodes):
        # 'target_nodes' holds keys of graph.nodes, as produced by
        # parse_target_nodes().
        super(TargetedTrimmer, self).__init__()
        self._target_nodes = target_nodes

    @staticmethod
    def parse_target_node(node, graph):
        # Resolves one user-supplied node spec to a key of graph.nodes.
        # A spec starting with '0x' is a pointer and maps to the key
        # 'Node<pointer>'; anything else is parsed as an integer and
        # matched against the 'node_id' attribute of the nodes.
        # Raises ValueError if the spec does not name a node of the graph
        # (or is not an integer at all).
        if node.startswith('0x'):
            ret = 'Node' + node
            if ret not in graph.nodes:
                raise ValueError(
                    'no node with pointer %s in the graph' % node)
            return ret

        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Fail here with a clear message rather than returning None and
        # crashing later on a confusing graph.nodes[None] lookup.
        raise ValueError('no node with stable ID %s in the graph' % node)

    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        # Resolves a comma-separated list of node specs; see
        # parse_target_node() for the accepted forms.
        return [TargetedTrimmer.parse_target_node(node, graph)
                for node in target_nodes.split(',')]

    def trim(self, graph):
        # Keeps only the target nodes and everything they are reachable
        # from (following predecessor links); all other nodes, and all
        # edges to or from them, are removed from 'graph'.
        #
        # Work on a copy so that the target list stays intact: the trimmer
        # remains reusable and the caller's list is not emptied.
        queue = list(self._target_nodes)
        visited_nodes = set()

        while queue:
            node_id = queue.pop()
            if node_id in visited_nodes:
                # Queued more than once before it was first processed.
                continue
            visited_nodes.add(node_id)
            for pred_id in graph.nodes[node_id].predecessors:
                if pred_id not in visited_nodes:
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        for node in graph.nodes.values():
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [pred_id for pred_id in node.predecessors
                                 if pred_id in visited_nodes]
990
991
992#===-----------------------------------------------------------------------===#
993# The entry point to the script.
994#===-----------------------------------------------------------------------===#
995
996
def main():
    # Parses the command line, loads the ExplodedGraph dump line by line,
    # applies the requested trimmers and dumps the result through a
    # DotDumpVisitor driven by a BasicExplorer.
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')
    parser.add_argument(
        'filename', type=str,
        help='the .dot file produced by the Static Analyzer')
    parser.add_argument(
        '-v', '--verbose', dest='loglevel', action='store_const',
        const=logging.DEBUG, default=logging.WARNING,
        help='enable info prints')
    parser.add_argument(
        '-d', '--diff', dest='diff', action='store_true',
        help='display differences between states')
    parser.add_argument(
        '-t', '--topology', dest='topology', action='store_true',
        help='only display program points, omit states')
    parser.add_argument(
        '-s', '--single-path', dest='single_path', action='store_true',
        help='only display the leftmost path in the graph '
             '(useful for trimmed graphs that still '
             'branch too much)')
    parser.add_argument(
        '--to', type=str, default=None,
        help='only display execution paths from the root '
             'to the given comma-separated list of nodes '
             'identified by a pointer or a stable ID; '
             'compatible with --single-path')
    parser.add_argument(
        '--dark', dest='dark', action='store_true',
        help='dark mode')
    parser.add_argument(
        '--gray', dest='gray', action='store_true',
        help='black-and-white mode')
    parser.add_argument(
        '--dump-dot-only', dest='dump_dot_only', action='store_true',
        help='instead of writing an HTML file and immediately '
             'displaying it, dump the rewritten dot file '
             'to stdout')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            graph.add_raw_line(raw_line.strip())

    # Target nodes are resolved against the complete, untrimmed graph.
    trimmers = []
    if args.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(args.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if args.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()
    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
                             args.dump_dot_only)

    # Trimmers are applied sequentially, in the order they were requested.
    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)


if __name__ == '__main__':
    main()
1062