xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision daf41722bd54e03cc5a367d35b2208ff652f553d)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23#===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25#===-----------------------------------------------------------------------===#
26
27
# Computes the difference between two dictionaries. Returns a pair of key
# lists (removed, added): 'removed' holds the keys of |prev| that are either
# missing from |curr| or mapped to a different value there; 'added' holds
# the keys of |curr| that are either missing from |prev| or mapped to
# a different value there. A key whose value changed shows up in both lists.
def diff_dicts(curr, prev):
    def differs(key):
        return curr[key] != prev[key]

    removed = [key for key in prev if key not in curr or differs(key)]
    added = [key for key in curr if key not in prev or differs(key)]
    return (removed, added)
33
34
# Represents any program state trait that is a dictionary of key-value pairs.
# The pairs are kept in an OrderedDict, so their original order is preserved.
class GenericMap(object):
    def __init__(self, items):
        self.generic_map = collections.OrderedDict(items)

    # Returns a (removed, added) pair of key lists relative to |prev|.
    def diff(self, prev):
        return diff_dicts(self.generic_map, prev.generic_map)

    # Tells whether anything was removed or added relative to |prev|.
    def is_different(self, prev):
        return any(self.diff(prev))
46
47
# A deserialized source location. Only the base name of the file is kept;
# when the JSON has no 'file' entry the location is attributed to
# '(main file)'. A nested 'spelling' location, if present, is deserialized
# recursively and marks this location as a macro location.
class SourceLocation(object):
    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']

        if 'file' in json_loc:
            self.filename = os.path.basename(json_loc['file'])
        else:
            self.filename = '(main file)'

        if 'spelling' in json_loc:
            self.spelling = SourceLocation(json_loc['spelling'])
        else:
            self.spelling = None

    def is_macro(self):
        return self.spelling is not None
62
63
# A deserialized program point. Every point has a kind and a tag; the
# remaining attributes depend on the kind: 'Edge' points carry source and
# destination ids, 'Statement' points carry the statement description and
# an optional source location, 'BlockEntrance' points carry a block id.
# Points of any other kind carry nothing else.
class ProgramPoint(object):
    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        kind = json_pp['kind']
        self.kind = kind
        self.tag = json_pp['tag']

        if kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
            return

        if kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
            return

        if kind != 'Statement':
            return

        logging.debug(json_pp)
        # These attributes are copied verbatim from the JSON.
        for attr in ('stmt_kind', 'stmt_point_kind', 'stmt_id',
                     'pointer', 'pretty'):
            setattr(self, attr, json_pp[attr])
        json_loc = json_pp['location']
        self.loc = SourceLocation(json_loc) if json_loc is not None else None
84
85
# A single expression acting as a key in a deserialized Environment.
# Keys are identified solely by their statement (or initializer) id: two keys
# with the same id compare equal and hash identically, regardless of their
# 'pretty' and 'kind' attributes.
class EnvironmentBindingKey(object):
    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Comparing against an unrelated object must yield "not equal"
        # instead of failing on the missing _key() method.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive != from ==, so spell it out to keep both
    # operators consistent on every interpreter this script supports.
    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
104
105
# Deserialized description of a location context. Contexts are identified
# solely by their 'lctx_id': two contexts with the same id compare equal and
# hash identically.
class LocationContext(object):
    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Comparing against an unrelated object must yield "not equal"
        # instead of failing on the missing _key() method.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive != from ==, so spell it out to keep both
    # operators consistent on every interpreter this script supports.
    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
124
125
# A group of deserialized Environment bindings that correspond to a specific
# location context. The bindings map EnvironmentBindingKey objects to the
# 'value' entries of the JSON items, in their original order; a frame whose
# 'items' entry is null has no bindings.
class EnvironmentFrame(object):
    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)
        self.bindings = collections.OrderedDict()
        json_items = json_frame['items']
        if json_items is not None:
            for json_binding in json_items:
                key = EnvironmentBindingKey(json_binding)
                self.bindings[key] = json_binding['value']

    # Returns a (removed, added) pair of binding key lists relative to |prev|.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        return any(self.diff_bindings(prev))
143
144
# A deserialized Environment: a list of EnvironmentFrame objects. This class
# can also hold other entities that are similar to Environment, such as
# Objects Under Construction.
class GenericEnvironment(object):
    def __init__(self, json_e):
        super(GenericEnvironment, self).__init__()
        self.frames = [EnvironmentFrame(json_frame) for json_frame in json_e]

    # Returns the indices of the frames whose bindings differ from the
    # corresponding frame of |prev|, or None when the two environments
    # cannot be compared frame by frame.
    def diff_frames(self, prev):
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for index, (frame, prev_frame) in enumerate(zip(self.frames,
                                                        prev.frames)):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        updated = self.diff_frames(prev)
        return updated is None or bool(updated)
175
176
# A single binding key in a deserialized RegionStore cluster. A key is
# identified by its (kind, offset) pair: two keys with the same pair compare
# equal and hash identically.
class StoreBindingKey(object):
    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Comparing against an unrelated object must yield "not equal"
        # instead of failing on the missing _key() method.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive != from ==, so spell it out to keep both
    # operators consistent on every interpreter this script supports.
    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
192
193
# A single cluster of the deserialized RegionStore: the base region together
# with an ordered mapping from StoreBindingKey objects to the 'value' entries
# of the JSON items.
class StoreCluster(object):
    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        self.bindings = collections.OrderedDict(
            (StoreBindingKey(json_binding), json_binding['value'])
            for json_binding in json_sc['items'])

    # Returns a (removed, added) pair of binding key lists relative to |prev|.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        return any(self.diff_bindings(prev))
208
209
# A deserialized RegionStore: the store pointer and an ordered mapping from
# cluster pointers to StoreCluster objects.
class Store(object):
    def __init__(self, json_s):
        super(Store, self).__init__()
        self.ptr = json_s['pointer']
        self.clusters = collections.OrderedDict(
            (json_cluster['pointer'], StoreCluster(json_cluster))
            for json_cluster in json_s['items'])

    # Returns a (removed, added, updated) triple of cluster pointer lists
    # relative to |prev|. 'updated' lists the clusters present in both stores
    # whose contents differ, in the order they appear in |prev|.
    def diff_clusters(self, prev):
        mine, theirs = self.clusters, prev.clusters
        removed = [ptr for ptr in theirs if ptr not in mine]
        added = [ptr for ptr in mine if ptr not in theirs]
        updated = [ptr for ptr in theirs
                   if ptr in mine and theirs[ptr].is_different(mine[ptr])]
        return (removed, added, updated)

    def is_different(self, prev):
        return any(self.diff_clusters(prev))
228
229
# Deserialized messages from a single checker in a single program state.
# Basically a list of raw strings.
class CheckerLines(object):
    def __init__(self, json_lines):
        super(CheckerLines, self).__init__()
        self.lines = json_lines

    # Returns the stripped difflib.ndiff() lines that start with '+' or '-',
    # i.e. the lines that were added or removed relative to |prev|.
    def diff_lines(self, prev):
        return [entry.strip()
                for entry in difflib.ndiff(prev.lines, self.lines)
                if entry.startswith(('+', '-'))]

    def is_different(self, prev):
        return bool(self.diff_lines(prev))
244
245
# Deserialized messages of all checkers, separated by checker: an ordered
# mapping from the checker name to its CheckerLines.
class CheckerMessages(object):
    def __init__(self, json_m):
        super(CheckerMessages, self).__init__()
        self.items = collections.OrderedDict(
            (json_entry['checker'], CheckerLines(json_entry['messages']))
            for json_entry in json_m)

    # Returns a (removed, added, updated) triple of checker name lists
    # relative to |prev|. 'updated' lists the checkers present in both states
    # whose lines differ, in the order they appear in |prev|.
    def diff_messages(self, prev):
        mine, theirs = self.items, prev.items
        removed = [chk for chk in theirs if chk not in mine]
        added = [chk for chk in mine if chk not in theirs]
        updated = [chk for chk in theirs
                   if chk in mine and theirs[chk].is_different(mine[chk])]
        return (removed, added, updated)

    def is_different(self, prev):
        return any(self.diff_messages(prev))
263
264
# A deserialized program state. Each trait ('store', 'environment',
# 'constraints', 'dynamic_types', 'constructing_objects', 'checker_messages')
# is deserialized into its own object, or set to None when the JSON value of
# the trait is null.
class ProgramState(object):
    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id

        json_store = json_ps['store']
        self.store = None if json_store is None else Store(json_store)

        json_env = json_ps['environment']
        if json_env is None:
            self.environment = None
        else:
            self.environment = GenericEnvironment(json_env['items'])

        json_constraints = json_ps['constraints']
        if json_constraints is None:
            self.constraints = None
        else:
            self.constraints = GenericMap(
                [(c['symbol'], c['range']) for c in json_constraints])

        json_types = json_ps['dynamic_types']
        if json_types is None:
            self.dynamic_types = None
        else:
            # The dynamic type is annotated when sub-classes are possible.
            self.dynamic_types = GenericMap(
                [(t['region'],
                  '%s%s' % (t['dyn_type'],
                            ' (or a sub-class)' if t['sub_classable'] else ''))
                 for t in json_types])

        json_ctors = json_ps['constructing_objects']
        if json_ctors is None:
            self.constructing_objects = None
        else:
            self.constructing_objects = GenericEnvironment(json_ctors)

        json_messages = json_ps['checker_messages']
        if json_messages is None:
            self.checker_messages = None
        else:
            self.checker_messages = CheckerMessages(json_messages)
297
298
# A deserialized exploded graph node. It has a default constructor because
# a node may be referenced as part of an edge before its contents are
# deserialized, and at that point we already need room for its predecessors
# and successors. The contents are filled in later by construct().
class ExplodedNode(object):
    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    # Fills in the node from its JSON description. |node_id| is the name
    # under which the node is known to the graph; it must match the name
    # derived from the node's pointer.
    def construct(self, node_id, json_node):
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.has_report = json_node['has_report']
        self.is_sink = json_node['is_sink']
        self.points = [ProgramPoint(json_point)
                       for json_point in json_node['program_points']]

        json_state = json_node['program_state']
        if json_state is None:
            self.state = None
        else:
            self.state = ProgramState(json_node['state_id'], json_state)

        assert self.node_name() == node_id

    def node_name(self):
        return 'Node' + self.ptr
323
324
# A deserialized ExplodedGraph. Constructed by consuming a .dot file
# line-by-line via add_raw_line(). Nodes are created on first reference,
# so an edge may mention a node before its contents have been seen.
class ExplodedGraph(object):
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        self.incomplete_line = ''
        self.root_id = None
        self.nodes = collections.defaultdict(ExplodedNode)

    # Consumes one line of the .dot file. Comment lines are ignored, edge
    # lines update the predecessor/successor lists, node lines deserialize
    # the JSON stored in the node label; anything else is skipped.
    def add_raw_line(self, raw_line):
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(';'):
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)
        edge_match = self.edge_re.match(raw_line)
        if edge_match is not None:
            logging.debug('Classified as edge line.')
            pred, succ = edge_match.groups()
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(raw_line)
        if node_match is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id, node_label = node_match.groups()
        # The first node line seen while the graph is still empty
        # becomes the root.
        if not self.nodes:
            self.root_id = node_id
        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # The replacements are applied strictly in this order.
        replacements = (
            ('\\l', ''),
            (' ', ''),
            ('\\"', '"'),
            ('\\{', '{'),
            ('\\}', '}'),
            ('\\\\', '\\'),
            ('\\|', '|'),
            ('\\<', '\\\\<'),
            ('\\>', '\\\\>'),
        )
        for escaped, plain in replacements:
            node_label = node_label.replace(escaped, plain)
        node_label = node_label.rstrip(',')
        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
386
387
388#===-----------------------------------------------------------------------===#
389# Visitors traverse a deserialized ExplodedGraph and do different things
390# with every node and edge.
391#===-----------------------------------------------------------------------===#
392
393
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
396class DotDumpVisitor(object):
397    def __init__(self, do_diffs, dark_mode, gray_mode,
398                 topo_mode, dump_dot_only):
399        super(DotDumpVisitor, self).__init__()
400        self._do_diffs = do_diffs
401        self._dark_mode = dark_mode
402        self._gray_mode = gray_mode
403        self._topo_mode = topo_mode
404        self._dump_dot_only = dump_dot_only
405        self._output = []
406
407    def _dump_raw(self, s):
408        if self._dump_dot_only:
409            print(s, end='')
410        else:
411            self._output.append(s)
412
413    def output(self):
414        assert not self._dump_dot_only
415        return ''.join(self._output)
416
417    def _dump(self, s):
418        s = s.replace('&', '&amp;') \
419             .replace('{', '\\{') \
420             .replace('}', '\\}') \
421             .replace('\\<', '&lt;') \
422             .replace('\\>', '&gt;') \
423             .replace('\\l', '<br />') \
424             .replace('|', '\\|')
425        if self._gray_mode:
426            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
427            s = re.sub(r'</font>', '', s)
428        self._dump_raw(s)
429
430    @staticmethod
431    def _diff_plus_minus(is_added):
432        if is_added is None:
433            return ''
434        if is_added:
435            return '<font color="forestgreen">+</font>'
436        return '<font color="red">-</font>'
437
438    @staticmethod
439    def _short_pretty(s):
440        if s is None:
441            return None
442        if len(s) < 20:
443            return s
444        left = s.find('{')
445        right = s.rfind('}')
446        if left == -1 or right == -1 or left >= right:
447            return s
448        candidate = s[0:left + 1] + ' ... ' + s[right:]
449        if len(candidate) >= len(s):
450            return s
451        return candidate
452
453    @staticmethod
454    def _make_sloc(loc):
455        if loc is None:
456            return '<i>Invalid Source Location</i>'
457
458        def make_plain_loc(loc):
459            return '%s:<b>%s</b>:<b>%s</b>' \
460                % (loc.filename, loc.line, loc.col)
461
462        if loc.is_macro():
463            return '%s <font color="royalblue1">' \
464                   '(<i>spelling at </i> %s)</font>' \
465                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
466
467        return make_plain_loc(loc)
468
469    def visit_begin_graph(self, graph):
470        self._graph = graph
471        self._dump_raw('digraph "ExplodedGraph" {\n')
472        if self._dark_mode:
473            self._dump_raw('bgcolor="gray10";\n')
474        self._dump_raw('label="";\n')
475
476    def visit_program_point(self, p):
477        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
478            color = 'gold3'
479        elif p.kind in ['PreStmtPurgeDeadSymbols',
480                        'PostStmtPurgeDeadSymbols']:
481            color = 'red'
482        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
483            color = 'dodgerblue' if self._dark_mode else 'blue'
484        elif p.kind in ['Statement']:
485            color = 'cyan4'
486        else:
487            color = 'forestgreen'
488
489        if p.kind == 'Statement':
490            # This avoids pretty-printing huge statements such as CompoundStmt.
491            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
492            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
493            stmt_color = 'cyan3'
494            self._dump('<tr><td align="left" width="0">%s:</td>'
495                       '<td align="left" width="0"><font color="%s">'
496                       '%s</font> </td>'
497                       '<td align="left"><i>S%s</i></td>'
498                       '<td align="left"><font color="%s">%s</font></td>'
499                       '<td align="left">%s</td></tr>'
500                       % (self._make_sloc(p.loc), color, p.stmt_kind,
501                          p.stmt_id, stmt_color, p.stmt_point_kind,
502                          self._short_pretty(p.pretty)
503                          if not skip_pretty else ''))
504        elif p.kind == 'Edge':
505            self._dump('<tr><td width="0"></td>'
506                       '<td align="left" width="0">'
507                       '<font color="%s">%s</font></td><td align="left">'
508                       '[B%d] -\\> [B%d]</td></tr>'
509                       % (color, 'BlockEdge', p.src_id, p.dst_id))
510        elif p.kind == 'BlockEntrance':
511            self._dump('<tr><td width="0"></td>'
512                       '<td align="left" width="0">'
513                       '<font color="%s">%s</font></td>'
514                       '<td align="left">[B%d]</td></tr>'
515                       % (color, p.kind, p.block_id))
516        else:
517            # TODO: Print more stuff for other kinds of points.
518            self._dump('<tr><td width="0"></td>'
519                       '<td align="left" width="0" colspan="2">'
520                       '<font color="%s">%s</font></td></tr>'
521                       % (color, p.kind))
522
523        if p.tag is not None:
524            self._dump('<tr><td width="0"></td>'
525                       '<td colspan="3" align="left">'
526                       '<b>Tag: </b> <font color="crimson">'
527                       '%s</font></td></tr>' % p.tag)
528
529    def visit_environment(self, e, prev_e=None):
530        self._dump('<table border="0">')
531
532        def dump_location_context(lc, is_added=None):
533            self._dump('<tr><td>%s</td>'
534                       '<td align="left"><b>%s</b></td>'
535                       '<td align="left" colspan="2">'
536                       '<font color="gray60">%s </font>'
537                       '%s</td></tr>'
538                       % (self._diff_plus_minus(is_added),
539                          lc.caption, lc.decl,
540                          ('(%s)' % self._make_sloc(lc.loc))
541                          if lc.loc is not None else ''))
542
543        def dump_binding(f, b, is_added=None):
544            self._dump('<tr><td>%s</td>'
545                       '<td align="left"><i>S%s</i></td>'
546                       '%s'
547                       '<td align="left">%s</td>'
548                       '<td align="left">%s</td></tr>'
549                       % (self._diff_plus_minus(is_added),
550                          b.stmt_id,
551                          '<td align="left"><font color="%s"><i>'
552                          '%s</i></font></td>' % (
553                              'lavender' if self._dark_mode else 'darkgreen',
554                              ('(%s)' % b.kind) if b.kind is not None else ' '
555                          ),
556                          self._short_pretty(b.pretty), f.bindings[b]))
557
558        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
559        if frames_updated:
560            for i in frames_updated:
561                f = e.frames[i]
562                prev_f = prev_e.frames[i]
563                dump_location_context(f.location_context)
564                bindings_removed, bindings_added = f.diff_bindings(prev_f)
565                for b in bindings_removed:
566                    dump_binding(prev_f, b, False)
567                for b in bindings_added:
568                    dump_binding(f, b, True)
569        else:
570            for f in e.frames:
571                dump_location_context(f.location_context)
572                for b in f.bindings:
573                    dump_binding(f, b)
574
575        self._dump('</table>')
576
577    def visit_environment_in_state(self, selector, title, s, prev_s=None):
578        e = getattr(s, selector)
579        prev_e = getattr(prev_s, selector) if prev_s is not None else None
580        if e is None and prev_e is None:
581            return
582
583        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
584        if e is None:
585            self._dump('<i> Nothing!</i>')
586        else:
587            if prev_e is not None:
588                if e.is_different(prev_e):
589                    self._dump('</td></tr><tr><td align="left">')
590                    self.visit_environment(e, prev_e)
591                else:
592                    self._dump('<i> No changes!</i>')
593            else:
594                self._dump('</td></tr><tr><td align="left">')
595                self.visit_environment(e)
596
597        self._dump('</td></tr>')
598
599    def visit_store(self, s, prev_s=None):
600        self._dump('<table border="0">')
601
602        def dump_binding(s, c, b, is_added=None):
603            self._dump('<tr><td>%s</td>'
604                       '<td align="left">%s</td>'
605                       '<td align="left">%s</td>'
606                       '<td align="left">%s</td>'
607                       '<td align="left">%s</td></tr>'
608                       % (self._diff_plus_minus(is_added),
609                          s.clusters[c].base_region, b.offset,
610                          '(<i>Default</i>)' if b.kind == 'Default'
611                          else '',
612                          s.clusters[c].bindings[b]))
613
614        if prev_s is not None:
615            clusters_removed, clusters_added, clusters_updated = \
616                s.diff_clusters(prev_s)
617            for c in clusters_removed:
618                for b in prev_s.clusters[c].bindings:
619                    dump_binding(prev_s, c, b, False)
620            for c in clusters_updated:
621                bindings_removed, bindings_added = \
622                    s.clusters[c].diff_bindings(prev_s.clusters[c])
623                for b in bindings_removed:
624                    dump_binding(prev_s, c, b, False)
625                for b in bindings_added:
626                    dump_binding(s, c, b, True)
627            for c in clusters_added:
628                for b in s.clusters[c].bindings:
629                    dump_binding(s, c, b, True)
630        else:
631            for c in s.clusters:
632                for b in s.clusters[c].bindings:
633                    dump_binding(s, c, b)
634
635        self._dump('</table>')
636
637    def visit_store_in_state(self, s, prev_s=None):
638        st = s.store
639        prev_st = prev_s.store if prev_s is not None else None
640        if st is None and prev_st is None:
641            return
642
643        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
644        if st is None:
645            self._dump('<i> Nothing!</i>')
646        else:
647            if self._dark_mode:
648                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
649            else:
650                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
651            if prev_st is not None:
652                if s.store.is_different(prev_st):
653                    self._dump('</td></tr><tr><td align="left">')
654                    self.visit_store(st, prev_st)
655                else:
656                    self._dump('<i> No changes!</i>')
657            else:
658                self._dump('</td></tr><tr><td align="left">')
659                self.visit_store(st)
660        self._dump('</td></tr>')
661
662    def visit_generic_map(self, m, prev_m=None):
663        self._dump('<table border="0">')
664
665        def dump_pair(m, k, is_added=None):
666            self._dump('<tr><td>%s</td>'
667                       '<td align="left">%s</td>'
668                       '<td align="left">%s</td></tr>'
669                       % (self._diff_plus_minus(is_added),
670                          k, m.generic_map[k]))
671
672        if prev_m is not None:
673            removed, added = m.diff(prev_m)
674            for k in removed:
675                dump_pair(prev_m, k, False)
676            for k in added:
677                dump_pair(m, k, True)
678        else:
679            for k in m.generic_map:
680                dump_pair(m, k, None)
681
682        self._dump('</table>')
683
684    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
685        m = getattr(s, selector)
686        prev_m = getattr(prev_s, selector) if prev_s is not None else None
687        if m is None and prev_m is None:
688            return
689
690        self._dump('<hr />')
691        self._dump('<tr><td align="left">'
692                   '<b>%s: </b>' % title)
693        if m is None:
694            self._dump('<i> Nothing!</i>')
695        else:
696            if prev_m is not None:
697                if m.is_different(prev_m):
698                    self._dump('</td></tr><tr><td align="left">')
699                    self.visit_generic_map(m, prev_m)
700                else:
701                    self._dump('<i> No changes!</i>')
702            else:
703                self._dump('</td></tr><tr><td align="left">')
704                self.visit_generic_map(m)
705
706        self._dump('</td></tr>')
707
708    def visit_checker_messages(self, m, prev_m=None):
709        self._dump('<table border="0">')
710
711        def dump_line(l, is_added=None):
712            self._dump('<tr><td>%s</td>'
713                       '<td align="left">%s</td></tr>'
714                       % (self._diff_plus_minus(is_added), l))
715
716        def dump_chk(chk, is_added=None):
717            dump_line('<i>%s</i>:' % chk, is_added)
718
719        if prev_m is not None:
720            removed, added, updated = m.diff_messages(prev_m)
721            for chk in removed:
722                dump_chk(chk, False)
723                for l in prev_m.items[chk].lines:
724                    dump_line(l, False)
725            for chk in updated:
726                dump_chk(chk)
727                for l in m.items[chk].diff_lines(prev_m.items[chk]):
728                    dump_line(l[1:], l.startswith('+'))
729            for chk in added:
730                dump_chk(chk, True)
731                for l in m.items[chk].lines:
732                    dump_line(l, True)
733        else:
734            for chk in m.items:
735                dump_chk(chk)
736                for l in m.items[chk].lines:
737                    dump_line(l)
738
739        self._dump('</table>')
740
741    def visit_checker_messages_in_state(self, s, prev_s=None):
742        m = s.checker_messages
743        prev_m = prev_s.checker_messages if prev_s is not None else None
744        if m is None and prev_m is None:
745            return
746
747        self._dump('<hr />')
748        self._dump('<tr><td align="left">'
749                   '<b>Checker State: </b>')
750        if m is None:
751            self._dump('<i> Nothing!</i>')
752        else:
753            if prev_m is not None:
754                if m.is_different(prev_m):
755                    self._dump('</td></tr><tr><td align="left">')
756                    self.visit_checker_messages(m, prev_m)
757                else:
758                    self._dump('<i> No changes!</i>')
759            else:
760                self._dump('</td></tr><tr><td align="left">')
761                self.visit_checker_messages(m)
762
763        self._dump('</td></tr>')
764
765    def visit_state(self, s, prev_s):
766        self.visit_store_in_state(s, prev_s)
767        self.visit_environment_in_state('environment', 'Environment',
768                                        s, prev_s)
769        self.visit_generic_map_in_state('constraints', 'Ranges',
770                                        s, prev_s)
771        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
772                                        s, prev_s)
773        self.visit_environment_in_state('constructing_objects',
774                                        'Objects Under Construction',
775                                        s, prev_s)
776        self.visit_checker_messages_in_state(s, prev_s)
777
778    def visit_node(self, node):
779        self._dump('%s [shape=record,'
780                   % (node.node_name()))
781        if self._dark_mode:
782            self._dump('color="white",fontcolor="gray80",')
783        self._dump('label=<<table border="0">')
784
785        self._dump('<tr><td bgcolor="%s"><b>Node %d (%s) - '
786                   'State %s</b></td></tr>'
787                   % ("gray20" if self._dark_mode else "gray",
788                      node.node_id, node.ptr, node.state.state_id
789                      if node.state is not None else 'Unspecified'))
790        if node.has_report:
791            self._dump('<tr><td><font color="red"><b>Bug Report Attached'
792                       '</b></font></td></tr>')
793        if node.is_sink:
794            self._dump('<tr><td><font color="cornflowerblue"><b>Sink Node'
795                       '</b></font></td></tr>')
796        if not self._topo_mode:
797            self._dump('<tr><td align="left" width="0">')
798            if len(node.points) > 1:
799                self._dump('<b>Program points:</b></td></tr>')
800            else:
801                self._dump('<b>Program point:</b></td></tr>')
802        self._dump('<tr><td align="left" width="0">'
803                   '<table border="0" align="left" width="0">')
804        for p in node.points:
805            self.visit_program_point(p)
806        self._dump('</table></td></tr>')
807
808        if node.state is not None and not self._topo_mode:
809            prev_s = None
810            # Do diffs only when we have a unique predecessor.
811            # Don't do diffs on the leaf nodes because they're
812            # the important ones.
813            if self._do_diffs and len(node.predecessors) == 1 \
814               and len(node.successors) > 0:
815                prev_s = self._graph.nodes[node.predecessors[0]].state
816            self.visit_state(node.state, prev_s)
817        self._dump_raw('</table>>];\n')
818
819    def visit_edge(self, pred, succ):
820        self._dump_raw('%s -> %s%s;\n' % (
821            pred.node_name(), succ.node_name(),
822            ' [color="white"]' if self._dark_mode else ''
823        ))
824
825    def visit_end_of_graph(self):
826        self._dump_raw('}\n')
827
828        if not self._dump_dot_only:
829            import sys
830            import tempfile
831
832            def write_temp_file(suffix, data):
833                fd, filename = tempfile.mkstemp(suffix=suffix)
834                print('Writing "%s"...' % filename)
835                with os.fdopen(fd, 'w') as fp:
836                    fp.write(data)
837                print('Done! Please remember to remove the file.')
838                return filename
839
840            try:
841                import graphviz
842            except ImportError:
843                # The fallback behavior if graphviz is not installed!
844                print('Python graphviz not found. Please invoke')
845                print('  $ pip install graphviz')
846                print('in order to enable automatic conversion to HTML.')
847                print()
848                print('You may also convert DOT to SVG manually via')
849                print('  $ dot -Tsvg input.dot -o output.svg')
850                print()
851                write_temp_file('.dot', self.output())
852                return
853
854            svg = graphviz.pipe('dot', 'svg', self.output())
855
856            filename = write_temp_file(
857                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
858                             '#1a1a1a' if self._dark_mode else 'white', svg))
859            if sys.platform == 'win32':
860                os.startfile(filename)
861            elif sys.platform == 'darwin':
862                os.system('open "%s"' % filename)
863            else:
864                os.system('xdg-open "%s"' % filename)
865
866
867#===-----------------------------------------------------------------------===#
868# Explorers know how to traverse the ExplodedGraph in a certain order.
869# They would invoke a Visitor on every node or edge they encounter.
870#===-----------------------------------------------------------------------===#
871
872
# BasicExplorer explores the whole graph: nodes are visited in sorted key
# order, and each node is followed by its outgoing edges in sorted order.
class BasicExplorer(object):
    def __init__(self):
        super(BasicExplorer, self).__init__()

    # Reports the beginning of the graph, every node with its outgoing
    # edges, and finally the end of the graph to the given visitor.
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            current = graph.nodes[node_id]
            visitor.visit_node(current)
            for succ_id in sorted(current.successors):
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(current, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
887
888
889#===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph in order to focus on other parts.
891# Trimmers can be combined together by applying them sequentially.
892#===-----------------------------------------------------------------------===#
893
894
# SinglePathTrimmer keeps only a single path - the leftmost path from the
# root, i.e. the one that follows the first successor of every node.
# Useful when the trimmed graph is still too large.
class SinglePathTrimmer(object):
    def __init__(self):
        super(SinglePathTrimmer, self).__init__()

    # Rewrites graph in place: every node on the path keeps only its first
    # successor, that successor keeps only this node as its predecessor, and
    # all nodes off the path are dropped from graph.nodes.
    def trim(self, graph):
        on_path = set()
        current_id = graph.root_id
        while True:
            on_path.add(current_id)
            current = graph.nodes[current_id]
            if len(current.successors) == 0:
                # Reached a leaf: the path ends here.
                break
            next_id = current.successors[0]
            following = graph.nodes[next_id]
            current.successors = [next_id]
            following.predecessors = [current_id]
            if next_id in on_path:
                # The path loops back onto itself; stop walking.
                break
            current_id = next_id
        graph.nodes = {kept_id: graph.nodes[kept_id] for kept_id in on_path}
919
920
# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer(object):
    # target_nodes is a list of keys of graph.nodes, e.g. as produced by
    # parse_target_nodes().
    def __init__(self, target_nodes):
        super(TargetedTrimmer, self).__init__()
        self._target_nodes = target_nodes

    # Resolves a single user-supplied node specifier to a key of
    # graph.nodes. A specifier starting with '0x' is treated as a node
    # pointer; anything else is parsed as an integer stable ID and matched
    # against the node_id of every node.
    # Raises ValueError if the specifier is not an integer or if no node in
    # the graph matches it.
    @staticmethod
    def parse_target_node(node, graph):
        if node.startswith('0x'):
            ret = 'Node' + node
            # An explicit check rather than an assert, so that the
            # validation survives running under "python -O".
            if ret not in graph.nodes:
                raise ValueError(
                    'No node with pointer %s in the graph' % node)
            return ret

        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Fail here with a clear message instead of returning None, which
        # would only surface later as a KeyError in trim().
        raise ValueError('No node with stable ID %d in the graph' % stable_id)

    # Resolves a comma-separated list of node specifiers; whitespace around
    # the individual specifiers is ignored.
    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        return [TargetedTrimmer.parse_target_node(node.strip(), graph)
                for node in target_nodes.split(',')]

    # Rewrites graph in place so that it contains only the target nodes and
    # the nodes they are reachable from, following predecessor links.
    def trim(self, graph):
        # Work on a copy: popping from self._target_nodes directly would
        # empty the caller's list and make a second trim() drop every node.
        queue = list(self._target_nodes)
        visited_nodes = set()

        while len(queue) > 0:
            node_id = queue.pop()
            if node_id in visited_nodes:
                # Queued more than once through different successors.
                continue
            visited_nodes.add(node_id)
            for pred_id in graph.nodes[node_id].predecessors:
                if pred_id not in visited_nodes:
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        # Drop the edges that lead to or from discarded nodes.
        for node_id in graph.nodes:
            node = graph.nodes[node_id]
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [pred_id for pred_id in node.predecessors
                                 if pred_id in visited_nodes]
965
966
967#===-----------------------------------------------------------------------===#
968# The entry point to the script.
969#===-----------------------------------------------------------------------===#
970
971
# Parses the command line, loads the ExplodedGraph dump line by line,
# applies the requested trimmers, and dumps the resulting graph through
# DotDumpVisitor.
def main():
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')
    parser.add_argument(
        'filename', type=str,
        help='the .dot file produced by the Static Analyzer')
    parser.add_argument(
        '-v', '--verbose', dest='loglevel', action='store_const',
        const=logging.DEBUG, default=logging.WARNING,
        help='enable info prints')
    parser.add_argument(
        '-d', '--diff', dest='diff', action='store_true',
        help='display differences between states')
    parser.add_argument(
        '-t', '--topology', dest='topology', action='store_true',
        help='only display program points, omit states')
    parser.add_argument(
        '-s', '--single-path', dest='single_path', action='store_true',
        help='only display the leftmost path in the graph '
             '(useful for trimmed graphs that still '
             'branch too much)')
    parser.add_argument(
        '--to', type=str, default=None,
        help='only display execution paths from the root '
             'to the given comma-separated list of nodes '
             'identified by a pointer or a stable ID; '
             'compatible with --single-path')
    parser.add_argument(
        '--dark', dest='dark', action='store_true',
        help='dark mode')
    parser.add_argument(
        '--gray', dest='gray', action='store_true',
        help='black-and-white mode')
    parser.add_argument(
        '--dump-dot-only', dest='dump_dot_only', action='store_true',
        help='instead of writing an HTML file and immediately '
             'displaying it, dump the rewritten dot file '
             'to stdout')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as dump_file:
        for line in dump_file:
            graph.add_raw_line(line.strip())

    # The targeted trimmer goes first so that --single-path picks its path
    # from the already narrowed graph.
    trimmers = []
    if args.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(args.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if args.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()
    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
                             args.dump_dot_only)

    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)
1033
1034
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
1037