xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision d325196f19bfecff59252f3d214278fb6ee4ad61)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23#===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25#===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    """Compare two dictionaries key by key.

    Returns a ``(removed, added)`` pair of key lists. ``removed`` holds the
    keys of ``prev`` that are missing from ``curr`` or whose value differs
    between the two; ``added`` holds the keys of ``curr`` that are missing
    from ``prev`` or whose value differs. A key whose value changed is
    therefore reported in both lists.
    """
    def differs(key, other):
        # A key counts as different when the other dictionary lacks it or
        # when the two dictionaries disagree on its value.
        return key not in other or curr[key] != prev[key]

    removed = [key for key in prev if differs(key, curr)]
    added = [key for key in curr if differs(key, prev)]
    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap(object):
    """An ordered key-value view of a dictionary-like program state trait."""

    def __init__(self, items):
        # Keep the pairs in the order in which they were supplied.
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return the ``(removed, added)`` key lists relative to ``prev``."""
        return diff_dicts(self.generic_map, prev.generic_map)

    def is_different(self, prev):
        """Tell whether any key was removed, added or changed since ``prev``."""
        removed, added = self.diff(prev)
        if len(removed) != 0:
            return True
        return len(added) != 0
46
47
48# A deserialized source location.
class SourceLocation(object):
    """A source location deserialized from its JSON description.

    ``line`` and ``col`` come from the mandatory 'line' and 'column' entries.
    ``filename`` is the base name of the optional 'file' entry and
    ``spelling`` is a nested SourceLocation built from the optional
    'spelling' entry.
    """

    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']

        # A location without a 'file' entry is labelled as the main file.
        if 'file' in json_loc:
            self.filename = os.path.basename(json_loc['file'])
        else:
            self.filename = '(main file)'

        # A nested spelling location is what marks this location as a macro
        # location (see is_macro()).
        if 'spelling' in json_loc:
            self.spelling = SourceLocation(json_loc['spelling'])
        else:
            self.spelling = None

    def is_macro(self):
        """Tell whether this location carries a spelling location."""
        return self.spelling is not None
62
63
64# A deserialized program point.
class ProgramPoint(object):
    """A program point deserialized from its JSON description.

    ``kind`` and ``tag`` are always read. The remaining attributes depend on
    the kind: 'Edge' points get ``src_id`` and ``dst_id``, 'BlockEntrance'
    points get ``block_id`` and 'Statement' points get the statement
    description plus an optional location. Points of any other kind get no
    additional attributes.
    """

    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']

        if self.kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
            return

        if self.kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
            return

        if self.kind == 'Statement':
            logging.debug(json_pp)
            self.stmt_kind = json_pp['stmt_kind']
            # The cast kind is optional in the dump.
            self.cast_kind = json_pp.get('cast_kind')
            self.stmt_point_kind = json_pp['stmt_point_kind']
            self.stmt_id = json_pp['stmt_id']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            # The 'location' entry is mandatory but may be null.
            json_loc = json_pp['location']
            if json_loc is None:
                self.loc = None
            else:
                self.loc = SourceLocation(json_loc)
86
87
88# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey(object):
    """A single expression acting as a key in a deserialized Environment.

    Two keys are equal when they carry the same ``stmt_id``; ``pretty`` and
    ``kind`` take no part in comparison or hashing.
    """

    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Comparing against an unrelated object must not blow up with an
        # AttributeError; let Python fall back to its default handling.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it has to be spelled out
        # to keep both operators consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
106
107
108# Deserialized description of a location context.
class LocationContext(object):
    """Deserialized description of a location context.

    Reads the 'lctx_id', 'location_context' (stored as ``caption``),
    'calling' (stored as ``decl``) and 'location' entries of a frame. Two
    contexts are equal when their ``lctx_id`` values are equal.
    """

    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        # The 'location' entry is mandatory but may be null.
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Restrict equality to location contexts: an unrelated object that
        # merely has a matching _key() must not compare equal, and objects
        # without _key() must not raise AttributeError.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it has to be spelled out
        # to keep both operators consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
126
127
128# A group of deserialized Environment bindings that correspond to a specific
129# location context.
class EnvironmentFrame(object):
    """Environment bindings that belong to one specific location context.

    ``bindings`` maps EnvironmentBindingKey objects to the 'value' entry of
    each item, in the order of the frame's 'items' list. A null 'items'
    entry yields an empty mapping.
    """

    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)

        pairs = []
        json_items = json_frame['items']
        if json_items is not None:
            for json_item in json_items:
                binding_key = EnvironmentBindingKey(json_item)
                pairs.append((binding_key, json_item['value']))
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        removed, added = self.diff_bindings(prev)
        if len(removed) != 0:
            return True
        return len(added) != 0
145
146
147# A deserialized Environment. This class can also hold other entities that
148# are similar to Environment, such as Objects Under Construction.
class GenericEnvironment(object):
    """A deserialized Environment, i.e. a list of EnvironmentFrame objects.

    Also used for entities that are shaped like an Environment, such as
    Objects Under Construction.
    """

    def __init__(self, json_e):
        super(GenericEnvironment, self).__init__()
        self.frames = []
        for json_frame in json_e:
            self.frames.append(EnvironmentFrame(json_frame))

    def diff_frames(self, prev):
        """Return the indices of frames whose bindings differ from ``prev``.

        Returns None when no frame-by-frame diff can be produced: either the
        number of frames differs or some frame has a different location
        context than its counterpart in ``prev``.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for index, (frame, prev_frame) in enumerate(
                zip(self.frames, prev.frames)):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        """Tell whether this environment differs from ``prev`` in any way."""
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0
177
178
179# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    """A single binding key in a deserialized RegionStore cluster.

    Two keys are equal when both their ``kind`` and ``offset`` are equal.
    """

    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Comparing against an unrelated object must not blow up with an
        # AttributeError; let Python fall back to its default handling.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it has to be spelled out
        # to keep both operators consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._key())
194
195
196# A single cluster of the deserialized RegionStore.
class StoreCluster(object):
    """A single cluster of the deserialized RegionStore.

    ``base_region`` is the 'cluster' entry and ``bindings`` maps
    StoreBindingKey objects to the 'value' entry of each item, in the order
    of the 'items' list.
    """

    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']

        pairs = []
        for json_item in json_sc['items']:
            binding_key = StoreBindingKey(json_item)
            pairs.append((binding_key, json_item['value']))
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        removed, added = self.diff_bindings(prev)
        if len(removed) != 0:
            return True
        return len(added) != 0
210
211
212# A deserialized RegionStore.
class Store(object):
    """A deserialized RegionStore.

    ``ptr`` is the 'pointer' entry of the store and ``clusters`` maps the
    'pointer' entry of every item to its StoreCluster, in input order.
    """

    def __init__(self, json_s):
        super(Store, self).__init__()
        self.ptr = json_s['pointer']

        pairs = []
        for json_cluster in json_s['items']:
            cluster = StoreCluster(json_cluster)
            pairs.append((json_cluster['pointer'], cluster))
        self.clusters = collections.OrderedDict(pairs)

    def diff_clusters(self, prev):
        """Return ``(removed, added, updated)`` cluster keys relative to ``prev``.

        ``updated`` lists the keys present in both stores whose clusters
        report a difference.
        """
        mine = self.clusters
        theirs = prev.clusters

        removed = [key for key in theirs if key not in mine]
        added = [key for key in mine if key not in theirs]

        updated = []
        for key in theirs:
            if key in mine and theirs[key].is_different(mine[key]):
                updated.append(key)
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any cluster was removed, added or updated."""
        for keys in self.diff_clusters(prev):
            if len(keys) != 0:
                return True
        return False
230
231
232# Deserialized messages from a single checker in a single program state.
233# Basically a list of raw strings.
class CheckerLines(object):
    """Messages from a single checker in a single program state.

    Basically a list of raw strings, kept in ``lines``.
    """

    def __init__(self, json_lines):
        super(CheckerLines, self).__init__()
        self.lines = json_lines

    def diff_lines(self, prev):
        """Return the stripped '+'/'-' entries of an ndiff against ``prev``.

        Entries of the ndiff output that start with anything else are
        dropped.
        """
        changed = []
        for entry in difflib.ndiff(prev.lines, self.lines):
            if entry.startswith(('+', '-')):
                changed.append(entry.strip())
        return changed

    def is_different(self, prev):
        """Tell whether the ndiff against ``prev`` has any '+'/'-' entry."""
        return len(self.diff_lines(prev)) > 0
246
247
248# Deserialized messages of all checkers, separated by checker.
class CheckerMessages(object):
    """Deserialized messages of all checkers, separated by checker.

    ``items`` maps each 'checker' entry to a CheckerLines object built from
    the matching 'messages' entry, in input order.
    """

    def __init__(self, json_m):
        super(CheckerMessages, self).__init__()
        pairs = []
        for json_entry in json_m:
            lines = CheckerLines(json_entry['messages'])
            pairs.append((json_entry['checker'], lines))
        self.items = collections.OrderedDict(pairs)

    def diff_messages(self, prev):
        """Return ``(removed, added, updated)`` checker names relative to ``prev``.

        ``updated`` lists the checkers present in both states whose lines
        report a difference.
        """
        mine = self.items
        theirs = prev.items

        removed = [name for name in theirs if name not in mine]
        added = [name for name in mine if name not in theirs]

        updated = []
        for name in theirs:
            if name in mine and theirs[name].is_different(mine[name]):
                updated.append(name)
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any checker was removed, added or updated."""
        for names in self.diff_messages(prev):
            if len(names) != 0:
                return True
        return False
265
266
267# A deserialized program state.
class ProgramState(object):
    """A deserialized program state.

    Every trait ('store', 'environment', 'constraints', 'dynamic_types',
    'constructing_objects', 'checker_messages') must be present in
    ``json_ps`` but may be null, in which case the matching attribute is
    None.
    """

    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id

        json_store = json_ps['store']
        if json_store is None:
            self.store = None
        else:
            self.store = Store(json_store)

        # The frames of the environment live under its 'items' entry.
        json_env = json_ps['environment']
        if json_env is None:
            self.environment = None
        else:
            self.environment = GenericEnvironment(json_env['items'])

        json_constraints = json_ps['constraints']
        if json_constraints is None:
            self.constraints = None
        else:
            self.constraints = GenericMap([
                (c['symbol'], c['range']) for c in json_constraints
            ])

        json_dyn_types = json_ps['dynamic_types']
        if json_dyn_types is None:
            self.dynamic_types = None
        else:
            pairs = []
            for t in json_dyn_types:
                suffix = ' (or a sub-class)' if t['sub_classable'] else ''
                pairs.append((t['region'],
                              '%s%s' % (t['dyn_type'], suffix)))
            self.dynamic_types = GenericMap(pairs)

        # Unlike the environment, this trait is a plain list of frames.
        json_objects = json_ps['constructing_objects']
        if json_objects is None:
            self.constructing_objects = None
        else:
            self.constructing_objects = GenericEnvironment(json_objects)

        json_messages = json_ps['checker_messages']
        if json_messages is None:
            self.checker_messages = None
        else:
            self.checker_messages = CheckerMessages(json_messages)
299
300
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for predecessors and successors.
class ExplodedNode(object):
    """A deserialized exploded graph node.

    A freshly created node only has empty ``predecessors`` and
    ``successors`` lists; the remaining attributes are filled in later by
    construct().
    """

    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Fill in the node from ``json_node``.

        ``node_id`` is the name under which the node is known to the graph;
        it must match the name derived from the node's 'pointer' entry.
        """
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.has_report = json_node['has_report']
        self.is_sink = json_node['is_sink']
        self.points = [ProgramPoint(p) for p in json_node['program_points']]

        # The 'program_state' entry is mandatory but may be null.
        json_state = json_node['program_state']
        if json_state is None:
            self.state = None
        else:
            self.state = ProgramState(json_node['state_id'], json_state)

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node's name: 'Node' followed by its pointer."""
        return 'Node' + self.ptr
325
326
327# A deserialized ExplodedGraph. Constructed by consuming a .dot file
328# line-by-line.
class ExplodedGraph(object):
    """A deserialized ExplodedGraph, built by consuming a .dot file line by line.

    ``nodes`` maps node names to ExplodedNode objects (created on first
    use), ``root_id`` is the name of the first node line seen while the node
    table was still empty, and ``incomplete_line`` buffers text that has not
    been terminated by ';' yet.
    """

    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    @staticmethod
    def _unescape_label(raw_label):
        # Turn the escaped label text of a node line into a JSON string.
        # The replacements are applied strictly in this order.
        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # NOTE(review): as written, the second replacement removes every
        # space character, including those inside JSON string values --
        # confirm that this is intended.
        replacements = (
            ('\\l', ''),
            (' ', ''),
            ('\\"', '"'),
            ('\\{', '{'),
            ('\\}', '}'),
            ('\\\\', '\\'),
            ('\\|', '|'),
            ('\\<', '\\\\<'),
            ('\\>', '\\\\>'),
        )
        label = raw_label
        for escaped, plain in replacements:
            label = label.replace(escaped, plain)
        return label.rstrip(',')

    def add_raw_line(self, raw_line):
        """Consume one line of the .dot file.

        Lines starting with '//' are ignored. Edge lines link the two named
        nodes, node lines deserialize the node's JSON label, and any other
        complete line is skipped.
        """
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(';'):
            self.incomplete_line += raw_line
            return
        line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + line)

        edge_match = self.edge_re.match(line)
        if edge_match is not None:
            logging.debug('Classified as edge line.')
            pred, succ = edge_match.group(1), edge_match.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(line)
        if node_match is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node_match.group(1)
        # The first node line seen with an empty node table names the root.
        if len(self.nodes) == 0:
            self.root_id = node_id
        node_label = self._unescape_label(node_match.group(2))
        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
387        logging.debug('Skipping.')
388
389
390#===-----------------------------------------------------------------------===#
391# Visitors traverse a deserialized ExplodedGraph and do different things
392# with every node and edge.
393#===-----------------------------------------------------------------------===#
394
395
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
398class DotDumpVisitor(object):
399    def __init__(self, do_diffs, dark_mode, gray_mode,
400                 topo_mode, dump_dot_only):
401        super(DotDumpVisitor, self).__init__()
402        self._do_diffs = do_diffs
403        self._dark_mode = dark_mode
404        self._gray_mode = gray_mode
405        self._topo_mode = topo_mode
406        self._dump_dot_only = dump_dot_only
407        self._output = []
408
409    def _dump_raw(self, s):
410        if self._dump_dot_only:
411            print(s, end='')
412        else:
413            self._output.append(s)
414
415    def output(self):
416        assert not self._dump_dot_only
417        return ''.join(self._output)
418
419    def _dump(self, s):
420        s = s.replace('&', '&amp;') \
421             .replace('{', '\\{') \
422             .replace('}', '\\}') \
423             .replace('\\<', '&lt;') \
424             .replace('\\>', '&gt;') \
425             .replace('\\l', '<br />') \
426             .replace('|', '\\|')
427        if self._gray_mode:
428            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
429            s = re.sub(r'</font>', '', s)
430        self._dump_raw(s)
431
432    @staticmethod
433    def _diff_plus_minus(is_added):
434        if is_added is None:
435            return ''
436        if is_added:
437            return '<font color="forestgreen">+</font>'
438        return '<font color="red">-</font>'
439
440    @staticmethod
441    def _short_pretty(s):
442        if s is None:
443            return None
444        if len(s) < 20:
445            return s
446        left = s.find('{')
447        right = s.rfind('}')
448        if left == -1 or right == -1 or left >= right:
449            return s
450        candidate = s[0:left + 1] + ' ... ' + s[right:]
451        if len(candidate) >= len(s):
452            return s
453        return candidate
454
455    @staticmethod
456    def _make_sloc(loc):
457        if loc is None:
458            return '<i>Invalid Source Location</i>'
459
460        def make_plain_loc(loc):
461            return '%s:<b>%s</b>:<b>%s</b>' \
462                % (loc.filename, loc.line, loc.col)
463
464        if loc.is_macro():
465            return '%s <font color="royalblue1">' \
466                   '(<i>spelling at </i> %s)</font>' \
467                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
468
469        return make_plain_loc(loc)
470
471    def visit_begin_graph(self, graph):
472        self._graph = graph
473        self._dump_raw('digraph "ExplodedGraph" {\n')
474        if self._dark_mode:
475            self._dump_raw('bgcolor="gray10";\n')
476        self._dump_raw('label="";\n')
477
478    def visit_program_point(self, p):
479        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
480            color = 'gold3'
481        elif p.kind in ['PreStmtPurgeDeadSymbols',
482                        'PostStmtPurgeDeadSymbols']:
483            color = 'red'
484        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
485            color = 'dodgerblue' if self._dark_mode else 'blue'
486        elif p.kind in ['Statement']:
487            color = 'cyan4'
488        else:
489            color = 'forestgreen'
490
491        if p.kind == 'Statement':
492            # This avoids pretty-printing huge statements such as CompoundStmt.
493            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
494            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
495            stmt_color = 'cyan3'
496            self._dump('<tr><td align="left" width="0">%s:</td>'
497                       '<td align="left" width="0"><font color="%s">'
498                       '%s</font> </td>'
499                       '<td align="left"><i>S%s</i></td>'
500                       '<td align="left"><font color="%s">%s</font></td>'
501                       '<td align="left">%s</td></tr>'
502                       % (self._make_sloc(p.loc), color,
503                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
504                          if p.cast_kind is not None else p.stmt_kind,
505                          p.stmt_id, stmt_color, p.stmt_point_kind,
506                          self._short_pretty(p.pretty)
507                          if not skip_pretty else ''))
508        elif p.kind == 'Edge':
509            self._dump('<tr><td width="0"></td>'
510                       '<td align="left" width="0">'
511                       '<font color="%s">%s</font></td><td align="left">'
512                       '[B%d] -\\> [B%d]</td></tr>'
513                       % (color, 'BlockEdge', p.src_id, p.dst_id))
514        elif p.kind == 'BlockEntrance':
515            self._dump('<tr><td width="0"></td>'
516                       '<td align="left" width="0">'
517                       '<font color="%s">%s</font></td>'
518                       '<td align="left">[B%d]</td></tr>'
519                       % (color, p.kind, p.block_id))
520        else:
521            # TODO: Print more stuff for other kinds of points.
522            self._dump('<tr><td width="0"></td>'
523                       '<td align="left" width="0" colspan="2">'
524                       '<font color="%s">%s</font></td></tr>'
525                       % (color, p.kind))
526
527        if p.tag is not None:
528            self._dump('<tr><td width="0"></td>'
529                       '<td colspan="3" align="left">'
530                       '<b>Tag: </b> <font color="crimson">'
531                       '%s</font></td></tr>' % p.tag)
532
533    def visit_environment(self, e, prev_e=None):
534        self._dump('<table border="0">')
535
536        def dump_location_context(lc, is_added=None):
537            self._dump('<tr><td>%s</td>'
538                       '<td align="left"><b>%s</b></td>'
539                       '<td align="left" colspan="2">'
540                       '<font color="gray60">%s </font>'
541                       '%s</td></tr>'
542                       % (self._diff_plus_minus(is_added),
543                          lc.caption, lc.decl,
544                          ('(%s)' % self._make_sloc(lc.loc))
545                          if lc.loc is not None else ''))
546
547        def dump_binding(f, b, is_added=None):
548            self._dump('<tr><td>%s</td>'
549                       '<td align="left"><i>S%s</i></td>'
550                       '%s'
551                       '<td align="left">%s</td>'
552                       '<td align="left">%s</td></tr>'
553                       % (self._diff_plus_minus(is_added),
554                          b.stmt_id,
555                          '<td align="left"><font color="%s"><i>'
556                          '%s</i></font></td>' % (
557                              'lavender' if self._dark_mode else 'darkgreen',
558                              ('(%s)' % b.kind) if b.kind is not None else ' '
559                          ),
560                          self._short_pretty(b.pretty), f.bindings[b]))
561
562        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
563        if frames_updated:
564            for i in frames_updated:
565                f = e.frames[i]
566                prev_f = prev_e.frames[i]
567                dump_location_context(f.location_context)
568                bindings_removed, bindings_added = f.diff_bindings(prev_f)
569                for b in bindings_removed:
570                    dump_binding(prev_f, b, False)
571                for b in bindings_added:
572                    dump_binding(f, b, True)
573        else:
574            for f in e.frames:
575                dump_location_context(f.location_context)
576                for b in f.bindings:
577                    dump_binding(f, b)
578
579        self._dump('</table>')
580
581    def visit_environment_in_state(self, selector, title, s, prev_s=None):
582        e = getattr(s, selector)
583        prev_e = getattr(prev_s, selector) if prev_s is not None else None
584        if e is None and prev_e is None:
585            return
586
587        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
588        if e is None:
589            self._dump('<i> Nothing!</i>')
590        else:
591            if prev_e is not None:
592                if e.is_different(prev_e):
593                    self._dump('</td></tr><tr><td align="left">')
594                    self.visit_environment(e, prev_e)
595                else:
596                    self._dump('<i> No changes!</i>')
597            else:
598                self._dump('</td></tr><tr><td align="left">')
599                self.visit_environment(e)
600
601        self._dump('</td></tr>')
602
603    def visit_store(self, s, prev_s=None):
604        self._dump('<table border="0">')
605
606        def dump_binding(s, c, b, is_added=None):
607            self._dump('<tr><td>%s</td>'
608                       '<td align="left">%s</td>'
609                       '<td align="left">%s</td>'
610                       '<td align="left">%s</td>'
611                       '<td align="left">%s</td></tr>'
612                       % (self._diff_plus_minus(is_added),
613                          s.clusters[c].base_region, b.offset,
614                          '(<i>Default</i>)' if b.kind == 'Default'
615                          else '',
616                          s.clusters[c].bindings[b]))
617
618        if prev_s is not None:
619            clusters_removed, clusters_added, clusters_updated = \
620                s.diff_clusters(prev_s)
621            for c in clusters_removed:
622                for b in prev_s.clusters[c].bindings:
623                    dump_binding(prev_s, c, b, False)
624            for c in clusters_updated:
625                bindings_removed, bindings_added = \
626                    s.clusters[c].diff_bindings(prev_s.clusters[c])
627                for b in bindings_removed:
628                    dump_binding(prev_s, c, b, False)
629                for b in bindings_added:
630                    dump_binding(s, c, b, True)
631            for c in clusters_added:
632                for b in s.clusters[c].bindings:
633                    dump_binding(s, c, b, True)
634        else:
635            for c in s.clusters:
636                for b in s.clusters[c].bindings:
637                    dump_binding(s, c, b)
638
639        self._dump('</table>')
640
641    def visit_store_in_state(self, s, prev_s=None):
642        st = s.store
643        prev_st = prev_s.store if prev_s is not None else None
644        if st is None and prev_st is None:
645            return
646
647        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
648        if st is None:
649            self._dump('<i> Nothing!</i>')
650        else:
651            if self._dark_mode:
652                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
653            else:
654                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
655            if prev_st is not None:
656                if s.store.is_different(prev_st):
657                    self._dump('</td></tr><tr><td align="left">')
658                    self.visit_store(st, prev_st)
659                else:
660                    self._dump('<i> No changes!</i>')
661            else:
662                self._dump('</td></tr><tr><td align="left">')
663                self.visit_store(st)
664        self._dump('</td></tr>')
665
666    def visit_generic_map(self, m, prev_m=None):
667        self._dump('<table border="0">')
668
669        def dump_pair(m, k, is_added=None):
670            self._dump('<tr><td>%s</td>'
671                       '<td align="left">%s</td>'
672                       '<td align="left">%s</td></tr>'
673                       % (self._diff_plus_minus(is_added),
674                          k, m.generic_map[k]))
675
676        if prev_m is not None:
677            removed, added = m.diff(prev_m)
678            for k in removed:
679                dump_pair(prev_m, k, False)
680            for k in added:
681                dump_pair(m, k, True)
682        else:
683            for k in m.generic_map:
684                dump_pair(m, k, None)
685
686        self._dump('</table>')
687
688    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
689        m = getattr(s, selector)
690        prev_m = getattr(prev_s, selector) if prev_s is not None else None
691        if m is None and prev_m is None:
692            return
693
694        self._dump('<hr />')
695        self._dump('<tr><td align="left">'
696                   '<b>%s: </b>' % title)
697        if m is None:
698            self._dump('<i> Nothing!</i>')
699        else:
700            if prev_m is not None:
701                if m.is_different(prev_m):
702                    self._dump('</td></tr><tr><td align="left">')
703                    self.visit_generic_map(m, prev_m)
704                else:
705                    self._dump('<i> No changes!</i>')
706            else:
707                self._dump('</td></tr><tr><td align="left">')
708                self.visit_generic_map(m)
709
710        self._dump('</td></tr>')
711
712    def visit_checker_messages(self, m, prev_m=None):
713        self._dump('<table border="0">')
714
715        def dump_line(l, is_added=None):
716            self._dump('<tr><td>%s</td>'
717                       '<td align="left">%s</td></tr>'
718                       % (self._diff_plus_minus(is_added), l))
719
720        def dump_chk(chk, is_added=None):
721            dump_line('<i>%s</i>:' % chk, is_added)
722
723        if prev_m is not None:
724            removed, added, updated = m.diff_messages(prev_m)
725            for chk in removed:
726                dump_chk(chk, False)
727                for l in prev_m.items[chk].lines:
728                    dump_line(l, False)
729            for chk in updated:
730                dump_chk(chk)
731                for l in m.items[chk].diff_lines(prev_m.items[chk]):
732                    dump_line(l[1:], l.startswith('+'))
733            for chk in added:
734                dump_chk(chk, True)
735                for l in m.items[chk].lines:
736                    dump_line(l, True)
737        else:
738            for chk in m.items:
739                dump_chk(chk)
740                for l in m.items[chk].lines:
741                    dump_line(l)
742
743        self._dump('</table>')
744
745    def visit_checker_messages_in_state(self, s, prev_s=None):
746        m = s.checker_messages
747        prev_m = prev_s.checker_messages if prev_s is not None else None
748        if m is None and prev_m is None:
749            return
750
751        self._dump('<hr />')
752        self._dump('<tr><td align="left">'
753                   '<b>Checker State: </b>')
754        if m is None:
755            self._dump('<i> Nothing!</i>')
756        else:
757            if prev_m is not None:
758                if m.is_different(prev_m):
759                    self._dump('</td></tr><tr><td align="left">')
760                    self.visit_checker_messages(m, prev_m)
761                else:
762                    self._dump('<i> No changes!</i>')
763            else:
764                self._dump('</td></tr><tr><td align="left">')
765                self.visit_checker_messages(m)
766
767        self._dump('</td></tr>')
768
769    def visit_state(self, s, prev_s):
770        self.visit_store_in_state(s, prev_s)
771        self.visit_environment_in_state('environment', 'Environment',
772                                        s, prev_s)
773        self.visit_generic_map_in_state('constraints', 'Ranges',
774                                        s, prev_s)
775        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
776                                        s, prev_s)
777        self.visit_environment_in_state('constructing_objects',
778                                        'Objects Under Construction',
779                                        s, prev_s)
780        self.visit_checker_messages_in_state(s, prev_s)
781
782    def visit_node(self, node):
783        self._dump('%s [shape=record,'
784                   % (node.node_name()))
785        if self._dark_mode:
786            self._dump('color="white",fontcolor="gray80",')
787        self._dump('label=<<table border="0">')
788
789        self._dump('<tr><td bgcolor="%s"><b>Node %d (%s) - '
790                   'State %s</b></td></tr>'
791                   % ("gray20" if self._dark_mode else "gray70",
792                      node.node_id, node.ptr, node.state.state_id
793                      if node.state is not None else 'Unspecified'))
794        if node.has_report:
795            self._dump('<tr><td><font color="red"><b>Bug Report Attached'
796                       '</b></font></td></tr>')
797        if node.is_sink:
798            self._dump('<tr><td><font color="cornflowerblue"><b>Sink Node'
799                       '</b></font></td></tr>')
800        if not self._topo_mode:
801            self._dump('<tr><td align="left" width="0">')
802            if len(node.points) > 1:
803                self._dump('<b>Program points:</b></td></tr>')
804            else:
805                self._dump('<b>Program point:</b></td></tr>')
806        self._dump('<tr><td align="left" width="0">'
807                   '<table border="0" align="left" width="0">')
808        for p in node.points:
809            self.visit_program_point(p)
810        self._dump('</table></td></tr>')
811
812        if node.state is not None and not self._topo_mode:
813            prev_s = None
814            # Do diffs only when we have a unique predecessor.
815            # Don't do diffs on the leaf nodes because they're
816            # the important ones.
817            if self._do_diffs and len(node.predecessors) == 1 \
818               and len(node.successors) > 0:
819                prev_s = self._graph.nodes[node.predecessors[0]].state
820            self.visit_state(node.state, prev_s)
821        self._dump_raw('</table>>];\n')
822
823    def visit_edge(self, pred, succ):
824        self._dump_raw('%s -> %s%s;\n' % (
825            pred.node_name(), succ.node_name(),
826            ' [color="white"]' if self._dark_mode else ''
827        ))
828
829    def visit_end_of_graph(self):
830        self._dump_raw('}\n')
831
832        if not self._dump_dot_only:
833            import sys
834            import tempfile
835
836            def write_temp_file(suffix, data):
837                fd, filename = tempfile.mkstemp(suffix=suffix)
838                print('Writing "%s"...' % filename)
839                with os.fdopen(fd, 'w') as fp:
840                    fp.write(data)
841                print('Done! Please remember to remove the file.')
842                return filename
843
844            try:
845                import graphviz
846            except ImportError:
847                # The fallback behavior if graphviz is not installed!
848                print('Python graphviz not found. Please invoke')
849                print('  $ pip install graphviz')
850                print('in order to enable automatic conversion to HTML.')
851                print()
852                print('You may also convert DOT to SVG manually via')
853                print('  $ dot -Tsvg input.dot -o output.svg')
854                print()
855                write_temp_file('.dot', self.output())
856                return
857
858            svg = graphviz.pipe('dot', 'svg', self.output())
859
860            filename = write_temp_file(
861                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
862                             '#1a1a1a' if self._dark_mode else 'white', svg))
863            if sys.platform == 'win32':
864                os.startfile(filename)
865            elif sys.platform == 'darwin':
866                os.system('open "%s"' % filename)
867            else:
868                os.system('xdg-open "%s"' % filename)
869
870
871#===-----------------------------------------------------------------------===#
872# Explorers know how to traverse the ExplodedGraph in a certain order.
873# They would invoke a Visitor on every node or edge they encounter.
874#===-----------------------------------------------------------------------===#
875
876
# BasicExplorer walks the whole graph: every node, in sorted order of its key,
# immediately followed by each of its outgoing edges in sorted successor order.
class BasicExplorer(object):
    def __init__(self):
        super(BasicExplorer, self).__init__()

    def explore(self, graph, visitor):
        # Bracket the traversal with the visitor's begin/end callbacks.
        visitor.visit_begin_graph(graph)
        for name in sorted(graph.nodes):
            logging.debug('Visiting ' + name)
            current = graph.nodes[name]
            visitor.visit_node(current)
            for succ_name in sorted(current.successors):
                logging.debug('Visiting edge: %s -> %s ' % (name, succ_name))
                visitor.visit_edge(current, graph.nodes[succ_name])
        visitor.visit_end_of_graph()
891
892
893#===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph so as to focus on other parts.
895# Trimmers can be combined together by applying them sequentially.
896#===-----------------------------------------------------------------------===#
897
898
# SinglePathTrimmer reduces the graph to one path: starting at the root it
# always follows the first successor and drops everything else. Handy when
# an already trimmed graph is still too large.
class SinglePathTrimmer(object):
    def __init__(self):
        super(SinglePathTrimmer, self).__init__()

    def trim(self, graph):
        kept_ids = set()
        current_id = graph.root_id
        while True:
            kept_ids.add(current_id)
            current = graph.nodes[current_id]
            if len(current.successors) == 0:
                # Reached the end of the path.
                break
            next_id = current.successors[0]
            following = graph.nodes[next_id]
            # Leave exactly one edge between the two nodes.
            current.successors = [next_id]
            following.predecessors = [current_id]
            if next_id in kept_ids:
                # The path loops back onto itself; stop here.
                break
            current_id = next_id
        graph.nodes = dict((kept_id, graph.nodes[kept_id])
                           for kept_id in kept_ids)
923
924
# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer(object):
    def __init__(self, target_nodes):
        super(TargetedTrimmer, self).__init__()
        # Keys of graph.nodes that must remain reachable after trimming.
        self._target_nodes = target_nodes

    @staticmethod
    def parse_target_node(node, graph):
        """Translate one user-supplied node reference into a graph.nodes key.

        `node` is a string holding either a pointer ('0x...') or a decimal
        stable ID; surrounding whitespace is ignored.

        Raises ValueError if the reference is not a pointer and not an
        integer, or if no node in `graph` matches it.
        """
        node = node.strip()
        if node.startswith('0x'):
            ret = 'Node' + node
            # Raise explicitly: an assert would vanish under `python -O`.
            if ret not in graph.nodes:
                raise ValueError(
                    'No node with pointer %s found in the graph' % node)
            return ret

        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Falling through would return None and crash much later in trim().
        raise ValueError(
            'No node with stable ID %d found in the graph' % stable_id)

    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        """Parse a comma-separated list of node references.

        Returns the list of matching graph.nodes keys, in input order.
        """
        return [TargetedTrimmer.parse_target_node(node, graph)
                for node in target_nodes.split(',')]

    def trim(self, graph):
        """Reduce `graph`, in place, to the targets and their ancestors.

        Edges of the surviving nodes that point at removed nodes are
        dropped as well.
        """
        # Work on a copy: the target list belongs to the caller and must
        # survive so that the trimmer can be applied more than once.
        queue = list(self._target_nodes)
        # Mark nodes when they are queued so that none is queued twice.
        visited_nodes = set(queue)

        while queue:
            node_id = queue.pop()
            for pred_id in graph.nodes[node_id].predecessors:
                if pred_id not in visited_nodes:
                    visited_nodes.add(pred_id)
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        for node in graph.nodes.values():
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [pred_id for pred_id in node.predecessors
                                 if pred_id in visited_nodes]
969
970
971#===-----------------------------------------------------------------------===#
972# The entry point to the script.
973#===-----------------------------------------------------------------------===#
974
975
def main():
    # Parse the command line, load the graph dump, trim it as requested and
    # hand it over to the DOT-dumping visitor.
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')
    parser.add_argument(
        'filename', type=str,
        help='the .dot file produced by the Static Analyzer')
    parser.add_argument(
        '-v', '--verbose', dest='loglevel', action='store_const',
        const=logging.DEBUG, default=logging.WARNING,
        help='enable info prints')
    parser.add_argument(
        '-d', '--diff', action='store_true',
        help='display differences between states')
    parser.add_argument(
        '-t', '--topology', action='store_true',
        help='only display program points, omit states')
    parser.add_argument(
        '-s', '--single-path', action='store_true',
        help='only display the leftmost path in the graph (useful for '
             'trimmed graphs that still branch too much)')
    parser.add_argument(
        '--to', type=str, default=None,
        help='only display execution paths from the root to the given '
             'comma-separated list of nodes identified by a pointer or a '
             'stable ID; compatible with --single-path')
    parser.add_argument(
        '--dark', action='store_true',
        help='dark mode')
    parser.add_argument(
        '--gray', action='store_true',
        help='black-and-white mode')
    parser.add_argument(
        '--dump-dot-only', action='store_true',
        help='instead of writing an HTML file and immediately displaying '
             'it, dump the rewritten dot file to stdout')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    # Feed the dump to the graph one whitespace-stripped line at a time.
    graph = ExplodedGraph()
    with open(args.filename) as dump:
        for line in dump:
            graph.add_raw_line(line.strip())

    # Targeted trimming goes first so that --single-path then picks a path
    # within what is left.
    trimmers = []
    if args.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(args.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if args.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()
    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
                             args.dump_dot_only)

    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)


if __name__ == '__main__':
    main()
1041