xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 14e9eb3d7caed026a805033a9ce7b9e70d18bf04)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23#===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25#===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
29def diff_dicts(curr, prev):
30    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
31    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
32    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
36class GenericMap(object):
37    def __init__(self, items):
38        self.generic_map = collections.OrderedDict(items)
39
40    def diff(self, prev):
41        return diff_dicts(self.generic_map, prev.generic_map)
42
43    def is_different(self, prev):
44        removed, added = self.diff(prev)
45        return len(removed) != 0 or len(added) != 0
46
47
48# A deserialized source location.
49class SourceLocation(object):
50    def __init__(self, json_loc):
51        super(SourceLocation, self).__init__()
52        logging.debug('json: %s' % json_loc)
53        self.line = json_loc['line']
54        self.col = json_loc['column']
55        self.filename = os.path.basename(json_loc['file']) \
56            if 'file' in json_loc else '(main file)'
57        self.spelling = SourceLocation(json_loc['spelling']) \
58            if 'spelling' in json_loc else None
59
60    def is_macro(self):
61        return self.spelling is not None
62
63
64# A deserialized program point.
65class ProgramPoint(object):
66    def __init__(self, json_pp):
67        super(ProgramPoint, self).__init__()
68        self.kind = json_pp['kind']
69        self.tag = json_pp['tag']
70        self.node_id = json_pp['node_id']
71        self.is_sink = bool(json_pp['is_sink'])
72        self.has_report = bool(json_pp['has_report'])
73        if self.kind == 'Edge':
74            self.src_id = json_pp['src_id']
75            self.dst_id = json_pp['dst_id']
76        elif self.kind == 'Statement':
77            logging.debug(json_pp)
78            self.stmt_kind = json_pp['stmt_kind']
79            self.cast_kind = json_pp['cast_kind'] \
80                if 'cast_kind' in json_pp else None
81            self.stmt_point_kind = json_pp['stmt_point_kind']
82            self.stmt_id = json_pp['stmt_id']
83            self.pointer = json_pp['pointer']
84            self.pretty = json_pp['pretty']
85            self.loc = SourceLocation(json_pp['location']) \
86                if json_pp['location'] is not None else None
87        elif self.kind == 'BlockEntrance':
88            self.block_id = json_pp['block_id']
89
90
91# A single expression acting as a key in a deserialized Environment.
92class EnvironmentBindingKey(object):
93    def __init__(self, json_ek):
94        super(EnvironmentBindingKey, self).__init__()
95        # CXXCtorInitializer is not a Stmt!
96        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
97            else json_ek['init_id']
98        self.pretty = json_ek['pretty']
99        self.kind = json_ek['kind'] if 'kind' in json_ek else None
100
101    def _key(self):
102        return self.stmt_id
103
104    def __eq__(self, other):
105        return self._key() == other._key()
106
107    def __hash__(self):
108        return hash(self._key())
109
110
111# Deserialized description of a location context.
112class LocationContext(object):
113    def __init__(self, json_frame):
114        super(LocationContext, self).__init__()
115        self.lctx_id = json_frame['lctx_id']
116        self.caption = json_frame['location_context']
117        self.decl = json_frame['calling']
118        self.loc = SourceLocation(json_frame['location']) \
119            if json_frame['location'] is not None else None
120
121    def _key(self):
122        return self.lctx_id
123
124    def __eq__(self, other):
125        return self._key() == other._key()
126
127    def __hash__(self):
128        return hash(self._key())
129
130
131# A group of deserialized Environment bindings that correspond to a specific
132# location context.
133class EnvironmentFrame(object):
134    def __init__(self, json_frame):
135        super(EnvironmentFrame, self).__init__()
136        self.location_context = LocationContext(json_frame)
137        self.bindings = collections.OrderedDict(
138            [(EnvironmentBindingKey(b),
139              b['value']) for b in json_frame['items']]
140            if json_frame['items'] is not None else [])
141
142    def diff_bindings(self, prev):
143        return diff_dicts(self.bindings, prev.bindings)
144
145    def is_different(self, prev):
146        removed, added = self.diff_bindings(prev)
147        return len(removed) != 0 or len(added) != 0
148
149
150# A deserialized Environment. This class can also hold other entities that
151# are similar to Environment, such as Objects Under Construction.
152class GenericEnvironment(object):
153    def __init__(self, json_e):
154        super(GenericEnvironment, self).__init__()
155        self.frames = [EnvironmentFrame(f) for f in json_e]
156
157    def diff_frames(self, prev):
158        # TODO: It's difficult to display a good diff when frame numbers shift.
159        if len(self.frames) != len(prev.frames):
160            return None
161
162        updated = []
163        for i in range(len(self.frames)):
164            f = self.frames[i]
165            prev_f = prev.frames[i]
166            if f.location_context == prev_f.location_context:
167                if f.is_different(prev_f):
168                    updated.append(i)
169            else:
170                # We have the whole frame replaced with another frame.
171                # TODO: Produce a nice diff.
172                return None
173
174        # TODO: Add support for added/removed.
175        return updated
176
177    def is_different(self, prev):
178        updated = self.diff_frames(prev)
179        return updated is None or len(updated) > 0
180
181
182# A single binding key in a deserialized RegionStore cluster.
183class StoreBindingKey(object):
184    def __init__(self, json_sk):
185        super(StoreBindingKey, self).__init__()
186        self.kind = json_sk['kind']
187        self.offset = json_sk['offset']
188
189    def _key(self):
190        return (self.kind, self.offset)
191
192    def __eq__(self, other):
193        return self._key() == other._key()
194
195    def __hash__(self):
196        return hash(self._key())
197
198
199# A single cluster of the deserialized RegionStore.
200class StoreCluster(object):
201    def __init__(self, json_sc):
202        super(StoreCluster, self).__init__()
203        self.base_region = json_sc['cluster']
204        self.bindings = collections.OrderedDict(
205            [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
206
207    def diff_bindings(self, prev):
208        return diff_dicts(self.bindings, prev.bindings)
209
210    def is_different(self, prev):
211        removed, added = self.diff_bindings(prev)
212        return len(removed) != 0 or len(added) != 0
213
214
215# A deserialized RegionStore.
216class Store(object):
217    def __init__(self, json_s):
218        super(Store, self).__init__()
219        self.ptr = json_s['pointer']
220        self.clusters = collections.OrderedDict(
221            [(c['pointer'], StoreCluster(c)) for c in json_s['items']])
222
223    def diff_clusters(self, prev):
224        removed = [k for k in prev.clusters if k not in self.clusters]
225        added = [k for k in self.clusters if k not in prev.clusters]
226        updated = [k for k in prev.clusters if k in self.clusters
227                   and prev.clusters[k].is_different(self.clusters[k])]
228        return (removed, added, updated)
229
230    def is_different(self, prev):
231        removed, added, updated = self.diff_clusters(prev)
232        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
233
234
235# Deserialized messages from a single checker in a single program state.
236# Basically a list of raw strings.
237class CheckerLines(object):
238    def __init__(self, json_lines):
239        super(CheckerLines, self).__init__()
240        self.lines = json_lines
241
242    def diff_lines(self, prev):
243        lines = difflib.ndiff(prev.lines, self.lines)
244        return [l.strip() for l in lines
245                if l.startswith('+') or l.startswith('-')]
246
247    def is_different(self, prev):
248        return len(self.diff_lines(prev)) > 0
249
250
251# Deserialized messages of all checkers, separated by checker.
252class CheckerMessages(object):
253    def __init__(self, json_m):
254        super(CheckerMessages, self).__init__()
255        self.items = collections.OrderedDict(
256            [(m['checker'], CheckerLines(m['messages'])) for m in json_m])
257
258    def diff_messages(self, prev):
259        removed = [k for k in prev.items if k not in self.items]
260        added = [k for k in self.items if k not in prev.items]
261        updated = [k for k in prev.items if k in self.items
262                   and prev.items[k].is_different(self.items[k])]
263        return (removed, added, updated)
264
265    def is_different(self, prev):
266        removed, added, updated = self.diff_messages(prev)
267        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
268
269
270# A deserialized program state.
271class ProgramState(object):
272    def __init__(self, state_id, json_ps):
273        super(ProgramState, self).__init__()
274        logging.debug('Adding ProgramState ' + str(state_id))
275
276        self.state_id = state_id
277
278        self.store = Store(json_ps['store']) \
279            if json_ps['store'] is not None else None
280
281        self.environment = \
282            GenericEnvironment(json_ps['environment']['items']) \
283            if json_ps['environment'] is not None else None
284
285        self.constraints = GenericMap([
286            (c['symbol'], c['range']) for c in json_ps['constraints']
287        ]) if json_ps['constraints'] is not None else None
288
289        self.dynamic_types = GenericMap([
290                (t['region'], '%s%s' % (t['dyn_type'],
291                                        ' (or a sub-class)'
292                                        if t['sub_classable'] else ''))
293                for t in json_ps['dynamic_types']]) \
294            if json_ps['dynamic_types'] is not None else None
295
296        self.constructing_objects = \
297            GenericEnvironment(json_ps['constructing_objects']) \
298            if json_ps['constructing_objects'] is not None else None
299
300        self.checker_messages = CheckerMessages(json_ps['checker_messages']) \
301            if json_ps['checker_messages'] is not None else None
302
303
304# A deserialized exploded graph node. Has a default constructor because it
305# may be referenced as part of an edge before its contents are deserialized,
306# and in this moment we already need a room for predecessors and successors.
307class ExplodedNode(object):
308    def __init__(self):
309        super(ExplodedNode, self).__init__()
310        self.predecessors = []
311        self.successors = []
312
313    def construct(self, node_id, json_node):
314        logging.debug('Adding ' + node_id)
315        self.ptr = node_id[4:]
316        self.points = [ProgramPoint(p) for p in json_node['program_points']]
317        self.node_id = self.points[-1].node_id
318        self.state = ProgramState(json_node['state_id'],
319                                  json_node['program_state']) \
320            if json_node['program_state'] is not None else None
321
322        assert self.node_name() == node_id
323
324    def node_name(self):
325        return 'Node' + self.ptr
326
327
328# A deserialized ExplodedGraph. Constructed by consuming a .dot file
329# line-by-line.
330class ExplodedGraph(object):
331    # Parse .dot files with regular expressions.
332    node_re = re.compile(
333        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
334    edge_re = re.compile(
335        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')
336
337    def __init__(self):
338        super(ExplodedGraph, self).__init__()
339        self.nodes = collections.defaultdict(ExplodedNode)
340        self.root_id = None
341        self.incomplete_line = ''
342
343    def add_raw_line(self, raw_line):
344        if raw_line.startswith('//'):
345            return
346
347        # Allow line breaks by waiting for ';'. This is not valid in
348        # a .dot file, but it is useful for writing tests.
349        if len(raw_line) > 0 and raw_line[-1] != ';':
350            self.incomplete_line += raw_line
351            return
352        raw_line = self.incomplete_line + raw_line
353        self.incomplete_line = ''
354
355        # Apply regexps one by one to see if it's a node or an edge
356        # and extract contents if necessary.
357        logging.debug('Line: ' + raw_line)
358        result = self.edge_re.match(raw_line)
359        if result is not None:
360            logging.debug('Classified as edge line.')
361            pred = result.group(1)
362            succ = result.group(2)
363            self.nodes[pred].successors.append(succ)
364            self.nodes[succ].predecessors.append(pred)
365            return
366        result = self.node_re.match(raw_line)
367        if result is not None:
368            logging.debug('Classified as node line.')
369            node_id = result.group(1)
370            if len(self.nodes) == 0:
371                self.root_id = node_id
372            # Note: when writing tests you don't need to escape everything,
373            # even though in a valid dot file everything is escaped.
374            node_label = result.group(2).replace('\\l', '') \
375                                        .replace(' ', '') \
376                                        .replace('\\"', '"') \
377                                        .replace('\\{', '{') \
378                                        .replace('\\}', '}') \
379                                        .replace('\\\\', '\\') \
380                                        .replace('\\|', '|') \
381                                        .replace('\\<', '\\\\<') \
382                                        .replace('\\>', '\\\\>') \
383                                        .rstrip(',')
384            logging.debug(node_label)
385            json_node = json.loads(node_label)
386            self.nodes[node_id].construct(node_id, json_node)
387            return
388        logging.debug('Skipping.')
389
390
391#===-----------------------------------------------------------------------===#
392# Visitors traverse a deserialized ExplodedGraph and do different things
393# with every node and edge.
394#===-----------------------------------------------------------------------===#
395
396
397# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
398# syntax highlighing.
399class DotDumpVisitor(object):
400    def __init__(self, do_diffs, dark_mode, gray_mode,
401                 topo_mode, dump_dot_only):
402        super(DotDumpVisitor, self).__init__()
403        self._do_diffs = do_diffs
404        self._dark_mode = dark_mode
405        self._gray_mode = gray_mode
406        self._topo_mode = topo_mode
407        self._dump_dot_only = dump_dot_only
408        self._output = []
409
410    def _dump_raw(self, s):
411        if self._dump_dot_only:
412            print(s, end='')
413        else:
414            self._output.append(s)
415
416    def output(self):
417        assert not self._dump_dot_only
418        return ''.join(self._output)
419
420    def _dump(self, s):
421        s = s.replace('&', '&amp;') \
422             .replace('{', '\\{') \
423             .replace('}', '\\}') \
424             .replace('\\<', '&lt;') \
425             .replace('\\>', '&gt;') \
426             .replace('\\l', '<br />') \
427             .replace('|', '\\|')
428        if self._gray_mode:
429            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
430            s = re.sub(r'</font>', '', s)
431        self._dump_raw(s)
432
433    @staticmethod
434    def _diff_plus_minus(is_added):
435        if is_added is None:
436            return ''
437        if is_added:
438            return '<font color="forestgreen">+</font>'
439        return '<font color="red">-</font>'
440
441    @staticmethod
442    def _short_pretty(s):
443        if s is None:
444            return None
445        if len(s) < 20:
446            return s
447        left = s.find('{')
448        right = s.rfind('}')
449        if left == -1 or right == -1 or left >= right:
450            return s
451        candidate = s[0:left + 1] + ' ... ' + s[right:]
452        if len(candidate) >= len(s):
453            return s
454        return candidate
455
456    @staticmethod
457    def _make_sloc(loc):
458        if loc is None:
459            return '<i>Invalid Source Location</i>'
460
461        def make_plain_loc(loc):
462            return '%s:<b>%s</b>:<b>%s</b>' \
463                % (loc.filename, loc.line, loc.col)
464
465        if loc.is_macro():
466            return '%s <font color="royalblue1">' \
467                   '(<i>spelling at </i> %s)</font>' \
468                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
469
470        return make_plain_loc(loc)
471
472    def visit_begin_graph(self, graph):
473        self._graph = graph
474        self._dump_raw('digraph "ExplodedGraph" {\n')
475        if self._dark_mode:
476            self._dump_raw('bgcolor="gray10";\n')
477        self._dump_raw('label="";\n')
478
479    def visit_program_point(self, p):
480        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
481            color = 'gold3'
482        elif p.kind in ['PreStmtPurgeDeadSymbols',
483                        'PostStmtPurgeDeadSymbols']:
484            color = 'red'
485        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
486            color = 'dodgerblue' if self._dark_mode else 'blue'
487        elif p.kind in ['Statement']:
488            color = 'cyan4'
489        else:
490            color = 'forestgreen'
491
492        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
493
494        if p.kind == 'Statement':
495            # This avoids pretty-printing huge statements such as CompoundStmt.
496            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
497            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
498            stmt_color = 'cyan3'
499            self._dump('<td align="left" width="0">%s:</td>'
500                       '<td align="left" width="0"><font color="%s">'
501                       '%s</font> </td>'
502                       '<td align="left"><i>S%s</i></td>'
503                       '<td align="left"><font color="%s">%s</font></td>'
504                       '<td align="left">%s</td></tr>'
505                       % (self._make_sloc(p.loc), color,
506                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
507                          if p.cast_kind is not None else p.stmt_kind,
508                          p.stmt_id, stmt_color, p.stmt_point_kind,
509                          self._short_pretty(p.pretty)
510                          if not skip_pretty else ''))
511        elif p.kind == 'Edge':
512            self._dump('<td width="0"></td>'
513                       '<td align="left" width="0">'
514                       '<font color="%s">%s</font></td><td align="left">'
515                       '[B%d] -\\> [B%d]</td></tr>'
516                       % (color, 'BlockEdge', p.src_id, p.dst_id))
517        elif p.kind == 'BlockEntrance':
518            self._dump('<td width="0"></td>'
519                       '<td align="left" width="0">'
520                       '<font color="%s">%s</font></td>'
521                       '<td align="left">[B%d]</td></tr>'
522                       % (color, p.kind, p.block_id))
523        else:
524            # TODO: Print more stuff for other kinds of points.
525            self._dump('<td width="0"></td>'
526                       '<td align="left" width="0" colspan="2">'
527                       '<font color="%s">%s</font></td></tr>'
528                       % (color, p.kind))
529
530        if p.tag is not None:
531            self._dump('<tr><td width="0"></td><td width="0"></td>'
532                       '<td colspan="3" align="left">'
533                       '<b>Tag: </b> <font color="crimson">'
534                       '%s</font></td></tr>' % p.tag)
535
536        if p.has_report:
537            self._dump('<tr><td width="0"></td><td width="0"></td>'
538                       '<td colspan="3" align="left">'
539                       '<font color="red"><b>Bug Report Attached'
540                       '</b></font></td></tr>')
541        if p.is_sink:
542            self._dump('<tr><td width="0"></td><td width="0"></td>'
543                       '<td colspan="3" align="left">'
544                       '<font color="cornflowerblue"><b>Sink Node'
545                       '</b></font></td></tr>')
546
547    def visit_environment(self, e, prev_e=None):
548        self._dump('<table border="0">')
549
550        def dump_location_context(lc, is_added=None):
551            self._dump('<tr><td>%s</td>'
552                       '<td align="left"><b>%s</b></td>'
553                       '<td align="left" colspan="2">'
554                       '<font color="gray60">%s </font>'
555                       '%s</td></tr>'
556                       % (self._diff_plus_minus(is_added),
557                          lc.caption, lc.decl,
558                          ('(%s)' % self._make_sloc(lc.loc))
559                          if lc.loc is not None else ''))
560
561        def dump_binding(f, b, is_added=None):
562            self._dump('<tr><td>%s</td>'
563                       '<td align="left"><i>S%s</i></td>'
564                       '%s'
565                       '<td align="left">%s</td>'
566                       '<td align="left">%s</td></tr>'
567                       % (self._diff_plus_minus(is_added),
568                          b.stmt_id,
569                          '<td align="left"><font color="%s"><i>'
570                          '%s</i></font></td>' % (
571                              'lavender' if self._dark_mode else 'darkgreen',
572                              ('(%s)' % b.kind) if b.kind is not None else ' '
573                          ),
574                          self._short_pretty(b.pretty), f.bindings[b]))
575
576        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
577        if frames_updated:
578            for i in frames_updated:
579                f = e.frames[i]
580                prev_f = prev_e.frames[i]
581                dump_location_context(f.location_context)
582                bindings_removed, bindings_added = f.diff_bindings(prev_f)
583                for b in bindings_removed:
584                    dump_binding(prev_f, b, False)
585                for b in bindings_added:
586                    dump_binding(f, b, True)
587        else:
588            for f in e.frames:
589                dump_location_context(f.location_context)
590                for b in f.bindings:
591                    dump_binding(f, b)
592
593        self._dump('</table>')
594
595    def visit_environment_in_state(self, selector, title, s, prev_s=None):
596        e = getattr(s, selector)
597        prev_e = getattr(prev_s, selector) if prev_s is not None else None
598        if e is None and prev_e is None:
599            return
600
601        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
602        if e is None:
603            self._dump('<i> Nothing!</i>')
604        else:
605            if prev_e is not None:
606                if e.is_different(prev_e):
607                    self._dump('</td></tr><tr><td align="left">')
608                    self.visit_environment(e, prev_e)
609                else:
610                    self._dump('<i> No changes!</i>')
611            else:
612                self._dump('</td></tr><tr><td align="left">')
613                self.visit_environment(e)
614
615        self._dump('</td></tr>')
616
617    def visit_store(self, s, prev_s=None):
618        self._dump('<table border="0">')
619
620        def dump_binding(s, c, b, is_added=None):
621            self._dump('<tr><td>%s</td>'
622                       '<td align="left">%s</td>'
623                       '<td align="left">%s</td>'
624                       '<td align="left">%s</td>'
625                       '<td align="left">%s</td></tr>'
626                       % (self._diff_plus_minus(is_added),
627                          s.clusters[c].base_region, b.offset,
628                          '(<i>Default</i>)' if b.kind == 'Default'
629                          else '',
630                          s.clusters[c].bindings[b]))
631
632        if prev_s is not None:
633            clusters_removed, clusters_added, clusters_updated = \
634                s.diff_clusters(prev_s)
635            for c in clusters_removed:
636                for b in prev_s.clusters[c].bindings:
637                    dump_binding(prev_s, c, b, False)
638            for c in clusters_updated:
639                bindings_removed, bindings_added = \
640                    s.clusters[c].diff_bindings(prev_s.clusters[c])
641                for b in bindings_removed:
642                    dump_binding(prev_s, c, b, False)
643                for b in bindings_added:
644                    dump_binding(s, c, b, True)
645            for c in clusters_added:
646                for b in s.clusters[c].bindings:
647                    dump_binding(s, c, b, True)
648        else:
649            for c in s.clusters:
650                for b in s.clusters[c].bindings:
651                    dump_binding(s, c, b)
652
653        self._dump('</table>')
654
655    def visit_store_in_state(self, s, prev_s=None):
656        st = s.store
657        prev_st = prev_s.store if prev_s is not None else None
658        if st is None and prev_st is None:
659            return
660
661        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
662        if st is None:
663            self._dump('<i> Nothing!</i>')
664        else:
665            if self._dark_mode:
666                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
667            else:
668                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
669            if prev_st is not None:
670                if s.store.is_different(prev_st):
671                    self._dump('</td></tr><tr><td align="left">')
672                    self.visit_store(st, prev_st)
673                else:
674                    self._dump('<i> No changes!</i>')
675            else:
676                self._dump('</td></tr><tr><td align="left">')
677                self.visit_store(st)
678        self._dump('</td></tr>')
679
680    def visit_generic_map(self, m, prev_m=None):
681        self._dump('<table border="0">')
682
683        def dump_pair(m, k, is_added=None):
684            self._dump('<tr><td>%s</td>'
685                       '<td align="left">%s</td>'
686                       '<td align="left">%s</td></tr>'
687                       % (self._diff_plus_minus(is_added),
688                          k, m.generic_map[k]))
689
690        if prev_m is not None:
691            removed, added = m.diff(prev_m)
692            for k in removed:
693                dump_pair(prev_m, k, False)
694            for k in added:
695                dump_pair(m, k, True)
696        else:
697            for k in m.generic_map:
698                dump_pair(m, k, None)
699
700        self._dump('</table>')
701
702    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
703        m = getattr(s, selector)
704        prev_m = getattr(prev_s, selector) if prev_s is not None else None
705        if m is None and prev_m is None:
706            return
707
708        self._dump('<hr />')
709        self._dump('<tr><td align="left">'
710                   '<b>%s: </b>' % title)
711        if m is None:
712            self._dump('<i> Nothing!</i>')
713        else:
714            if prev_m is not None:
715                if m.is_different(prev_m):
716                    self._dump('</td></tr><tr><td align="left">')
717                    self.visit_generic_map(m, prev_m)
718                else:
719                    self._dump('<i> No changes!</i>')
720            else:
721                self._dump('</td></tr><tr><td align="left">')
722                self.visit_generic_map(m)
723
724        self._dump('</td></tr>')
725
726    def visit_checker_messages(self, m, prev_m=None):
727        self._dump('<table border="0">')
728
729        def dump_line(l, is_added=None):
730            self._dump('<tr><td>%s</td>'
731                       '<td align="left">%s</td></tr>'
732                       % (self._diff_plus_minus(is_added), l))
733
734        def dump_chk(chk, is_added=None):
735            dump_line('<i>%s</i>:' % chk, is_added)
736
737        if prev_m is not None:
738            removed, added, updated = m.diff_messages(prev_m)
739            for chk in removed:
740                dump_chk(chk, False)
741                for l in prev_m.items[chk].lines:
742                    dump_line(l, False)
743            for chk in updated:
744                dump_chk(chk)
745                for l in m.items[chk].diff_lines(prev_m.items[chk]):
746                    dump_line(l[1:], l.startswith('+'))
747            for chk in added:
748                dump_chk(chk, True)
749                for l in m.items[chk].lines:
750                    dump_line(l, True)
751        else:
752            for chk in m.items:
753                dump_chk(chk)
754                for l in m.items[chk].lines:
755                    dump_line(l)
756
757        self._dump('</table>')
758
759    def visit_checker_messages_in_state(self, s, prev_s=None):
760        m = s.checker_messages
761        prev_m = prev_s.checker_messages if prev_s is not None else None
762        if m is None and prev_m is None:
763            return
764
765        self._dump('<hr />')
766        self._dump('<tr><td align="left">'
767                   '<b>Checker State: </b>')
768        if m is None:
769            self._dump('<i> Nothing!</i>')
770        else:
771            if prev_m is not None:
772                if m.is_different(prev_m):
773                    self._dump('</td></tr><tr><td align="left">')
774                    self.visit_checker_messages(m, prev_m)
775                else:
776                    self._dump('<i> No changes!</i>')
777            else:
778                self._dump('</td></tr><tr><td align="left">')
779                self.visit_checker_messages(m)
780
781        self._dump('</td></tr>')
782
783    def visit_state(self, s, prev_s):
784        self.visit_store_in_state(s, prev_s)
785        self.visit_environment_in_state('environment', 'Environment',
786                                        s, prev_s)
787        self.visit_generic_map_in_state('constraints', 'Ranges',
788                                        s, prev_s)
789        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
790                                        s, prev_s)
791        self.visit_environment_in_state('constructing_objects',
792                                        'Objects Under Construction',
793                                        s, prev_s)
794        self.visit_checker_messages_in_state(s, prev_s)
795
796    def visit_node(self, node):
797        self._dump('%s [shape=record,'
798                   % (node.node_name()))
799        if self._dark_mode:
800            self._dump('color="white",fontcolor="gray80",')
801        self._dump('label=<<table border="0">')
802
803        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
804                   % ("gray20" if self._dark_mode else "gray70",
805                      node.state.state_id
806                      if node.state is not None else 'Unspecified'))
807        if not self._topo_mode:
808            self._dump('<tr><td align="left" width="0">')
809            if len(node.points) > 1:
810                self._dump('<b>Program points:</b></td></tr>')
811            else:
812                self._dump('<b>Program point:</b></td></tr>')
813        self._dump('<tr><td align="left" width="0">'
814                   '<table border="0" align="left" width="0">')
815        for p in node.points:
816            self.visit_program_point(p)
817        self._dump('</table></td></tr>')
818
819        if node.state is not None and not self._topo_mode:
820            prev_s = None
821            # Do diffs only when we have a unique predecessor.
822            # Don't do diffs on the leaf nodes because they're
823            # the important ones.
824            if self._do_diffs and len(node.predecessors) == 1 \
825               and len(node.successors) > 0:
826                prev_s = self._graph.nodes[node.predecessors[0]].state
827            self.visit_state(node.state, prev_s)
828        self._dump_raw('</table>>];\n')
829
830    def visit_edge(self, pred, succ):
831        self._dump_raw('%s -> %s%s;\n' % (
832            pred.node_name(), succ.node_name(),
833            ' [color="white"]' if self._dark_mode else ''
834        ))
835
836    def visit_end_of_graph(self):
837        self._dump_raw('}\n')
838
839        if not self._dump_dot_only:
840            import sys
841            import tempfile
842
843            def write_temp_file(suffix, data):
844                fd, filename = tempfile.mkstemp(suffix=suffix)
845                print('Writing "%s"...' % filename)
846                with os.fdopen(fd, 'w') as fp:
847                    fp.write(data)
848                print('Done! Please remember to remove the file.')
849                return filename
850
851            try:
852                import graphviz
853            except ImportError:
854                # The fallback behavior if graphviz is not installed!
855                print('Python graphviz not found. Please invoke')
856                print('  $ pip install graphviz')
857                print('in order to enable automatic conversion to HTML.')
858                print()
859                print('You may also convert DOT to SVG manually via')
860                print('  $ dot -Tsvg input.dot -o output.svg')
861                print()
862                write_temp_file('.dot', self.output())
863                return
864
865            svg = graphviz.pipe('dot', 'svg', self.output())
866
867            filename = write_temp_file(
868                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
869                             '#1a1a1a' if self._dark_mode else 'white', svg))
870            if sys.platform == 'win32':
871                os.startfile(filename)
872            elif sys.platform == 'darwin':
873                os.system('open "%s"' % filename)
874            else:
875                os.system('xdg-open "%s"' % filename)
876
877
878#===-----------------------------------------------------------------------===#
879# Explorers know how to traverse the ExplodedGraph in a certain order.
880# They would invoke a Visitor on every node or edge they encounter.
881#===-----------------------------------------------------------------------===#
882
883
884# BasicExplorer explores the whole graph in no particular order.
885class BasicExplorer(object):
886    def __init__(self):
887        super(BasicExplorer, self).__init__()
888
889    def explore(self, graph, visitor):
890        visitor.visit_begin_graph(graph)
891        for node in sorted(graph.nodes):
892            logging.debug('Visiting ' + node)
893            visitor.visit_node(graph.nodes[node])
894            for succ in sorted(graph.nodes[node].successors):
895                logging.debug('Visiting edge: %s -> %s ' % (node, succ))
896                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
897        visitor.visit_end_of_graph()
898
899
900#===-----------------------------------------------------------------------===#
901# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
902# Trimmers can be combined together by applying them sequentially.
903#===-----------------------------------------------------------------------===#
904
905
906# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
907# Useful when the trimmed graph is still too large.
908class SinglePathTrimmer(object):
909    def __init__(self):
910        super(SinglePathTrimmer, self).__init__()
911
912    def trim(self, graph):
913        visited_nodes = set()
914        node_id = graph.root_id
915        while True:
916            visited_nodes.add(node_id)
917            node = graph.nodes[node_id]
918            if len(node.successors) > 0:
919                succ_id = node.successors[0]
920                succ = graph.nodes[succ_id]
921                node.successors = [succ_id]
922                succ.predecessors = [node_id]
923                if succ_id in visited_nodes:
924                    break
925                node_id = succ_id
926            else:
927                break
928        graph.nodes = {node_id: graph.nodes[node_id]
929                       for node_id in visited_nodes}
930
931
932# TargetedTrimmer keeps paths that lead to specific nodes and discards all
933# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
934# a crash).
935class TargetedTrimmer(object):
936    def __init__(self, target_nodes):
937        super(TargetedTrimmer, self).__init__()
938        self._target_nodes = target_nodes
939
940    @staticmethod
941    def parse_target_node(node, graph):
942        if node.startswith('0x'):
943            ret = 'Node' + node
944            assert ret in graph.nodes
945            return ret
946        else:
947            for other_id in graph.nodes:
948                other = graph.nodes[other_id]
949                if other.node_id == int(node):
950                    return other_id
951
952    @staticmethod
953    def parse_target_nodes(target_nodes, graph):
954        return [TargetedTrimmer.parse_target_node(node, graph)
955                for node in target_nodes.split(',')]
956
957    def trim(self, graph):
958        queue = self._target_nodes
959        visited_nodes = set()
960
961        while len(queue) > 0:
962            node_id = queue.pop()
963            visited_nodes.add(node_id)
964            node = graph.nodes[node_id]
965            for pred_id in node.predecessors:
966                if pred_id not in visited_nodes:
967                    queue.append(pred_id)
968        graph.nodes = {node_id: graph.nodes[node_id]
969                       for node_id in visited_nodes}
970        for node_id in graph.nodes:
971            node = graph.nodes[node_id]
972            node.successors = [succ_id for succ_id in node.successors
973                               if succ_id in visited_nodes]
974            node.predecessors = [succ_id for succ_id in node.predecessors
975                                 if succ_id in visited_nodes]
976
977
978#===-----------------------------------------------------------------------===#
979# The entry point to the script.
980#===-----------------------------------------------------------------------===#
981
982
983def main():
984    parser = argparse.ArgumentParser(
985        description='Display and manipulate Exploded Graph dumps.')
986    parser.add_argument('filename', type=str,
987                        help='the .dot file produced by the Static Analyzer')
988    parser.add_argument('-v', '--verbose', action='store_const',
989                        dest='loglevel', const=logging.DEBUG,
990                        default=logging.WARNING,
991                        help='enable info prints')
992    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
993                        const=True, default=False,
994                        help='display differences between states')
995    parser.add_argument('-t', '--topology', action='store_const',
996                        dest='topology', const=True, default=False,
997                        help='only display program points, omit states')
998    parser.add_argument('-s', '--single-path', action='store_const',
999                        dest='single_path', const=True, default=False,
1000                        help='only display the leftmost path in the graph '
1001                             '(useful for trimmed graphs that still '
1002                             'branch too much)')
1003    parser.add_argument('--to', type=str, default=None,
1004                        help='only display execution paths from the root '
1005                             'to the given comma-separated list of nodes '
1006                             'identified by a pointer or a stable ID; '
1007                             'compatible with --single-path')
1008    parser.add_argument('--dark', action='store_const', dest='dark',
1009                        const=True, default=False,
1010                        help='dark mode')
1011    parser.add_argument('--gray', action='store_const', dest='gray',
1012                        const=True, default=False,
1013                        help='black-and-white mode')
1014    parser.add_argument('--dump-dot-only', action='store_const',
1015                        dest='dump_dot_only', const=True, default=False,
1016                        help='instead of writing an HTML file and immediately '
1017                             'displaying it, dump the rewritten dot file '
1018                             'to stdout')
1019    args = parser.parse_args()
1020    logging.basicConfig(level=args.loglevel)
1021
1022    graph = ExplodedGraph()
1023    with open(args.filename) as fd:
1024        for raw_line in fd:
1025            raw_line = raw_line.strip()
1026            graph.add_raw_line(raw_line)
1027
1028    trimmers = []
1029    if args.to is not None:
1030        trimmers.append(TargetedTrimmer(
1031            TargetedTrimmer.parse_target_nodes(args.to, graph)))
1032    if args.single_path:
1033        trimmers.append(SinglePathTrimmer())
1034
1035    explorer = BasicExplorer()
1036
1037    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
1038                             args.dump_dot_only)
1039
1040    for trimmer in trimmers:
1041        trimmer.trim(graph)
1042
1043    explorer.explore(graph, visitor)
1044
1045
1046if __name__ == '__main__':
1047    main()
1048