xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 5e876c54f2d70195dd1bde827e908825aab2f4fb)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23#===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25#===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
29def diff_dicts(curr, prev):
30    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
31    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
32    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
36class GenericMap:
37    def __init__(self, items):
38        self.generic_map = collections.OrderedDict(items)
39
40    def diff(self, prev):
41        return diff_dicts(self.generic_map, prev.generic_map)
42
43    def is_different(self, prev):
44        removed, added = self.diff(prev)
45        return len(removed) != 0 or len(added) != 0
46
47
48# A deserialized source location.
49class SourceLocation:
50    def __init__(self, json_loc):
51        logging.debug('json: %s' % json_loc)
52        self.line = json_loc['line']
53        self.col = json_loc['column']
54        self.filename = os.path.basename(json_loc['file']) \
55            if 'file' in json_loc else '(main file)'
56        self.spelling = SourceLocation(json_loc['spelling']) \
57            if 'spelling' in json_loc else None
58
59    def is_macro(self):
60        return self.spelling is not None
61
62
63# A deserialized program point.
64class ProgramPoint:
65    def __init__(self, json_pp):
66        self.kind = json_pp['kind']
67        self.tag = json_pp['tag']
68        self.node_id = json_pp['node_id']
69        self.is_sink = bool(json_pp['is_sink'])
70        self.has_report = bool(json_pp['has_report'])
71        if self.kind == 'Edge':
72            self.src_id = json_pp['src_id']
73            self.dst_id = json_pp['dst_id']
74        elif self.kind == 'Statement':
75            logging.debug(json_pp)
76            self.stmt_kind = json_pp['stmt_kind']
77            self.cast_kind = json_pp['cast_kind'] \
78                if 'cast_kind' in json_pp else None
79            self.stmt_point_kind = json_pp['stmt_point_kind']
80            self.stmt_id = json_pp['stmt_id']
81            self.pointer = json_pp['pointer']
82            self.pretty = json_pp['pretty']
83            self.loc = SourceLocation(json_pp['location']) \
84                if json_pp['location'] is not None else None
85        elif self.kind == 'BlockEntrance':
86            self.block_id = json_pp['block_id']
87
88
89# A single expression acting as a key in a deserialized Environment.
90class EnvironmentBindingKey:
91    def __init__(self, json_ek):
92        # CXXCtorInitializer is not a Stmt!
93        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
94            else json_ek['init_id']
95        self.pretty = json_ek['pretty']
96        self.kind = json_ek['kind'] if 'kind' in json_ek else None
97
98    def _key(self):
99        return self.stmt_id
100
101    def __eq__(self, other):
102        return self._key() == other._key()
103
104    def __hash__(self):
105        return hash(self._key())
106
107
108# Deserialized description of a location context.
109class LocationContext:
110    def __init__(self, json_frame):
111        self.lctx_id = json_frame['lctx_id']
112        self.caption = json_frame['location_context']
113        self.decl = json_frame['calling']
114        self.loc = SourceLocation(json_frame['location']) \
115            if json_frame['location'] is not None else None
116
117    def _key(self):
118        return self.lctx_id
119
120    def __eq__(self, other):
121        return self._key() == other._key()
122
123    def __hash__(self):
124        return hash(self._key())
125
126
127# A group of deserialized Environment bindings that correspond to a specific
128# location context.
129class EnvironmentFrame:
130    def __init__(self, json_frame):
131        self.location_context = LocationContext(json_frame)
132        self.bindings = collections.OrderedDict(
133            [(EnvironmentBindingKey(b),
134              b['value']) for b in json_frame['items']]
135            if json_frame['items'] is not None else [])
136
137    def diff_bindings(self, prev):
138        return diff_dicts(self.bindings, prev.bindings)
139
140    def is_different(self, prev):
141        removed, added = self.diff_bindings(prev)
142        return len(removed) != 0 or len(added) != 0
143
144
145# A deserialized Environment. This class can also hold other entities that
146# are similar to Environment, such as Objects Under Construction or
147# Indices Of Elements Under Construction.
148class GenericEnvironment:
149    def __init__(self, json_e):
150        self.frames = [EnvironmentFrame(f) for f in json_e]
151
152    def diff_frames(self, prev):
153        # TODO: It's difficult to display a good diff when frame numbers shift.
154        if len(self.frames) != len(prev.frames):
155            return None
156
157        updated = []
158        for i in range(len(self.frames)):
159            f = self.frames[i]
160            prev_f = prev.frames[i]
161            if f.location_context == prev_f.location_context:
162                if f.is_different(prev_f):
163                    updated.append(i)
164            else:
165                # We have the whole frame replaced with another frame.
166                # TODO: Produce a nice diff.
167                return None
168
169        # TODO: Add support for added/removed.
170        return updated
171
172    def is_different(self, prev):
173        updated = self.diff_frames(prev)
174        return updated is None or len(updated) > 0
175
176
177# A single binding key in a deserialized RegionStore cluster.
178class StoreBindingKey:
179    def __init__(self, json_sk):
180        self.kind = json_sk['kind']
181        self.offset = json_sk['offset']
182
183    def _key(self):
184        return (self.kind, self.offset)
185
186    def __eq__(self, other):
187        return self._key() == other._key()
188
189    def __hash__(self):
190        return hash(self._key())
191
192
193# A single cluster of the deserialized RegionStore.
194class StoreCluster:
195    def __init__(self, json_sc):
196        self.base_region = json_sc['cluster']
197        self.bindings = collections.OrderedDict(
198            [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
199
200    def diff_bindings(self, prev):
201        return diff_dicts(self.bindings, prev.bindings)
202
203    def is_different(self, prev):
204        removed, added = self.diff_bindings(prev)
205        return len(removed) != 0 or len(added) != 0
206
207
208# A deserialized RegionStore.
209class Store:
210    def __init__(self, json_s):
211        self.ptr = json_s['pointer']
212        self.clusters = collections.OrderedDict(
213            [(c['pointer'], StoreCluster(c)) for c in json_s['items']])
214
215    def diff_clusters(self, prev):
216        removed = [k for k in prev.clusters if k not in self.clusters]
217        added = [k for k in self.clusters if k not in prev.clusters]
218        updated = [k for k in prev.clusters if k in self.clusters
219                   and prev.clusters[k].is_different(self.clusters[k])]
220        return (removed, added, updated)
221
222    def is_different(self, prev):
223        removed, added, updated = self.diff_clusters(prev)
224        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
225
226
227# Deserialized messages from a single checker in a single program state.
228# Basically a list of raw strings.
229class CheckerLines:
230    def __init__(self, json_lines):
231        self.lines = json_lines
232
233    def diff_lines(self, prev):
234        lines = difflib.ndiff(prev.lines, self.lines)
235        return [l.strip() for l in lines
236                if l.startswith('+') or l.startswith('-')]
237
238    def is_different(self, prev):
239        return len(self.diff_lines(prev)) > 0
240
241
242# Deserialized messages of all checkers, separated by checker.
243class CheckerMessages:
244    def __init__(self, json_m):
245        self.items = collections.OrderedDict(
246            [(m['checker'], CheckerLines(m['messages'])) for m in json_m])
247
248    def diff_messages(self, prev):
249        removed = [k for k in prev.items if k not in self.items]
250        added = [k for k in self.items if k not in prev.items]
251        updated = [k for k in prev.items if k in self.items
252                   and prev.items[k].is_different(self.items[k])]
253        return (removed, added, updated)
254
255    def is_different(self, prev):
256        removed, added, updated = self.diff_messages(prev)
257        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
258
259
260# A deserialized program state.
261class ProgramState:
262    def __init__(self, state_id, json_ps):
263        logging.debug('Adding ProgramState ' + str(state_id))
264
265        if json_ps is None:
266            json_ps = {
267                'store': None,
268                'environment': None,
269                'constraints': None,
270                'dynamic_types': None,
271                'constructing_objects': None,
272                'index_of_element': None,
273                'checker_messages': None
274            }
275
276        self.state_id = state_id
277
278        self.store = Store(json_ps['store']) \
279            if json_ps['store'] is not None else None
280
281        self.environment = \
282            GenericEnvironment(json_ps['environment']['items']) \
283            if json_ps['environment'] is not None else None
284
285        self.constraints = GenericMap([
286            (c['symbol'], c['range']) for c in json_ps['constraints']
287        ]) if json_ps['constraints'] is not None else None
288
289        self.dynamic_types = GenericMap([
290                (t['region'], '%s%s' % (t['dyn_type'],
291                                        ' (or a sub-class)'
292                                        if t['sub_classable'] else ''))
293                for t in json_ps['dynamic_types']]) \
294            if json_ps['dynamic_types'] is not None else None
295
296        self.constructing_objects = \
297            GenericEnvironment(json_ps['constructing_objects']) \
298            if json_ps['constructing_objects'] is not None else None
299
300        self.index_of_element = \
301            GenericEnvironment(json_ps['index_of_element']) \
302            if json_ps['index_of_element'] is not None else None
303
304        self.checker_messages = CheckerMessages(json_ps['checker_messages']) \
305            if json_ps['checker_messages'] is not None else None
306
307
308# A deserialized exploded graph node. Has a default constructor because it
309# may be referenced as part of an edge before its contents are deserialized,
310# and in this moment we already need a room for predecessors and successors.
311class ExplodedNode:
312    def __init__(self):
313        self.predecessors = []
314        self.successors = []
315
316    def construct(self, node_id, json_node):
317        logging.debug('Adding ' + node_id)
318        self.ptr = node_id[4:]
319        self.points = [ProgramPoint(p) for p in json_node['program_points']]
320        self.node_id = self.points[-1].node_id
321        self.state = ProgramState(json_node['state_id'],
322                                  json_node['program_state']
323            if json_node['program_state'] is not None else None);
324
325        assert self.node_name() == node_id
326
327    def node_name(self):
328        return 'Node' + self.ptr
329
330
331# A deserialized ExplodedGraph. Constructed by consuming a .dot file
332# line-by-line.
333class ExplodedGraph:
334    # Parse .dot files with regular expressions.
335    node_re = re.compile(
336        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
337    edge_re = re.compile(
338        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')
339
340    def __init__(self):
341        self.nodes = collections.defaultdict(ExplodedNode)
342        self.root_id = None
343        self.incomplete_line = ''
344
345    def add_raw_line(self, raw_line):
346        if raw_line.startswith('//'):
347            return
348
349        # Allow line breaks by waiting for ';'. This is not valid in
350        # a .dot file, but it is useful for writing tests.
351        if len(raw_line) > 0 and raw_line[-1] != ';':
352            self.incomplete_line += raw_line
353            return
354        raw_line = self.incomplete_line + raw_line
355        self.incomplete_line = ''
356
357        # Apply regexps one by one to see if it's a node or an edge
358        # and extract contents if necessary.
359        logging.debug('Line: ' + raw_line)
360        result = self.edge_re.match(raw_line)
361        if result is not None:
362            logging.debug('Classified as edge line.')
363            pred = result.group(1)
364            succ = result.group(2)
365            self.nodes[pred].successors.append(succ)
366            self.nodes[succ].predecessors.append(pred)
367            return
368        result = self.node_re.match(raw_line)
369        if result is not None:
370            logging.debug('Classified as node line.')
371            node_id = result.group(1)
372            if len(self.nodes) == 0:
373                self.root_id = node_id
374            # Note: when writing tests you don't need to escape everything,
375            # even though in a valid dot file everything is escaped.
376            node_label = result.group(2).replace(' ', '') \
377                                        .replace('\\"', '"') \
378                                        .replace('\\{', '{') \
379                                        .replace('\\}', '}') \
380                                        .replace('\\\\', '\\') \
381                                        .replace('\\|', '|') \
382                                        .replace('\\<', '\\\\<') \
383                                        .replace('\\>', '\\\\>') \
384                                        .rstrip(',')
385            # Handle `\l` separately because a string literal can be in code
386            # like "string\\literal" with the `\l` inside.
387            # Also on Windows macros __FILE__ produces specific delimiters `\`
388            # and a directory or file may starts with the letter `l`.
389            # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
390            # because the literal as a rule contains multiple `\` before `\l`.
391            node_label = re.sub(r'(?<!\\)\\l', '', node_label)
392            logging.debug(node_label)
393            json_node = json.loads(node_label)
394            self.nodes[node_id].construct(node_id, json_node)
395            return
396        logging.debug('Skipping.')
397
398
399#===-----------------------------------------------------------------------===#
400# Visitors traverse a deserialized ExplodedGraph and do different things
401# with every node and edge.
402#===-----------------------------------------------------------------------===#
403
404
405# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
406# syntax highlighing.
407class DotDumpVisitor:
408    def __init__(self, do_diffs, dark_mode, gray_mode,
409                 topo_mode, dump_dot_only):
410        self._do_diffs = do_diffs
411        self._dark_mode = dark_mode
412        self._gray_mode = gray_mode
413        self._topo_mode = topo_mode
414        self._dump_dot_only = dump_dot_only
415        self._output = []
416
417    def _dump_raw(self, s):
418        if self._dump_dot_only:
419            print(s, end='')
420        else:
421            self._output.append(s)
422
423    def output(self):
424        assert not self._dump_dot_only
425        return ''.join(self._output)
426
427    def _dump(self, s):
428        s = s.replace('&', '&amp;') \
429             .replace('{', '\\{') \
430             .replace('}', '\\}') \
431             .replace('\\<', '&lt;') \
432             .replace('\\>', '&gt;') \
433             .replace('|', '\\|')
434        s = re.sub(r'(?<!\\)\\l', '<br />', s)
435        if self._gray_mode:
436            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
437            s = re.sub(r'</font>', '', s)
438        self._dump_raw(s)
439
440    @staticmethod
441    def _diff_plus_minus(is_added):
442        if is_added is None:
443            return ''
444        if is_added:
445            return '<font color="forestgreen">+</font>'
446        return '<font color="red">-</font>'
447
448    @staticmethod
449    def _short_pretty(s):
450        if s is None:
451            return None
452        if len(s) < 20:
453            return s
454        left = s.find('{')
455        right = s.rfind('}')
456        if left == -1 or right == -1 or left >= right:
457            return s
458        candidate = s[0:left + 1] + ' ... ' + s[right:]
459        if len(candidate) >= len(s):
460            return s
461        return candidate
462
463    @staticmethod
464    def _make_sloc(loc):
465        if loc is None:
466            return '<i>Invalid Source Location</i>'
467
468        def make_plain_loc(loc):
469            return '%s:<b>%s</b>:<b>%s</b>' \
470                % (loc.filename, loc.line, loc.col)
471
472        if loc.is_macro():
473            return '%s <font color="royalblue1">' \
474                   '(<i>spelling at </i> %s)</font>' \
475                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
476
477        return make_plain_loc(loc)
478
479    def visit_begin_graph(self, graph):
480        self._graph = graph
481        self._dump_raw('digraph "ExplodedGraph" {\n')
482        if self._dark_mode:
483            self._dump_raw('bgcolor="gray10";\n')
484        self._dump_raw('label="";\n')
485
486    def visit_program_point(self, p):
487        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
488            color = 'gold3'
489        elif p.kind in ['PreStmtPurgeDeadSymbols',
490                        'PostStmtPurgeDeadSymbols']:
491            color = 'red'
492        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
493            color = 'dodgerblue' if self._dark_mode else 'blue'
494        elif p.kind in ['Statement']:
495            color = 'cyan4'
496        else:
497            color = 'forestgreen'
498
499        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
500
501        if p.kind == 'Statement':
502            # This avoids pretty-printing huge statements such as CompoundStmt.
503            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
504            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
505            stmt_color = 'cyan3'
506            self._dump('<td align="left" width="0">%s:</td>'
507                       '<td align="left" width="0"><font color="%s">'
508                       '%s</font> </td>'
509                       '<td align="left"><i>S%s</i></td>'
510                       '<td align="left"><font color="%s">%s</font></td>'
511                       '<td align="left">%s</td></tr>'
512                       % (self._make_sloc(p.loc), color,
513                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
514                          if p.cast_kind is not None else p.stmt_kind,
515                          p.stmt_id, stmt_color, p.stmt_point_kind,
516                          self._short_pretty(p.pretty)
517                          if not skip_pretty else ''))
518        elif p.kind == 'Edge':
519            self._dump('<td width="0"></td>'
520                       '<td align="left" width="0">'
521                       '<font color="%s">%s</font></td><td align="left">'
522                       '[B%d] -\\> [B%d]</td></tr>'
523                       % (color, 'BlockEdge', p.src_id, p.dst_id))
524        elif p.kind == 'BlockEntrance':
525            self._dump('<td width="0"></td>'
526                       '<td align="left" width="0">'
527                       '<font color="%s">%s</font></td>'
528                       '<td align="left">[B%d]</td></tr>'
529                       % (color, p.kind, p.block_id))
530        else:
531            # TODO: Print more stuff for other kinds of points.
532            self._dump('<td width="0"></td>'
533                       '<td align="left" width="0" colspan="2">'
534                       '<font color="%s">%s</font></td></tr>'
535                       % (color, p.kind))
536
537        if p.tag is not None:
538            self._dump('<tr><td width="0"></td><td width="0"></td>'
539                       '<td colspan="3" align="left">'
540                       '<b>Tag: </b> <font color="crimson">'
541                       '%s</font></td></tr>' % p.tag)
542
543        if p.has_report:
544            self._dump('<tr><td width="0"></td><td width="0"></td>'
545                       '<td colspan="3" align="left">'
546                       '<font color="red"><b>Bug Report Attached'
547                       '</b></font></td></tr>')
548        if p.is_sink:
549            self._dump('<tr><td width="0"></td><td width="0"></td>'
550                       '<td colspan="3" align="left">'
551                       '<font color="cornflowerblue"><b>Sink Node'
552                       '</b></font></td></tr>')
553
554    def visit_environment(self, e, prev_e=None):
555        self._dump('<table border="0">')
556
557        def dump_location_context(lc, is_added=None):
558            self._dump('<tr><td>%s</td>'
559                       '<td align="left"><b>%s</b></td>'
560                       '<td align="left" colspan="2">'
561                       '<font color="gray60">%s </font>'
562                       '%s</td></tr>'
563                       % (self._diff_plus_minus(is_added),
564                          lc.caption, lc.decl,
565                          ('(%s)' % self._make_sloc(lc.loc))
566                          if lc.loc is not None else ''))
567
568        def dump_binding(f, b, is_added=None):
569            self._dump('<tr><td>%s</td>'
570                       '<td align="left"><i>S%s</i></td>'
571                       '%s'
572                       '<td align="left">%s</td>'
573                       '<td align="left">%s</td></tr>'
574                       % (self._diff_plus_minus(is_added),
575                          b.stmt_id,
576                          '<td align="left"><font color="%s"><i>'
577                          '%s</i></font></td>' % (
578                              'lavender' if self._dark_mode else 'darkgreen',
579                              ('(%s)' % b.kind) if b.kind is not None else ' '
580                          ),
581                          self._short_pretty(b.pretty), f.bindings[b]))
582
583        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
584        if frames_updated:
585            for i in frames_updated:
586                f = e.frames[i]
587                prev_f = prev_e.frames[i]
588                dump_location_context(f.location_context)
589                bindings_removed, bindings_added = f.diff_bindings(prev_f)
590                for b in bindings_removed:
591                    dump_binding(prev_f, b, False)
592                for b in bindings_added:
593                    dump_binding(f, b, True)
594        else:
595            for f in e.frames:
596                dump_location_context(f.location_context)
597                for b in f.bindings:
598                    dump_binding(f, b)
599
600        self._dump('</table>')
601
602    def visit_environment_in_state(self, selector, title, s, prev_s=None):
603        e = getattr(s, selector)
604        prev_e = getattr(prev_s, selector) if prev_s is not None else None
605        if e is None and prev_e is None:
606            return
607
608        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
609        if e is None:
610            self._dump('<i> Nothing!</i>')
611        else:
612            if prev_e is not None:
613                if e.is_different(prev_e):
614                    self._dump('</td></tr><tr><td align="left">')
615                    self.visit_environment(e, prev_e)
616                else:
617                    self._dump('<i> No changes!</i>')
618            else:
619                self._dump('</td></tr><tr><td align="left">')
620                self.visit_environment(e)
621
622        self._dump('</td></tr>')
623
624    def visit_store(self, s, prev_s=None):
625        self._dump('<table border="0">')
626
627        def dump_binding(s, c, b, is_added=None):
628            self._dump('<tr><td>%s</td>'
629                       '<td align="left">%s</td>'
630                       '<td align="left">%s</td>'
631                       '<td align="left">%s</td>'
632                       '<td align="left">%s</td></tr>'
633                       % (self._diff_plus_minus(is_added),
634                          s.clusters[c].base_region, b.offset,
635                          '(<i>Default</i>)' if b.kind == 'Default'
636                          else '',
637                          s.clusters[c].bindings[b]))
638
639        if prev_s is not None:
640            clusters_removed, clusters_added, clusters_updated = \
641                s.diff_clusters(prev_s)
642            for c in clusters_removed:
643                for b in prev_s.clusters[c].bindings:
644                    dump_binding(prev_s, c, b, False)
645            for c in clusters_updated:
646                bindings_removed, bindings_added = \
647                    s.clusters[c].diff_bindings(prev_s.clusters[c])
648                for b in bindings_removed:
649                    dump_binding(prev_s, c, b, False)
650                for b in bindings_added:
651                    dump_binding(s, c, b, True)
652            for c in clusters_added:
653                for b in s.clusters[c].bindings:
654                    dump_binding(s, c, b, True)
655        else:
656            for c in s.clusters:
657                for b in s.clusters[c].bindings:
658                    dump_binding(s, c, b)
659
660        self._dump('</table>')
661
662    def visit_store_in_state(self, s, prev_s=None):
663        st = s.store
664        prev_st = prev_s.store if prev_s is not None else None
665        if st is None and prev_st is None:
666            return
667
668        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
669        if st is None:
670            self._dump('<i> Nothing!</i>')
671        else:
672            if self._dark_mode:
673                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
674            else:
675                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
676            if prev_st is not None:
677                if s.store.is_different(prev_st):
678                    self._dump('</td></tr><tr><td align="left">')
679                    self.visit_store(st, prev_st)
680                else:
681                    self._dump('<i> No changes!</i>')
682            else:
683                self._dump('</td></tr><tr><td align="left">')
684                self.visit_store(st)
685        self._dump('</td></tr>')
686
687    def visit_generic_map(self, m, prev_m=None):
688        self._dump('<table border="0">')
689
690        def dump_pair(m, k, is_added=None):
691            self._dump('<tr><td>%s</td>'
692                       '<td align="left">%s</td>'
693                       '<td align="left">%s</td></tr>'
694                       % (self._diff_plus_minus(is_added),
695                          k, m.generic_map[k]))
696
697        if prev_m is not None:
698            removed, added = m.diff(prev_m)
699            for k in removed:
700                dump_pair(prev_m, k, False)
701            for k in added:
702                dump_pair(m, k, True)
703        else:
704            for k in m.generic_map:
705                dump_pair(m, k, None)
706
707        self._dump('</table>')
708
709    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
710        m = getattr(s, selector)
711        prev_m = getattr(prev_s, selector) if prev_s is not None else None
712        if m is None and prev_m is None:
713            return
714
715        self._dump('<hr />')
716        self._dump('<tr><td align="left">'
717                   '<b>%s: </b>' % title)
718        if m is None:
719            self._dump('<i> Nothing!</i>')
720        else:
721            if prev_m is not None:
722                if m.is_different(prev_m):
723                    self._dump('</td></tr><tr><td align="left">')
724                    self.visit_generic_map(m, prev_m)
725                else:
726                    self._dump('<i> No changes!</i>')
727            else:
728                self._dump('</td></tr><tr><td align="left">')
729                self.visit_generic_map(m)
730
731        self._dump('</td></tr>')
732
733    def visit_checker_messages(self, m, prev_m=None):
734        self._dump('<table border="0">')
735
736        def dump_line(l, is_added=None):
737            self._dump('<tr><td>%s</td>'
738                       '<td align="left">%s</td></tr>'
739                       % (self._diff_plus_minus(is_added), l))
740
741        def dump_chk(chk, is_added=None):
742            dump_line('<i>%s</i>:' % chk, is_added)
743
744        if prev_m is not None:
745            removed, added, updated = m.diff_messages(prev_m)
746            for chk in removed:
747                dump_chk(chk, False)
748                for l in prev_m.items[chk].lines:
749                    dump_line(l, False)
750            for chk in updated:
751                dump_chk(chk)
752                for l in m.items[chk].diff_lines(prev_m.items[chk]):
753                    dump_line(l[1:], l.startswith('+'))
754            for chk in added:
755                dump_chk(chk, True)
756                for l in m.items[chk].lines:
757                    dump_line(l, True)
758        else:
759            for chk in m.items:
760                dump_chk(chk)
761                for l in m.items[chk].lines:
762                    dump_line(l)
763
764        self._dump('</table>')
765
766    def visit_checker_messages_in_state(self, s, prev_s=None):
767        m = s.checker_messages
768        prev_m = prev_s.checker_messages if prev_s is not None else None
769        if m is None and prev_m is None:
770            return
771
772        self._dump('<hr />')
773        self._dump('<tr><td align="left">'
774                   '<b>Checker State: </b>')
775        if m is None:
776            self._dump('<i> Nothing!</i>')
777        else:
778            if prev_m is not None:
779                if m.is_different(prev_m):
780                    self._dump('</td></tr><tr><td align="left">')
781                    self.visit_checker_messages(m, prev_m)
782                else:
783                    self._dump('<i> No changes!</i>')
784            else:
785                self._dump('</td></tr><tr><td align="left">')
786                self.visit_checker_messages(m)
787
788        self._dump('</td></tr>')
789
790    def visit_state(self, s, prev_s):
791        self.visit_store_in_state(s, prev_s)
792        self.visit_environment_in_state('environment', 'Expressions',
793                                        s, prev_s)
794        self.visit_generic_map_in_state('constraints', 'Ranges',
795                                        s, prev_s)
796        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
797                                        s, prev_s)
798        self.visit_environment_in_state('constructing_objects',
799                                        'Objects Under Construction',
800                                        s, prev_s)
801        self.visit_environment_in_state('index_of_element',
802                                        'Indices Of Elements Under Construction',
803                                        s, prev_s)
804        self.visit_checker_messages_in_state(s, prev_s)
805
806    def visit_node(self, node):
807        self._dump('%s [shape=record,'
808                   % (node.node_name()))
809        if self._dark_mode:
810            self._dump('color="white",fontcolor="gray80",')
811        self._dump('label=<<table border="0">')
812
813        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
814                   % ("gray20" if self._dark_mode else "gray70",
815                      node.state.state_id
816                      if node.state is not None else 'Unspecified'))
817        if not self._topo_mode:
818            self._dump('<tr><td align="left" width="0">')
819            if len(node.points) > 1:
820                self._dump('<b>Program points:</b></td></tr>')
821            else:
822                self._dump('<b>Program point:</b></td></tr>')
823        self._dump('<tr><td align="left" width="0">'
824                   '<table border="0" align="left" width="0">')
825        for p in node.points:
826            self.visit_program_point(p)
827        self._dump('</table></td></tr>')
828
829        if node.state is not None and not self._topo_mode:
830            prev_s = None
831            # Do diffs only when we have a unique predecessor.
832            # Don't do diffs on the leaf nodes because they're
833            # the important ones.
834            if self._do_diffs and len(node.predecessors) == 1 \
835               and len(node.successors) > 0:
836                prev_s = self._graph.nodes[node.predecessors[0]].state
837            self.visit_state(node.state, prev_s)
838        self._dump_raw('</table>>];\n')
839
840    def visit_edge(self, pred, succ):
841        self._dump_raw('%s -> %s%s;\n' % (
842            pred.node_name(), succ.node_name(),
843            ' [color="white"]' if self._dark_mode else ''
844        ))
845
846    def visit_end_of_graph(self):
847        self._dump_raw('}\n')
848
849        if not self._dump_dot_only:
850            import sys
851            import tempfile
852
853            def write_temp_file(suffix, prefix, data):
854                fd, filename = tempfile.mkstemp(suffix, prefix, '.', True)
855                print('Writing "%s"...' % filename)
856                with os.fdopen(fd, 'w') as fp:
857                    fp.write(data)
858                print('Done! Please remember to remove the file.')
859                return filename
860
861            try:
862                import graphviz
863            except ImportError:
864                # The fallback behavior if graphviz is not installed!
865                print('Python graphviz not found. Please invoke')
866                print('  $ pip install graphviz')
867                print('in order to enable automatic conversion to HTML.')
868                print()
869                print('You may also convert DOT to SVG manually via')
870                print('  $ dot -Tsvg input.dot -o output.svg')
871                print()
872                write_temp_file('.dot', 'egraph-', self.output())
873                return
874
875            svg = graphviz.pipe('dot', 'svg', self.output().encode()).decode()
876
877            filename = write_temp_file(
878                '.html', 'egraph-', '<html><body bgcolor="%s">%s</body></html>' % (
879                             '#1a1a1a' if self._dark_mode else 'white', svg))
880            if sys.platform == 'win32':
881                os.startfile(filename)
882            elif sys.platform == 'darwin':
883                os.system('open "%s"' % filename)
884            else:
885                os.system('xdg-open "%s"' % filename)
886
887
888#===-----------------------------------------------------------------------===#
889# Explorers know how to traverse the ExplodedGraph in a certain order.
890# They would invoke a Visitor on every node or edge they encounter.
891#===-----------------------------------------------------------------------===#
892
893
894# BasicExplorer explores the whole graph in no particular order.
895class BasicExplorer:
896    def explore(self, graph, visitor):
897        visitor.visit_begin_graph(graph)
898        for node in sorted(graph.nodes):
899            logging.debug('Visiting ' + node)
900            visitor.visit_node(graph.nodes[node])
901            for succ in sorted(graph.nodes[node].successors):
902                logging.debug('Visiting edge: %s -> %s ' % (node, succ))
903                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
904        visitor.visit_end_of_graph()
905
906
907#===-----------------------------------------------------------------------===#
908# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
909# Trimmers can be combined together by applying them sequentially.
910#===-----------------------------------------------------------------------===#
911
912
913# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
914# Useful when the trimmed graph is still too large.
915class SinglePathTrimmer:
916    def trim(self, graph):
917        visited_nodes = set()
918        node_id = graph.root_id
919        while True:
920            visited_nodes.add(node_id)
921            node = graph.nodes[node_id]
922            if len(node.successors) > 0:
923                succ_id = node.successors[0]
924                succ = graph.nodes[succ_id]
925                node.successors = [succ_id]
926                succ.predecessors = [node_id]
927                if succ_id in visited_nodes:
928                    break
929                node_id = succ_id
930            else:
931                break
932        graph.nodes = {node_id: graph.nodes[node_id]
933                       for node_id in visited_nodes}
934
935
936# TargetedTrimmer keeps paths that lead to specific nodes and discards all
937# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
938# a crash).
939class TargetedTrimmer:
940    def __init__(self, target_nodes):
941        self._target_nodes = target_nodes
942
943    @staticmethod
944    def parse_target_node(node, graph):
945        if node.startswith('0x'):
946            ret = 'Node' + node
947            assert ret in graph.nodes
948            return ret
949        else:
950            for other_id in graph.nodes:
951                other = graph.nodes[other_id]
952                if other.node_id == int(node):
953                    return other_id
954
955    @staticmethod
956    def parse_target_nodes(target_nodes, graph):
957        return [TargetedTrimmer.parse_target_node(node, graph)
958                for node in target_nodes.split(',')]
959
960    def trim(self, graph):
961        queue = self._target_nodes
962        visited_nodes = set()
963
964        while len(queue) > 0:
965            node_id = queue.pop()
966            visited_nodes.add(node_id)
967            node = graph.nodes[node_id]
968            for pred_id in node.predecessors:
969                if pred_id not in visited_nodes:
970                    queue.append(pred_id)
971        graph.nodes = {node_id: graph.nodes[node_id]
972                       for node_id in visited_nodes}
973        for node_id in graph.nodes:
974            node = graph.nodes[node_id]
975            node.successors = [succ_id for succ_id in node.successors
976                               if succ_id in visited_nodes]
977            node.predecessors = [succ_id for succ_id in node.predecessors
978                                 if succ_id in visited_nodes]
979
980
981#===-----------------------------------------------------------------------===#
982# The entry point to the script.
983#===-----------------------------------------------------------------------===#
984
985
986def main():
987    parser = argparse.ArgumentParser(
988        description='Display and manipulate Exploded Graph dumps.')
989    parser.add_argument('filename', type=str,
990                        help='the .dot file produced by the Static Analyzer')
991    parser.add_argument('-v', '--verbose', action='store_const',
992                        dest='loglevel', const=logging.DEBUG,
993                        default=logging.WARNING,
994                        help='enable info prints')
995    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
996                        const=True, default=False,
997                        help='display differences between states')
998    parser.add_argument('-t', '--topology', action='store_const',
999                        dest='topology', const=True, default=False,
1000                        help='only display program points, omit states')
1001    parser.add_argument('-s', '--single-path', action='store_const',
1002                        dest='single_path', const=True, default=False,
1003                        help='only display the leftmost path in the graph '
1004                             '(useful for trimmed graphs that still '
1005                             'branch too much)')
1006    parser.add_argument('--to', type=str, default=None,
1007                        help='only display execution paths from the root '
1008                             'to the given comma-separated list of nodes '
1009                             'identified by a pointer or a stable ID; '
1010                             'compatible with --single-path')
1011    parser.add_argument('--dark', action='store_const', dest='dark',
1012                        const=True, default=False,
1013                        help='dark mode')
1014    parser.add_argument('--gray', action='store_const', dest='gray',
1015                        const=True, default=False,
1016                        help='black-and-white mode')
1017    parser.add_argument('--dump-dot-only', action='store_const',
1018                        dest='dump_dot_only', const=True, default=False,
1019                        help='instead of writing an HTML file and immediately '
1020                             'displaying it, dump the rewritten dot file '
1021                             'to stdout')
1022    args = parser.parse_args()
1023    logging.basicConfig(level=args.loglevel)
1024
1025    graph = ExplodedGraph()
1026    with open(args.filename) as fd:
1027        for raw_line in fd:
1028            raw_line = raw_line.strip()
1029            graph.add_raw_line(raw_line)
1030
1031    trimmers = []
1032    if args.to is not None:
1033        trimmers.append(TargetedTrimmer(
1034            TargetedTrimmer.parse_target_nodes(args.to, graph)))
1035    if args.single_path:
1036        trimmers.append(SinglePathTrimmer())
1037
1038    explorer = BasicExplorer()
1039
1040    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
1041                             args.dump_dot_only)
1042
1043    for trimmer in trimmers:
1044        trimmer.trim(graph)
1045
1046    explorer.explore(graph, visitor)
1047
1048
1049if __name__ == '__main__':
1050    main()
1051