xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision dd3c26a045c081620375a878159f536758baba6e)
1#!/usr/bin/env python
2#
3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23# ===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25# ===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
29def diff_dicts(curr, prev):
30    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
31    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
32    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
36class GenericMap:
37    def __init__(self, items):
38        self.generic_map = collections.OrderedDict(items)
39
40    def diff(self, prev):
41        return diff_dicts(self.generic_map, prev.generic_map)
42
43    def is_different(self, prev):
44        removed, added = self.diff(prev)
45        return len(removed) != 0 or len(added) != 0
46
47
48# A deserialized source location.
49class SourceLocation:
50    def __init__(self, json_loc):
51        logging.debug("json: %s" % json_loc)
52        self.line = json_loc["line"]
53        self.col = json_loc["column"]
54        self.filename = (
55            os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)"
56        )
57        self.spelling = (
58            SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None
59        )
60
61    def is_macro(self):
62        return self.spelling is not None
63
64
65# A deserialized program point.
66class ProgramPoint:
67    def __init__(self, json_pp):
68        self.kind = json_pp["kind"]
69        self.tag = json_pp["tag"]
70        self.node_id = json_pp["node_id"]
71        self.is_sink = bool(json_pp["is_sink"])
72        self.has_report = bool(json_pp["has_report"])
73        if self.kind == "Edge":
74            self.src_id = json_pp["src_id"]
75            self.dst_id = json_pp["dst_id"]
76        elif self.kind == "Statement":
77            logging.debug(json_pp)
78            self.stmt_kind = json_pp["stmt_kind"]
79            self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None
80            self.stmt_point_kind = json_pp["stmt_point_kind"]
81            self.stmt_id = json_pp["stmt_id"]
82            self.pointer = json_pp["pointer"]
83            self.pretty = json_pp["pretty"]
84            self.loc = (
85                SourceLocation(json_pp["location"])
86                if json_pp["location"] is not None
87                else None
88            )
89        elif self.kind == "BlockEntrance":
90            self.block_id = json_pp["block_id"]
91
92
93# A single expression acting as a key in a deserialized Environment.
94class EnvironmentBindingKey:
95    def __init__(self, json_ek):
96        # CXXCtorInitializer is not a Stmt!
97        self.stmt_id = (
98            json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"]
99        )
100        self.pretty = json_ek["pretty"]
101        self.kind = json_ek["kind"] if "kind" in json_ek else None
102
103    def _key(self):
104        return self.stmt_id
105
106    def __eq__(self, other):
107        return self._key() == other._key()
108
109    def __hash__(self):
110        return hash(self._key())
111
112
113# Deserialized description of a location context.
114class LocationContext:
115    def __init__(self, json_frame):
116        self.lctx_id = json_frame["lctx_id"]
117        self.caption = json_frame["location_context"]
118        self.decl = json_frame["calling"]
119        self.loc = (
120            SourceLocation(json_frame["location"])
121            if json_frame["location"] is not None
122            else None
123        )
124
125    def _key(self):
126        return self.lctx_id
127
128    def __eq__(self, other):
129        return self._key() == other._key()
130
131    def __hash__(self):
132        return hash(self._key())
133
134
135# A group of deserialized Environment bindings that correspond to a specific
136# location context.
137class EnvironmentFrame:
138    def __init__(self, json_frame):
139        self.location_context = LocationContext(json_frame)
140        self.bindings = collections.OrderedDict(
141            [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]]
142            if json_frame["items"] is not None
143            else []
144        )
145
146    def diff_bindings(self, prev):
147        return diff_dicts(self.bindings, prev.bindings)
148
149    def is_different(self, prev):
150        removed, added = self.diff_bindings(prev)
151        return len(removed) != 0 or len(added) != 0
152
153
154# A deserialized Environment. This class can also hold other entities that
155# are similar to Environment, such as Objects Under Construction or
156# Indices Of Elements Under Construction.
157class GenericEnvironment:
158    def __init__(self, json_e):
159        self.frames = [EnvironmentFrame(f) for f in json_e]
160
161    def diff_frames(self, prev):
162        # TODO: It's difficult to display a good diff when frame numbers shift.
163        if len(self.frames) != len(prev.frames):
164            return None
165
166        updated = []
167        for i in range(len(self.frames)):
168            f = self.frames[i]
169            prev_f = prev.frames[i]
170            if f.location_context == prev_f.location_context:
171                if f.is_different(prev_f):
172                    updated.append(i)
173            else:
174                # We have the whole frame replaced with another frame.
175                # TODO: Produce a nice diff.
176                return None
177
178        # TODO: Add support for added/removed.
179        return updated
180
181    def is_different(self, prev):
182        updated = self.diff_frames(prev)
183        return updated is None or len(updated) > 0
184
185
186# A single binding key in a deserialized RegionStore cluster.
187class StoreBindingKey:
188    def __init__(self, json_sk):
189        self.kind = json_sk["kind"]
190        self.offset = json_sk["offset"]
191
192    def _key(self):
193        return (self.kind, self.offset)
194
195    def __eq__(self, other):
196        return self._key() == other._key()
197
198    def __hash__(self):
199        return hash(self._key())
200
201
202# A single cluster of the deserialized RegionStore.
203class StoreCluster:
204    def __init__(self, json_sc):
205        self.base_region = json_sc["cluster"]
206        self.bindings = collections.OrderedDict(
207            [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]]
208        )
209
210    def diff_bindings(self, prev):
211        return diff_dicts(self.bindings, prev.bindings)
212
213    def is_different(self, prev):
214        removed, added = self.diff_bindings(prev)
215        return len(removed) != 0 or len(added) != 0
216
217
218# A deserialized RegionStore.
219class Store:
220    def __init__(self, json_s):
221        self.ptr = json_s["pointer"]
222        self.clusters = collections.OrderedDict(
223            [(c["pointer"], StoreCluster(c)) for c in json_s["items"]]
224        )
225
226    def diff_clusters(self, prev):
227        removed = [k for k in prev.clusters if k not in self.clusters]
228        added = [k for k in self.clusters if k not in prev.clusters]
229        updated = [
230            k
231            for k in prev.clusters
232            if k in self.clusters and prev.clusters[k].is_different(self.clusters[k])
233        ]
234        return (removed, added, updated)
235
236    def is_different(self, prev):
237        removed, added, updated = self.diff_clusters(prev)
238        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
239
240
241# Deserialized messages from a single checker in a single program state.
242# Basically a list of raw strings.
243class CheckerLines:
244    def __init__(self, json_lines):
245        self.lines = json_lines
246
247    def diff_lines(self, prev):
248        lines = difflib.ndiff(prev.lines, self.lines)
249        return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")]
250
251    def is_different(self, prev):
252        return len(self.diff_lines(prev)) > 0
253
254
255# Deserialized messages of all checkers, separated by checker.
256class CheckerMessages:
257    def __init__(self, json_m):
258        self.items = collections.OrderedDict(
259            [(m["checker"], CheckerLines(m["messages"])) for m in json_m]
260        )
261
262    def diff_messages(self, prev):
263        removed = [k for k in prev.items if k not in self.items]
264        added = [k for k in self.items if k not in prev.items]
265        updated = [
266            k
267            for k in prev.items
268            if k in self.items and prev.items[k].is_different(self.items[k])
269        ]
270        return (removed, added, updated)
271
272    def is_different(self, prev):
273        removed, added, updated = self.diff_messages(prev)
274        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
275
276
277# A deserialized program state.
278class ProgramState:
279    def __init__(self, state_id, json_ps):
280        logging.debug("Adding ProgramState " + str(state_id))
281
282        store_key = "store"
283        env_key = "environment"
284        constraints_key = "constraints"
285        dyn_ty_key = "dynamic_types"
286        ctor_key = "constructing_objects"
287        ind_key = "index_of_element"
288        init_loop_key = "pending_init_loops"
289        dtor_key = "pending_destructors"
290        msg_key = "checker_messages"
291
292        if json_ps is None:
293            json_ps = {
294                store_key: None,
295                env_key: None,
296                constraints_key: None,
297                dyn_ty_key: None,
298                ctor_key: None,
299                ind_key: None,
300                init_loop_key: None,
301                dtor_key: None,
302                msg_key: None,
303            }
304
305        self.state_id = state_id
306
307        self.store = (
308            Store(json_ps[store_key]) if json_ps[store_key] is not None else None
309        )
310
311        self.environment = (
312            GenericEnvironment(json_ps[env_key]["items"])
313            if json_ps[env_key] is not None
314            else None
315        )
316
317        self.constraints = (
318            GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]])
319            if json_ps[constraints_key] is not None
320            else None
321        )
322
323        self.dynamic_types = (
324            GenericMap(
325                [
326                    (
327                        t["region"],
328                        "%s%s"
329                        % (
330                            t["dyn_type"],
331                            " (or a sub-class)" if t["sub_classable"] else "",
332                        ),
333                    )
334                    for t in json_ps[dyn_ty_key]
335                ]
336            )
337            if json_ps[dyn_ty_key] is not None
338            else None
339        )
340
341        self.checker_messages = (
342            CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None
343        )
344
345        # State traits
346        #
347        # For traits we always check if a key exists because if a trait
348        # has no imformation, nothing will be printed in the .dot file
349        # we parse.
350
351        self.constructing_objects = (
352            GenericEnvironment(json_ps[ctor_key])
353            if ctor_key in json_ps and json_ps[ctor_key] is not None
354            else None
355        )
356
357        self.index_of_element = (
358            GenericEnvironment(json_ps[ind_key])
359            if ind_key in json_ps and json_ps[ind_key] is not None
360            else None
361        )
362
363        self.pending_init_loops = (
364            GenericEnvironment(json_ps[init_loop_key])
365            if init_loop_key in json_ps and json_ps[init_loop_key] is not None
366            else None
367        )
368
369        self.pending_destructors = (
370            GenericEnvironment(json_ps[dtor_key])
371            if dtor_key in json_ps and json_ps[dtor_key] is not None
372            else None
373        )
374
375
376# A deserialized exploded graph node. Has a default constructor because it
377# may be referenced as part of an edge before its contents are deserialized,
378# and in this moment we already need a room for predecessors and successors.
379class ExplodedNode:
380    def __init__(self):
381        self.predecessors = []
382        self.successors = []
383
384    def construct(self, node_id, json_node):
385        logging.debug("Adding " + node_id)
386        self.ptr = node_id[4:]
387        self.points = [ProgramPoint(p) for p in json_node["program_points"]]
388        self.node_id = self.points[-1].node_id
389        self.state = ProgramState(
390            json_node["state_id"],
391            json_node["program_state"]
392            if json_node["program_state"] is not None
393            else None,
394        )
395
396        assert self.node_name() == node_id
397
398    def node_name(self):
399        return "Node" + self.ptr
400
401
402# A deserialized ExplodedGraph. Constructed by consuming a .dot file
403# line-by-line.
404class ExplodedGraph:
405    # Parse .dot files with regular expressions.
406    node_re = re.compile(
407        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$'
408    )
409    edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$")
410
411    def __init__(self):
412        self.nodes = collections.defaultdict(ExplodedNode)
413        self.root_id = None
414        self.incomplete_line = ""
415
416    def add_raw_line(self, raw_line):
417        if raw_line.startswith("//"):
418            return
419
420        # Allow line breaks by waiting for ';'. This is not valid in
421        # a .dot file, but it is useful for writing tests.
422        if len(raw_line) > 0 and raw_line[-1] != ";":
423            self.incomplete_line += raw_line
424            return
425        raw_line = self.incomplete_line + raw_line
426        self.incomplete_line = ""
427
428        # Apply regexps one by one to see if it's a node or an edge
429        # and extract contents if necessary.
430        logging.debug("Line: " + raw_line)
431        result = self.edge_re.match(raw_line)
432        if result is not None:
433            logging.debug("Classified as edge line.")
434            pred = result.group(1)
435            succ = result.group(2)
436            self.nodes[pred].successors.append(succ)
437            self.nodes[succ].predecessors.append(pred)
438            return
439        result = self.node_re.match(raw_line)
440        if result is not None:
441            logging.debug("Classified as node line.")
442            node_id = result.group(1)
443            if len(self.nodes) == 0:
444                self.root_id = node_id
445            # Note: when writing tests you don't need to escape everything,
446            # even though in a valid dot file everything is escaped.
447            node_label = (
448                result.group(2)
449                .replace(" ", "")
450                .replace('\\"', '"')
451                .replace("\\{", "{")
452                .replace("\\}", "}")
453                .replace("\\\\", "\\")
454                .replace("\\|", "|")
455                .replace("\\<", "\\\\<")
456                .replace("\\>", "\\\\>")
457                .rstrip(",")
458            )
459            # Handle `\l` separately because a string literal can be in code
460            # like "string\\literal" with the `\l` inside.
461            # Also on Windows macros __FILE__ produces specific delimiters `\`
462            # and a directory or file may starts with the letter `l`.
463            # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
464            # because the literal as a rule contains multiple `\` before `\l`.
465            node_label = re.sub(r"(?<!\\)\\l", "", node_label)
466            logging.debug(node_label)
467            json_node = json.loads(node_label)
468            self.nodes[node_id].construct(node_id, json_node)
469            return
470        logging.debug("Skipping.")
471
472
473# ===-----------------------------------------------------------------------===#
474# Visitors traverse a deserialized ExplodedGraph and do different things
475# with every node and edge.
476# ===-----------------------------------------------------------------------===#
477
478
479# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
480# syntax highlighing.
481class DotDumpVisitor:
482    def __init__(self, do_diffs, dark_mode, gray_mode, topo_mode, dump_dot_only):
483        self._do_diffs = do_diffs
484        self._dark_mode = dark_mode
485        self._gray_mode = gray_mode
486        self._topo_mode = topo_mode
487        self._dump_dot_only = dump_dot_only
488        self._output = []
489
490    def _dump_raw(self, s):
491        if self._dump_dot_only:
492            print(s, end="")
493        else:
494            self._output.append(s)
495
496    def output(self):
497        assert not self._dump_dot_only
498        return "".join(self._output)
499
500    def _dump(self, s):
501        s = (
502            s.replace("&", "&amp;")
503            .replace("{", "\\{")
504            .replace("}", "\\}")
505            .replace("\\<", "&lt;")
506            .replace("\\>", "&gt;")
507            .replace("|", "\\|")
508        )
509        s = re.sub(r"(?<!\\)\\l", "<br />", s)
510        if self._gray_mode:
511            s = re.sub(r'<font color="[a-z0-9]*">', "", s)
512            s = re.sub(r"</font>", "", s)
513        self._dump_raw(s)
514
515    @staticmethod
516    def _diff_plus_minus(is_added):
517        if is_added is None:
518            return ""
519        if is_added:
520            return '<font color="forestgreen">+</font>'
521        return '<font color="red">-</font>'
522
523    @staticmethod
524    def _short_pretty(s):
525        if s is None:
526            return None
527        if len(s) < 20:
528            return s
529        left = s.find("{")
530        right = s.rfind("}")
531        if left == -1 or right == -1 or left >= right:
532            return s
533        candidate = s[0 : left + 1] + " ... " + s[right:]
534        if len(candidate) >= len(s):
535            return s
536        return candidate
537
538    @staticmethod
539    def _make_sloc(loc):
540        if loc is None:
541            return "<i>Invalid Source Location</i>"
542
543        def make_plain_loc(loc):
544            return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col)
545
546        if loc.is_macro():
547            return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % (
548                make_plain_loc(loc),
549                make_plain_loc(loc.spelling),
550            )
551
552        return make_plain_loc(loc)
553
554    def visit_begin_graph(self, graph):
555        self._graph = graph
556        self._dump_raw('digraph "ExplodedGraph" {\n')
557        if self._dark_mode:
558            self._dump_raw('bgcolor="gray10";\n')
559        self._dump_raw('label="";\n')
560
561    def visit_program_point(self, p):
562        if p.kind in ["Edge", "BlockEntrance", "BlockExit"]:
563            color = "gold3"
564        elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]:
565            color = "red"
566        elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]:
567            color = "dodgerblue" if self._dark_mode else "blue"
568        elif p.kind in ["Statement"]:
569            color = "cyan4"
570        else:
571            color = "forestgreen"
572
573        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
574
575        if p.kind == "Statement":
576            # This avoids pretty-printing huge statements such as CompoundStmt.
577            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
578            skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind
579            stmt_color = "cyan3"
580            self._dump(
581                '<td align="left" width="0">%s:</td>'
582                '<td align="left" width="0"><font color="%s">'
583                "%s</font> </td>"
584                '<td align="left"><i>S%s</i></td>'
585                '<td align="left"><font color="%s">%s</font></td>'
586                '<td align="left">%s</td></tr>'
587                % (
588                    self._make_sloc(p.loc),
589                    color,
590                    "%s (%s)" % (p.stmt_kind, p.cast_kind)
591                    if p.cast_kind is not None
592                    else p.stmt_kind,
593                    p.stmt_id,
594                    stmt_color,
595                    p.stmt_point_kind,
596                    self._short_pretty(p.pretty) if not skip_pretty else "",
597                )
598            )
599        elif p.kind == "Edge":
600            self._dump(
601                '<td width="0"></td>'
602                '<td align="left" width="0">'
603                '<font color="%s">%s</font></td><td align="left">'
604                "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id)
605            )
606        elif p.kind == "BlockEntrance":
607            self._dump(
608                '<td width="0"></td>'
609                '<td align="left" width="0">'
610                '<font color="%s">%s</font></td>'
611                '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id)
612            )
613        else:
614            # TODO: Print more stuff for other kinds of points.
615            self._dump(
616                '<td width="0"></td>'
617                '<td align="left" width="0" colspan="2">'
618                '<font color="%s">%s</font></td></tr>' % (color, p.kind)
619            )
620
621        if p.tag is not None:
622            self._dump(
623                '<tr><td width="0"></td><td width="0"></td>'
624                '<td colspan="3" align="left">'
625                '<b>Tag: </b> <font color="crimson">'
626                "%s</font></td></tr>" % p.tag
627            )
628
629        if p.has_report:
630            self._dump(
631                '<tr><td width="0"></td><td width="0"></td>'
632                '<td colspan="3" align="left">'
633                '<font color="red"><b>Bug Report Attached'
634                "</b></font></td></tr>"
635            )
636        if p.is_sink:
637            self._dump(
638                '<tr><td width="0"></td><td width="0"></td>'
639                '<td colspan="3" align="left">'
640                '<font color="cornflowerblue"><b>Sink Node'
641                "</b></font></td></tr>"
642            )
643
644    def visit_environment(self, e, prev_e=None):
645        self._dump('<table border="0">')
646
647        def dump_location_context(lc, is_added=None):
648            self._dump(
649                "<tr><td>%s</td>"
650                '<td align="left"><b>%s</b></td>'
651                '<td align="left" colspan="2">'
652                '<font color="gray60">%s </font>'
653                "%s</td></tr>"
654                % (
655                    self._diff_plus_minus(is_added),
656                    lc.caption,
657                    lc.decl,
658                    ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "",
659                )
660            )
661
662        def dump_binding(f, b, is_added=None):
663            self._dump(
664                "<tr><td>%s</td>"
665                '<td align="left"><i>S%s</i></td>'
666                "%s"
667                '<td align="left">%s</td>'
668                '<td align="left">%s</td></tr>'
669                % (
670                    self._diff_plus_minus(is_added),
671                    b.stmt_id,
672                    '<td align="left"><font color="%s"><i>'
673                    "%s</i></font></td>"
674                    % (
675                        "lavender" if self._dark_mode else "darkgreen",
676                        ("(%s)" % b.kind) if b.kind is not None else " ",
677                    ),
678                    self._short_pretty(b.pretty),
679                    f.bindings[b],
680                )
681            )
682
683        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
684        if frames_updated:
685            for i in frames_updated:
686                f = e.frames[i]
687                prev_f = prev_e.frames[i]
688                dump_location_context(f.location_context)
689                bindings_removed, bindings_added = f.diff_bindings(prev_f)
690                for b in bindings_removed:
691                    dump_binding(prev_f, b, False)
692                for b in bindings_added:
693                    dump_binding(f, b, True)
694        else:
695            for f in e.frames:
696                dump_location_context(f.location_context)
697                for b in f.bindings:
698                    dump_binding(f, b)
699
700        self._dump("</table>")
701
702    def visit_environment_in_state(self, selector, title, s, prev_s=None):
703        e = getattr(s, selector)
704        prev_e = getattr(prev_s, selector) if prev_s is not None else None
705        if e is None and prev_e is None:
706            return
707
708        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
709        if e is None:
710            self._dump("<i> Nothing!</i>")
711        else:
712            if prev_e is not None:
713                if e.is_different(prev_e):
714                    self._dump('</td></tr><tr><td align="left">')
715                    self.visit_environment(e, prev_e)
716                else:
717                    self._dump("<i> No changes!</i>")
718            else:
719                self._dump('</td></tr><tr><td align="left">')
720                self.visit_environment(e)
721
722        self._dump("</td></tr>")
723
724    def visit_store(self, s, prev_s=None):
725        self._dump('<table border="0">')
726
727        def dump_binding(s, c, b, is_added=None):
728            self._dump(
729                "<tr><td>%s</td>"
730                '<td align="left">%s</td>'
731                '<td align="left">%s</td>'
732                '<td align="left">%s</td>'
733                '<td align="left">%s</td></tr>'
734                % (
735                    self._diff_plus_minus(is_added),
736                    s.clusters[c].base_region,
737                    b.offset,
738                    "(<i>Default</i>)" if b.kind == "Default" else "",
739                    s.clusters[c].bindings[b],
740                )
741            )
742
743        if prev_s is not None:
744            clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s)
745            for c in clusters_removed:
746                for b in prev_s.clusters[c].bindings:
747                    dump_binding(prev_s, c, b, False)
748            for c in clusters_updated:
749                bindings_removed, bindings_added = s.clusters[c].diff_bindings(
750                    prev_s.clusters[c]
751                )
752                for b in bindings_removed:
753                    dump_binding(prev_s, c, b, False)
754                for b in bindings_added:
755                    dump_binding(s, c, b, True)
756            for c in clusters_added:
757                for b in s.clusters[c].bindings:
758                    dump_binding(s, c, b, True)
759        else:
760            for c in s.clusters:
761                for b in s.clusters[c].bindings:
762                    dump_binding(s, c, b)
763
764        self._dump("</table>")
765
766    def visit_store_in_state(self, s, prev_s=None):
767        st = s.store
768        prev_st = prev_s.store if prev_s is not None else None
769        if st is None and prev_st is None:
770            return
771
772        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
773        if st is None:
774            self._dump("<i> Nothing!</i>")
775        else:
776            if self._dark_mode:
777                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
778            else:
779                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
780            if prev_st is not None:
781                if s.store.is_different(prev_st):
782                    self._dump('</td></tr><tr><td align="left">')
783                    self.visit_store(st, prev_st)
784                else:
785                    self._dump("<i> No changes!</i>")
786            else:
787                self._dump('</td></tr><tr><td align="left">')
788                self.visit_store(st)
789        self._dump("</td></tr>")
790
791    def visit_generic_map(self, m, prev_m=None):
792        self._dump('<table border="0">')
793
794        def dump_pair(m, k, is_added=None):
795            self._dump(
796                "<tr><td>%s</td>"
797                '<td align="left">%s</td>'
798                '<td align="left">%s</td></tr>'
799                % (self._diff_plus_minus(is_added), k, m.generic_map[k])
800            )
801
802        if prev_m is not None:
803            removed, added = m.diff(prev_m)
804            for k in removed:
805                dump_pair(prev_m, k, False)
806            for k in added:
807                dump_pair(m, k, True)
808        else:
809            for k in m.generic_map:
810                dump_pair(m, k, None)
811
812        self._dump("</table>")
813
814    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
815        m = getattr(s, selector)
816        prev_m = getattr(prev_s, selector) if prev_s is not None else None
817        if m is None and prev_m is None:
818            return
819
820        self._dump("<hr />")
821        self._dump('<tr><td align="left">' "<b>%s: </b>" % title)
822        if m is None:
823            self._dump("<i> Nothing!</i>")
824        else:
825            if prev_m is not None:
826                if m.is_different(prev_m):
827                    self._dump('</td></tr><tr><td align="left">')
828                    self.visit_generic_map(m, prev_m)
829                else:
830                    self._dump("<i> No changes!</i>")
831            else:
832                self._dump('</td></tr><tr><td align="left">')
833                self.visit_generic_map(m)
834
835        self._dump("</td></tr>")
836
837    def visit_checker_messages(self, m, prev_m=None):
838        self._dump('<table border="0">')
839
840        def dump_line(l, is_added=None):
841            self._dump(
842                "<tr><td>%s</td>"
843                '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l)
844            )
845
846        def dump_chk(chk, is_added=None):
847            dump_line("<i>%s</i>:" % chk, is_added)
848
849        if prev_m is not None:
850            removed, added, updated = m.diff_messages(prev_m)
851            for chk in removed:
852                dump_chk(chk, False)
853                for l in prev_m.items[chk].lines:
854                    dump_line(l, False)
855            for chk in updated:
856                dump_chk(chk)
857                for l in m.items[chk].diff_lines(prev_m.items[chk]):
858                    dump_line(l[1:], l.startswith("+"))
859            for chk in added:
860                dump_chk(chk, True)
861                for l in m.items[chk].lines:
862                    dump_line(l, True)
863        else:
864            for chk in m.items:
865                dump_chk(chk)
866                for l in m.items[chk].lines:
867                    dump_line(l)
868
869        self._dump("</table>")
870
871    def visit_checker_messages_in_state(self, s, prev_s=None):
872        m = s.checker_messages
873        prev_m = prev_s.checker_messages if prev_s is not None else None
874        if m is None and prev_m is None:
875            return
876
877        self._dump("<hr />")
878        self._dump('<tr><td align="left">' "<b>Checker State: </b>")
879        if m is None:
880            self._dump("<i> Nothing!</i>")
881        else:
882            if prev_m is not None:
883                if m.is_different(prev_m):
884                    self._dump('</td></tr><tr><td align="left">')
885                    self.visit_checker_messages(m, prev_m)
886                else:
887                    self._dump("<i> No changes!</i>")
888            else:
889                self._dump('</td></tr><tr><td align="left">')
890                self.visit_checker_messages(m)
891
892        self._dump("</td></tr>")
893
894    def visit_state(self, s, prev_s):
895        self.visit_store_in_state(s, prev_s)
896        self.visit_environment_in_state("environment", "Expressions", s, prev_s)
897        self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s)
898        self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s)
899        self.visit_environment_in_state(
900            "constructing_objects", "Objects Under Construction", s, prev_s
901        )
902        self.visit_environment_in_state(
903            "index_of_element", "Indices Of Elements Under Construction", s, prev_s
904        )
905        self.visit_environment_in_state(
906            "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s
907        )
908        self.visit_environment_in_state(
909            "pending_destructors", "Indices of Elements Under Destruction", s, prev_s
910        )
911        self.visit_checker_messages_in_state(s, prev_s)
912
913    def visit_node(self, node):
914        self._dump("%s [shape=record," % (node.node_name()))
915        if self._dark_mode:
916            self._dump('color="white",fontcolor="gray80",')
917        self._dump('label=<<table border="0">')
918
919        self._dump(
920            '<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
921            % (
922                "gray20" if self._dark_mode else "gray70",
923                node.state.state_id if node.state is not None else "Unspecified",
924            )
925        )
926        if not self._topo_mode:
927            self._dump('<tr><td align="left" width="0">')
928            if len(node.points) > 1:
929                self._dump("<b>Program points:</b></td></tr>")
930            else:
931                self._dump("<b>Program point:</b></td></tr>")
932        self._dump(
933            '<tr><td align="left" width="0">'
934            '<table border="0" align="left" width="0">'
935        )
936        for p in node.points:
937            self.visit_program_point(p)
938        self._dump("</table></td></tr>")
939
940        if node.state is not None and not self._topo_mode:
941            prev_s = None
942            # Do diffs only when we have a unique predecessor.
943            # Don't do diffs on the leaf nodes because they're
944            # the important ones.
945            if (
946                self._do_diffs
947                and len(node.predecessors) == 1
948                and len(node.successors) > 0
949            ):
950                prev_s = self._graph.nodes[node.predecessors[0]].state
951            self.visit_state(node.state, prev_s)
952        self._dump_raw("</table>>];\n")
953
954    def visit_edge(self, pred, succ):
955        self._dump_raw(
956            "%s -> %s%s;\n"
957            % (
958                pred.node_name(),
959                succ.node_name(),
960                ' [color="white"]' if self._dark_mode else "",
961            )
962        )
963
964    def visit_end_of_graph(self):
965        self._dump_raw("}\n")
966
967        if not self._dump_dot_only:
968            import sys
969            import tempfile
970
971            def write_temp_file(suffix, prefix, data):
972                fd, filename = tempfile.mkstemp(suffix, prefix, ".", True)
973                print('Writing "%s"...' % filename)
974                with os.fdopen(fd, "w") as fp:
975                    fp.write(data)
976                print("Done! Please remember to remove the file.")
977                return filename
978
979            try:
980                import graphviz
981            except ImportError:
982                # The fallback behavior if graphviz is not installed!
983                print("Python graphviz not found. Please invoke")
984                print("  $ pip install graphviz")
985                print("in order to enable automatic conversion to HTML.")
986                print()
987                print("You may also convert DOT to SVG manually via")
988                print("  $ dot -Tsvg input.dot -o output.svg")
989                print()
990                write_temp_file(".dot", "egraph-", self.output())
991                return
992
993            svg = graphviz.pipe("dot", "svg", self.output().encode()).decode()
994
995            filename = write_temp_file(
996                ".html",
997                "egraph-",
998                '<html><body bgcolor="%s">%s</body></html>'
999                % ("#1a1a1a" if self._dark_mode else "white", svg),
1000            )
1001            if sys.platform == "win32":
1002                os.startfile(filename)
1003            elif sys.platform == "darwin":
1004                os.system('open "%s"' % filename)
1005            else:
1006                os.system('xdg-open "%s"' % filename)
1007
1008
1009# ===-----------------------------------------------------------------------===#
1010# Explorers know how to traverse the ExplodedGraph in a certain order.
1011# They would invoke a Visitor on every node or edge they encounter.
1012# ===-----------------------------------------------------------------------===#
1013
1014
1015# BasicExplorer explores the whole graph in no particular order.
1016class BasicExplorer:
1017    def explore(self, graph, visitor):
1018        visitor.visit_begin_graph(graph)
1019        for node in sorted(graph.nodes):
1020            logging.debug("Visiting " + node)
1021            visitor.visit_node(graph.nodes[node])
1022            for succ in sorted(graph.nodes[node].successors):
1023                logging.debug("Visiting edge: %s -> %s " % (node, succ))
1024                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
1025        visitor.visit_end_of_graph()
1026
1027
1028# ===-----------------------------------------------------------------------===#
1029# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
1030# Trimmers can be combined together by applying them sequentially.
1031# ===-----------------------------------------------------------------------===#
1032
1033
1034# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
1035# Useful when the trimmed graph is still too large.
1036class SinglePathTrimmer:
1037    def trim(self, graph):
1038        visited_nodes = set()
1039        node_id = graph.root_id
1040        while True:
1041            visited_nodes.add(node_id)
1042            node = graph.nodes[node_id]
1043            if len(node.successors) > 0:
1044                succ_id = node.successors[0]
1045                succ = graph.nodes[succ_id]
1046                node.successors = [succ_id]
1047                succ.predecessors = [node_id]
1048                if succ_id in visited_nodes:
1049                    break
1050                node_id = succ_id
1051            else:
1052                break
1053        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1054
1055
1056# TargetedTrimmer keeps paths that lead to specific nodes and discards all
1057# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
1058# a crash).
1059class TargetedTrimmer:
1060    def __init__(self, target_nodes):
1061        self._target_nodes = target_nodes
1062
1063    @staticmethod
1064    def parse_target_node(node, graph):
1065        if node.startswith("0x"):
1066            ret = "Node" + node
1067            assert ret in graph.nodes
1068            return ret
1069        else:
1070            for other_id in graph.nodes:
1071                other = graph.nodes[other_id]
1072                if other.node_id == int(node):
1073                    return other_id
1074
1075    @staticmethod
1076    def parse_target_nodes(target_nodes, graph):
1077        return [
1078            TargetedTrimmer.parse_target_node(node, graph)
1079            for node in target_nodes.split(",")
1080        ]
1081
1082    def trim(self, graph):
1083        queue = self._target_nodes
1084        visited_nodes = set()
1085
1086        while len(queue) > 0:
1087            node_id = queue.pop()
1088            visited_nodes.add(node_id)
1089            node = graph.nodes[node_id]
1090            for pred_id in node.predecessors:
1091                if pred_id not in visited_nodes:
1092                    queue.append(pred_id)
1093        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1094        for node_id in graph.nodes:
1095            node = graph.nodes[node_id]
1096            node.successors = [
1097                succ_id for succ_id in node.successors if succ_id in visited_nodes
1098            ]
1099            node.predecessors = [
1100                succ_id for succ_id in node.predecessors if succ_id in visited_nodes
1101            ]
1102
1103
1104# ===-----------------------------------------------------------------------===#
1105# The entry point to the script.
1106# ===-----------------------------------------------------------------------===#
1107
1108
1109def main():
1110    parser = argparse.ArgumentParser(
1111        description="Display and manipulate Exploded Graph dumps."
1112    )
1113    parser.add_argument(
1114        "filename", type=str, help="the .dot file produced by the Static Analyzer"
1115    )
1116    parser.add_argument(
1117        "-v",
1118        "--verbose",
1119        action="store_const",
1120        dest="loglevel",
1121        const=logging.DEBUG,
1122        default=logging.WARNING,
1123        help="enable info prints",
1124    )
1125    parser.add_argument(
1126        "-d",
1127        "--diff",
1128        action="store_const",
1129        dest="diff",
1130        const=True,
1131        default=False,
1132        help="display differences between states",
1133    )
1134    parser.add_argument(
1135        "-t",
1136        "--topology",
1137        action="store_const",
1138        dest="topology",
1139        const=True,
1140        default=False,
1141        help="only display program points, omit states",
1142    )
1143    parser.add_argument(
1144        "-s",
1145        "--single-path",
1146        action="store_const",
1147        dest="single_path",
1148        const=True,
1149        default=False,
1150        help="only display the leftmost path in the graph "
1151        "(useful for trimmed graphs that still "
1152        "branch too much)",
1153    )
1154    parser.add_argument(
1155        "--to",
1156        type=str,
1157        default=None,
1158        help="only display execution paths from the root "
1159        "to the given comma-separated list of nodes "
1160        "identified by a pointer or a stable ID; "
1161        "compatible with --single-path",
1162    )
1163    parser.add_argument(
1164        "--dark",
1165        action="store_const",
1166        dest="dark",
1167        const=True,
1168        default=False,
1169        help="dark mode",
1170    )
1171    parser.add_argument(
1172        "--gray",
1173        action="store_const",
1174        dest="gray",
1175        const=True,
1176        default=False,
1177        help="black-and-white mode",
1178    )
1179    parser.add_argument(
1180        "--dump-dot-only",
1181        action="store_const",
1182        dest="dump_dot_only",
1183        const=True,
1184        default=False,
1185        help="instead of writing an HTML file and immediately "
1186        "displaying it, dump the rewritten dot file "
1187        "to stdout",
1188    )
1189    args = parser.parse_args()
1190    logging.basicConfig(level=args.loglevel)
1191
1192    graph = ExplodedGraph()
1193    with open(args.filename) as fd:
1194        for raw_line in fd:
1195            raw_line = raw_line.strip()
1196            graph.add_raw_line(raw_line)
1197
1198    trimmers = []
1199    if args.to is not None:
1200        trimmers.append(
1201            TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph))
1202        )
1203    if args.single_path:
1204        trimmers.append(SinglePathTrimmer())
1205
1206    explorer = BasicExplorer()
1207
1208    visitor = DotDumpVisitor(
1209        args.diff, args.dark, args.gray, args.topology, args.dump_dot_only
1210    )
1211
1212    for trimmer in trimmers:
1213        trimmer.trim(graph)
1214
1215    explorer.explore(graph, visitor)
1216
1217
1218if __name__ == "__main__":
1219    main()
1220