xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 20cb4ec845dec70f304c054ba5b45c0a388112b8)
1#!/usr/bin/env python
2#
3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23# ===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25# ===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
29def diff_dicts(curr, prev):
30    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
31    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
32    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
36class GenericMap:
37    def __init__(self, items):
38        self.generic_map = collections.OrderedDict(items)
39
40    def diff(self, prev):
41        return diff_dicts(self.generic_map, prev.generic_map)
42
43    def is_different(self, prev):
44        removed, added = self.diff(prev)
45        return len(removed) != 0 or len(added) != 0
46
47
48# A deserialized source location.
49class SourceLocation:
50    def __init__(self, json_loc):
51        logging.debug("json: %s" % json_loc)
52        self.line = json_loc["line"]
53        self.col = json_loc["column"]
54        self.filename = (
55            os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)"
56        )
57        self.spelling = (
58            SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None
59        )
60
61    def is_macro(self):
62        return self.spelling is not None
63
64
65# A deserialized program point.
66class ProgramPoint:
67    def __init__(self, json_pp):
68        self.kind = json_pp["kind"]
69        self.tag = json_pp["tag"]
70        self.node_id = json_pp["node_id"]
71        self.is_sink = bool(json_pp["is_sink"])
72        self.has_report = bool(json_pp["has_report"])
73        if self.kind == "Edge":
74            self.src_id = json_pp["src_id"]
75            self.dst_id = json_pp["dst_id"]
76        elif self.kind == "Statement":
77            logging.debug(json_pp)
78            self.stmt_kind = json_pp["stmt_kind"]
79            self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None
80            self.stmt_point_kind = json_pp["stmt_point_kind"]
81            self.stmt_id = json_pp["stmt_id"]
82            self.pointer = json_pp["pointer"]
83            self.pretty = json_pp["pretty"]
84            self.loc = (
85                SourceLocation(json_pp["location"])
86                if json_pp["location"] is not None
87                else None
88            )
89        elif self.kind == "CallEnter":
90            self.callee_decl = json_pp.get("callee_decl", "None")
91        elif self.kind == "BlockEntrance":
92            self.block_id = json_pp["block_id"]
93        elif self.kind == "PostInitializer":
94            if "field_decl" in json_pp:
95                self.target = json_pp["field_decl"]
96            else:
97                self.target = json_pp["type"]
98
99
100# A single expression acting as a key in a deserialized Environment.
101class EnvironmentBindingKey:
102    def __init__(self, json_ek):
103        # CXXCtorInitializer is not a Stmt!
104        self.stmt_id = (
105            json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"]
106        )
107        self.pretty = json_ek["pretty"]
108        self.kind = json_ek["kind"] if "kind" in json_ek else None
109
110    def _key(self):
111        return self.stmt_id
112
113    def __eq__(self, other):
114        return self._key() == other._key()
115
116    def __hash__(self):
117        return hash(self._key())
118
119
120# Deserialized description of a location context.
121class LocationContext:
122    def __init__(self, json_frame):
123        self.lctx_id = json_frame["lctx_id"]
124        self.caption = json_frame["location_context"]
125        self.decl = json_frame["calling"]
126        self.loc = (
127            SourceLocation(json_frame["location"])
128            if json_frame["location"] is not None
129            else None
130        )
131
132    def _key(self):
133        return self.lctx_id
134
135    def __eq__(self, other):
136        return self._key() == other._key()
137
138    def __hash__(self):
139        return hash(self._key())
140
141
142# A group of deserialized Environment bindings that correspond to a specific
143# location context.
144class EnvironmentFrame:
145    def __init__(self, json_frame):
146        self.location_context = LocationContext(json_frame)
147        self.bindings = collections.OrderedDict(
148            [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]]
149            if json_frame["items"] is not None
150            else []
151        )
152
153    def diff_bindings(self, prev):
154        return diff_dicts(self.bindings, prev.bindings)
155
156    def is_different(self, prev):
157        removed, added = self.diff_bindings(prev)
158        return len(removed) != 0 or len(added) != 0
159
160
161# A deserialized Environment. This class can also hold other entities that
162# are similar to Environment, such as Objects Under Construction or
163# Indices Of Elements Under Construction.
164class GenericEnvironment:
165    def __init__(self, json_e):
166        self.frames = [EnvironmentFrame(f) for f in json_e]
167
168    def diff_frames(self, prev):
169        # TODO: It's difficult to display a good diff when frame numbers shift.
170        if len(self.frames) != len(prev.frames):
171            return None
172
173        updated = []
174        for i in range(len(self.frames)):
175            f = self.frames[i]
176            prev_f = prev.frames[i]
177            if f.location_context == prev_f.location_context:
178                if f.is_different(prev_f):
179                    updated.append(i)
180            else:
181                # We have the whole frame replaced with another frame.
182                # TODO: Produce a nice diff.
183                return None
184
185        # TODO: Add support for added/removed.
186        return updated
187
188    def is_different(self, prev):
189        updated = self.diff_frames(prev)
190        return updated is None or len(updated) > 0
191
192
193# A single binding key in a deserialized RegionStore cluster.
194class StoreBindingKey:
195    def __init__(self, json_sk):
196        self.kind = json_sk["kind"]
197        self.offset = json_sk["offset"]
198
199    def _key(self):
200        return (self.kind, self.offset)
201
202    def __eq__(self, other):
203        return self._key() == other._key()
204
205    def __hash__(self):
206        return hash(self._key())
207
208
209# A single cluster of the deserialized RegionStore.
210class StoreCluster:
211    def __init__(self, json_sc):
212        self.base_region = json_sc["cluster"]
213        self.bindings = collections.OrderedDict(
214            [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]]
215        )
216
217    def diff_bindings(self, prev):
218        return diff_dicts(self.bindings, prev.bindings)
219
220    def is_different(self, prev):
221        removed, added = self.diff_bindings(prev)
222        return len(removed) != 0 or len(added) != 0
223
224
225# A deserialized RegionStore.
226class Store:
227    def __init__(self, json_s):
228        self.ptr = json_s["pointer"]
229        self.clusters = collections.OrderedDict(
230            [(c["pointer"], StoreCluster(c)) for c in json_s["items"]]
231        )
232
233    def diff_clusters(self, prev):
234        removed = [k for k in prev.clusters if k not in self.clusters]
235        added = [k for k in self.clusters if k not in prev.clusters]
236        updated = [
237            k
238            for k in prev.clusters
239            if k in self.clusters and prev.clusters[k].is_different(self.clusters[k])
240        ]
241        return (removed, added, updated)
242
243    def is_different(self, prev):
244        removed, added, updated = self.diff_clusters(prev)
245        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
246
247
248# Deserialized messages from a single checker in a single program state.
249# Basically a list of raw strings.
250class CheckerLines:
251    def __init__(self, json_lines):
252        self.lines = json_lines
253
254    def diff_lines(self, prev):
255        lines = difflib.ndiff(prev.lines, self.lines)
256        return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")]
257
258    def is_different(self, prev):
259        return len(self.diff_lines(prev)) > 0
260
261
262# Deserialized messages of all checkers, separated by checker.
263class CheckerMessages:
264    def __init__(self, json_m):
265        self.items = collections.OrderedDict(
266            [(m["checker"], CheckerLines(m["messages"])) for m in json_m]
267        )
268
269    def diff_messages(self, prev):
270        removed = [k for k in prev.items if k not in self.items]
271        added = [k for k in self.items if k not in prev.items]
272        updated = [
273            k
274            for k in prev.items
275            if k in self.items and prev.items[k].is_different(self.items[k])
276        ]
277        return (removed, added, updated)
278
279    def is_different(self, prev):
280        removed, added, updated = self.diff_messages(prev)
281        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
282
283
284# A deserialized program state.
285class ProgramState:
286    def __init__(self, state_id, json_ps):
287        logging.debug("Adding ProgramState " + str(state_id))
288
289        store_key = "store"
290        env_key = "environment"
291        constraints_key = "constraints"
292        dyn_ty_key = "dynamic_types"
293        ctor_key = "constructing_objects"
294        ind_key = "index_of_element"
295        init_loop_key = "pending_init_loops"
296        dtor_key = "pending_destructors"
297        msg_key = "checker_messages"
298
299        if json_ps is None:
300            json_ps = {
301                store_key: None,
302                env_key: None,
303                constraints_key: None,
304                dyn_ty_key: None,
305                ctor_key: None,
306                ind_key: None,
307                init_loop_key: None,
308                dtor_key: None,
309                msg_key: None,
310            }
311
312        self.state_id = state_id
313
314        self.store = (
315            Store(json_ps[store_key]) if json_ps[store_key] is not None else None
316        )
317
318        self.environment = (
319            GenericEnvironment(json_ps[env_key]["items"])
320            if json_ps[env_key] is not None
321            else None
322        )
323
324        self.constraints = (
325            GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]])
326            if json_ps[constraints_key] is not None
327            else None
328        )
329
330        self.dynamic_types = (
331            GenericMap(
332                [
333                    (
334                        t["region"],
335                        "%s%s"
336                        % (
337                            t["dyn_type"],
338                            " (or a sub-class)" if t["sub_classable"] else "",
339                        ),
340                    )
341                    for t in json_ps[dyn_ty_key]
342                ]
343            )
344            if json_ps[dyn_ty_key] is not None
345            else None
346        )
347
348        self.checker_messages = (
349            CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None
350        )
351
352        # State traits
353        #
354        # For traits we always check if a key exists because if a trait
355        # has no imformation, nothing will be printed in the .dot file
356        # we parse.
357
358        self.constructing_objects = (
359            GenericEnvironment(json_ps[ctor_key])
360            if ctor_key in json_ps and json_ps[ctor_key] is not None
361            else None
362        )
363
364        self.index_of_element = (
365            GenericEnvironment(json_ps[ind_key])
366            if ind_key in json_ps and json_ps[ind_key] is not None
367            else None
368        )
369
370        self.pending_init_loops = (
371            GenericEnvironment(json_ps[init_loop_key])
372            if init_loop_key in json_ps and json_ps[init_loop_key] is not None
373            else None
374        )
375
376        self.pending_destructors = (
377            GenericEnvironment(json_ps[dtor_key])
378            if dtor_key in json_ps and json_ps[dtor_key] is not None
379            else None
380        )
381
382
383# A deserialized exploded graph node. Has a default constructor because it
384# may be referenced as part of an edge before its contents are deserialized,
385# and in this moment we already need a room for predecessors and successors.
386class ExplodedNode:
387    def __init__(self):
388        self.predecessors = []
389        self.successors = []
390
391    def construct(self, node_id, json_node):
392        logging.debug("Adding " + node_id)
393        self.ptr = node_id[4:]
394        self.points = [ProgramPoint(p) for p in json_node["program_points"]]
395        self.node_id = self.points[-1].node_id
396        self.state = ProgramState(
397            json_node["state_id"],
398            json_node["program_state"]
399            if json_node["program_state"] is not None
400            else None,
401        )
402
403        assert self.node_name() == node_id
404
405    def node_name(self):
406        return "Node" + self.ptr
407
408
409# A deserialized ExplodedGraph. Constructed by consuming a .dot file
410# line-by-line.
411class ExplodedGraph:
412    # Parse .dot files with regular expressions.
413    node_re = re.compile(
414        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$'
415    )
416    edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$")
417
418    def __init__(self):
419        self.nodes = collections.defaultdict(ExplodedNode)
420        self.root_id = None
421        self.incomplete_line = ""
422
423    def add_raw_line(self, raw_line):
424        if raw_line.startswith("//"):
425            return
426
427        # Allow line breaks by waiting for ';'. This is not valid in
428        # a .dot file, but it is useful for writing tests.
429        if len(raw_line) > 0 and raw_line[-1] != ";":
430            self.incomplete_line += raw_line
431            return
432        raw_line = self.incomplete_line + raw_line
433        self.incomplete_line = ""
434
435        # Apply regexps one by one to see if it's a node or an edge
436        # and extract contents if necessary.
437        logging.debug("Line: " + raw_line)
438        result = self.edge_re.match(raw_line)
439        if result is not None:
440            logging.debug("Classified as edge line.")
441            pred = result.group(1)
442            succ = result.group(2)
443            self.nodes[pred].successors.append(succ)
444            self.nodes[succ].predecessors.append(pred)
445            return
446        result = self.node_re.match(raw_line)
447        if result is not None:
448            logging.debug("Classified as node line.")
449            node_id = result.group(1)
450            if len(self.nodes) == 0:
451                self.root_id = node_id
452            # Note: when writing tests you don't need to escape everything,
453            # even though in a valid dot file everything is escaped.
454            node_label = (
455                result.group(2)
456                .replace(" ", "")
457                .replace('\\"', '"')
458                .replace("\\{", "{")
459                .replace("\\}", "}")
460                .replace("\\\\", "\\")
461                .replace("\\|", "|")
462                .replace("\\<", "\\\\<")
463                .replace("\\>", "\\\\>")
464                .rstrip(",")
465            )
466            # Handle `\l` separately because a string literal can be in code
467            # like "string\\literal" with the `\l` inside.
468            # Also on Windows macros __FILE__ produces specific delimiters `\`
469            # and a directory or file may starts with the letter `l`.
470            # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
471            # because the literal as a rule contains multiple `\` before `\l`.
472            node_label = re.sub(r"(?<!\\)\\l", "", node_label)
473            logging.debug(node_label)
474            json_node = json.loads(node_label)
475            self.nodes[node_id].construct(node_id, json_node)
476            return
477        logging.debug("Skipping.")
478
479
480# ===-----------------------------------------------------------------------===#
481# Visitors traverse a deserialized ExplodedGraph and do different things
482# with every node and edge.
483# ===-----------------------------------------------------------------------===#
484
485
486# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
487# syntax highlighing.
488class DotDumpVisitor:
489    def __init__(
490        self, do_diffs, dark_mode, gray_mode, topo_mode, dump_html_only, dump_dot_only
491    ):
492        assert not (dump_html_only and dump_dot_only), (
493            "Option dump_html_only and dump_dot_only are conflict, "
494            "they cannot be true at the same time."
495        )
496
497        self._do_diffs = do_diffs
498        self._dark_mode = dark_mode
499        self._gray_mode = gray_mode
500        self._topo_mode = topo_mode
501        self._dump_html_only = dump_html_only
502        self._dump_dot_only = dump_dot_only
503        self._output = []
504
505    def _dump_raw(self, s):
506        if self._dump_dot_only:
507            print(s, end="")
508        else:
509            self._output.append(s)
510
511    def output(self):
512        assert not self._dump_dot_only
513        return "".join(self._output)
514
515    def _dump(self, s):
516        s = (
517            s.replace("&", "&amp;")
518            .replace("{", "\\{")
519            .replace("}", "\\}")
520            .replace("\\<", "&lt;")
521            .replace("\\>", "&gt;")
522            .replace("|", "\\|")
523        )
524        s = re.sub(r"(?<!\\)\\l", "<br />", s)
525        if self._gray_mode:
526            s = re.sub(r'<font color="[a-z0-9]*">', "", s)
527            s = re.sub(r"</font>", "", s)
528        self._dump_raw(s)
529
530    @staticmethod
531    def _diff_plus_minus(is_added):
532        if is_added is None:
533            return ""
534        if is_added:
535            return '<font color="forestgreen">+</font>'
536        return '<font color="red">-</font>'
537
538    @staticmethod
539    def _short_pretty(s):
540        if s is None:
541            return None
542        if len(s) < 20:
543            return s
544        left = s.find("{")
545        right = s.rfind("}")
546        if left == -1 or right == -1 or left >= right:
547            return s
548        candidate = s[0 : left + 1] + " ... " + s[right:]
549        if len(candidate) >= len(s):
550            return s
551        return candidate
552
553    @staticmethod
554    def _make_sloc(loc):
555        if loc is None:
556            return "<i>Invalid Source Location</i>"
557
558        def make_plain_loc(loc):
559            return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col)
560
561        if loc.is_macro():
562            return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % (
563                make_plain_loc(loc),
564                make_plain_loc(loc.spelling),
565            )
566
567        return make_plain_loc(loc)
568
569    def visit_begin_graph(self, graph):
570        self._graph = graph
571        self._dump_raw('digraph "ExplodedGraph" {\n')
572        if self._dark_mode:
573            self._dump_raw('bgcolor="gray10";\n')
574        self._dump_raw('label="";\n')
575
576    def visit_program_point(self, p):
577        if p.kind in ["Edge", "BlockEntrance", "BlockExit"]:
578            color = "gold3"
579        elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]:
580            color = "red"
581        elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]:
582            color = "dodgerblue" if self._dark_mode else "blue"
583        elif p.kind in ["Statement"]:
584            color = "cyan4"
585        else:
586            color = "forestgreen"
587
588        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
589
590        if p.kind == "Statement":
591            # This avoids pretty-printing huge statements such as CompoundStmt.
592            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
593            skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind
594            stmt_color = "cyan3"
595            self._dump(
596                '<td align="left" width="0">%s:</td>'
597                '<td align="left" width="0"><font color="%s">'
598                "%s</font> </td>"
599                '<td align="left"><i>S%s</i></td>'
600                '<td align="left"><font color="%s">%s</font></td>'
601                '<td align="left">%s</td></tr>'
602                % (
603                    self._make_sloc(p.loc),
604                    color,
605                    "%s (%s)" % (p.stmt_kind, p.cast_kind)
606                    if p.cast_kind is not None
607                    else p.stmt_kind,
608                    p.stmt_id,
609                    stmt_color,
610                    p.stmt_point_kind,
611                    self._short_pretty(p.pretty) if not skip_pretty else "",
612                )
613            )
614        elif p.kind == "Edge":
615            self._dump(
616                '<td width="0"></td>'
617                '<td align="left" width="0">'
618                '<font color="%s">%s</font></td><td align="left">'
619                "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id)
620            )
621        elif p.kind == "BlockEntrance":
622            self._dump(
623                '<td width="0"></td>'
624                '<td align="left" width="0">'
625                '<font color="%s">%s</font></td>'
626                '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id)
627            )
628        elif p.kind == "CallEnter":
629            self._dump(
630                '<td width="0"></td>'
631                '<td align="left" width="0">'
632                '<font color="%s">%s</font></td>'
633                '<td align="left">%s</td></tr>' % (color, p.kind, p.callee_decl)
634            )
635        elif p.kind == "PostInitializer":
636            self._dump(
637                '<td width="0"></td>'
638                '<td align="left" width="0">'
639                '<font color="%s">%s</font></td>'
640                '<td align="left">%s</td></tr>' % (color, p.kind, p.target)
641            )
642        else:
643            # TODO: Print more stuff for other kinds of points.
644            self._dump(
645                '<td width="0"></td>'
646                '<td align="left" width="0" colspan="2">'
647                '<font color="%s">%s</font></td></tr>' % (color, p.kind)
648            )
649
650        if p.tag is not None:
651            self._dump(
652                '<tr><td width="0"></td><td width="0"></td>'
653                '<td colspan="3" align="left">'
654                '<b>Tag: </b> <font color="crimson">'
655                "%s</font></td></tr>" % p.tag
656            )
657
658        if p.has_report:
659            self._dump(
660                '<tr><td width="0"></td><td width="0"></td>'
661                '<td colspan="3" align="left">'
662                '<font color="red"><b>Bug Report Attached'
663                "</b></font></td></tr>"
664            )
665        if p.is_sink:
666            self._dump(
667                '<tr><td width="0"></td><td width="0"></td>'
668                '<td colspan="3" align="left">'
669                '<font color="cornflowerblue"><b>Sink Node'
670                "</b></font></td></tr>"
671            )
672
673    def visit_environment(self, e, prev_e=None):
674        self._dump('<table border="0">')
675
676        def dump_location_context(lc, is_added=None):
677            self._dump(
678                "<tr><td>%s</td>"
679                '<td align="left"><b>%s</b></td>'
680                '<td align="left" colspan="2">'
681                '<font color="gray60">%s </font>'
682                "%s</td></tr>"
683                % (
684                    self._diff_plus_minus(is_added),
685                    lc.caption,
686                    lc.decl,
687                    ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "",
688                )
689            )
690
691        def dump_binding(f, b, is_added=None):
692            self._dump(
693                "<tr><td>%s</td>"
694                '<td align="left"><i>S%s</i></td>'
695                "%s"
696                '<td align="left">%s</td>'
697                '<td align="left">%s</td></tr>'
698                % (
699                    self._diff_plus_minus(is_added),
700                    b.stmt_id,
701                    '<td align="left"><font color="%s"><i>'
702                    "%s</i></font></td>"
703                    % (
704                        "lavender" if self._dark_mode else "darkgreen",
705                        ("(%s)" % b.kind) if b.kind is not None else " ",
706                    ),
707                    self._short_pretty(b.pretty),
708                    f.bindings[b],
709                )
710            )
711
712        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
713        if frames_updated:
714            for i in frames_updated:
715                f = e.frames[i]
716                prev_f = prev_e.frames[i]
717                dump_location_context(f.location_context)
718                bindings_removed, bindings_added = f.diff_bindings(prev_f)
719                for b in bindings_removed:
720                    dump_binding(prev_f, b, False)
721                for b in bindings_added:
722                    dump_binding(f, b, True)
723        else:
724            for f in e.frames:
725                dump_location_context(f.location_context)
726                for b in f.bindings:
727                    dump_binding(f, b)
728
729        self._dump("</table>")
730
731    def visit_environment_in_state(self, selector, title, s, prev_s=None):
732        e = getattr(s, selector)
733        prev_e = getattr(prev_s, selector) if prev_s is not None else None
734        if e is None and prev_e is None:
735            return
736
737        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
738        if e is None:
739            self._dump("<i> Nothing!</i>")
740        else:
741            if prev_e is not None:
742                if e.is_different(prev_e):
743                    self._dump('</td></tr><tr><td align="left">')
744                    self.visit_environment(e, prev_e)
745                else:
746                    self._dump("<i> No changes!</i>")
747            else:
748                self._dump('</td></tr><tr><td align="left">')
749                self.visit_environment(e)
750
751        self._dump("</td></tr>")
752
753    def visit_store(self, s, prev_s=None):
754        self._dump('<table border="0">')
755
756        def dump_binding(s, c, b, is_added=None):
757            self._dump(
758                "<tr><td>%s</td>"
759                '<td align="left">%s</td>'
760                '<td align="left">%s</td>'
761                '<td align="left">%s</td>'
762                '<td align="left">%s</td></tr>'
763                % (
764                    self._diff_plus_minus(is_added),
765                    s.clusters[c].base_region,
766                    b.offset,
767                    "(<i>Default</i>)" if b.kind == "Default" else "",
768                    s.clusters[c].bindings[b],
769                )
770            )
771
772        if prev_s is not None:
773            clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s)
774            for c in clusters_removed:
775                for b in prev_s.clusters[c].bindings:
776                    dump_binding(prev_s, c, b, False)
777            for c in clusters_updated:
778                bindings_removed, bindings_added = s.clusters[c].diff_bindings(
779                    prev_s.clusters[c]
780                )
781                for b in bindings_removed:
782                    dump_binding(prev_s, c, b, False)
783                for b in bindings_added:
784                    dump_binding(s, c, b, True)
785            for c in clusters_added:
786                for b in s.clusters[c].bindings:
787                    dump_binding(s, c, b, True)
788        else:
789            for c in s.clusters:
790                for b in s.clusters[c].bindings:
791                    dump_binding(s, c, b)
792
793        self._dump("</table>")
794
795    def visit_store_in_state(self, s, prev_s=None):
796        st = s.store
797        prev_st = prev_s.store if prev_s is not None else None
798        if st is None and prev_st is None:
799            return
800
801        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
802        if st is None:
803            self._dump("<i> Nothing!</i>")
804        else:
805            if self._dark_mode:
806                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
807            else:
808                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
809            if prev_st is not None:
810                if s.store.is_different(prev_st):
811                    self._dump('</td></tr><tr><td align="left">')
812                    self.visit_store(st, prev_st)
813                else:
814                    self._dump("<i> No changes!</i>")
815            else:
816                self._dump('</td></tr><tr><td align="left">')
817                self.visit_store(st)
818        self._dump("</td></tr>")
819
820    def visit_generic_map(self, m, prev_m=None):
821        self._dump('<table border="0">')
822
823        def dump_pair(m, k, is_added=None):
824            self._dump(
825                "<tr><td>%s</td>"
826                '<td align="left">%s</td>'
827                '<td align="left">%s</td></tr>'
828                % (self._diff_plus_minus(is_added), k, m.generic_map[k])
829            )
830
831        if prev_m is not None:
832            removed, added = m.diff(prev_m)
833            for k in removed:
834                dump_pair(prev_m, k, False)
835            for k in added:
836                dump_pair(m, k, True)
837        else:
838            for k in m.generic_map:
839                dump_pair(m, k, None)
840
841        self._dump("</table>")
842
843    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
844        m = getattr(s, selector)
845        prev_m = getattr(prev_s, selector) if prev_s is not None else None
846        if m is None and prev_m is None:
847            return
848
849        self._dump("<hr />")
850        self._dump('<tr><td align="left">' "<b>%s: </b>" % title)
851        if m is None:
852            self._dump("<i> Nothing!</i>")
853        else:
854            if prev_m is not None:
855                if m.is_different(prev_m):
856                    self._dump('</td></tr><tr><td align="left">')
857                    self.visit_generic_map(m, prev_m)
858                else:
859                    self._dump("<i> No changes!</i>")
860            else:
861                self._dump('</td></tr><tr><td align="left">')
862                self.visit_generic_map(m)
863
864        self._dump("</td></tr>")
865
866    def visit_checker_messages(self, m, prev_m=None):
867        self._dump('<table border="0">')
868
869        def dump_line(l, is_added=None):
870            self._dump(
871                "<tr><td>%s</td>"
872                '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l)
873            )
874
875        def dump_chk(chk, is_added=None):
876            dump_line("<i>%s</i>:" % chk, is_added)
877
878        if prev_m is not None:
879            removed, added, updated = m.diff_messages(prev_m)
880            for chk in removed:
881                dump_chk(chk, False)
882                for l in prev_m.items[chk].lines:
883                    dump_line(l, False)
884            for chk in updated:
885                dump_chk(chk)
886                for l in m.items[chk].diff_lines(prev_m.items[chk]):
887                    dump_line(l[1:], l.startswith("+"))
888            for chk in added:
889                dump_chk(chk, True)
890                for l in m.items[chk].lines:
891                    dump_line(l, True)
892        else:
893            for chk in m.items:
894                dump_chk(chk)
895                for l in m.items[chk].lines:
896                    dump_line(l)
897
898        self._dump("</table>")
899
900    def visit_checker_messages_in_state(self, s, prev_s=None):
901        m = s.checker_messages
902        prev_m = prev_s.checker_messages if prev_s is not None else None
903        if m is None and prev_m is None:
904            return
905
906        self._dump("<hr />")
907        self._dump('<tr><td align="left">' "<b>Checker State: </b>")
908        if m is None:
909            self._dump("<i> Nothing!</i>")
910        else:
911            if prev_m is not None:
912                if m.is_different(prev_m):
913                    self._dump('</td></tr><tr><td align="left">')
914                    self.visit_checker_messages(m, prev_m)
915                else:
916                    self._dump("<i> No changes!</i>")
917            else:
918                self._dump('</td></tr><tr><td align="left">')
919                self.visit_checker_messages(m)
920
921        self._dump("</td></tr>")
922
923    def visit_state(self, s, prev_s):
924        self.visit_store_in_state(s, prev_s)
925        self.visit_environment_in_state("environment", "Expressions", s, prev_s)
926        self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s)
927        self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s)
928        self.visit_environment_in_state(
929            "constructing_objects", "Objects Under Construction", s, prev_s
930        )
931        self.visit_environment_in_state(
932            "index_of_element", "Indices Of Elements Under Construction", s, prev_s
933        )
934        self.visit_environment_in_state(
935            "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s
936        )
937        self.visit_environment_in_state(
938            "pending_destructors", "Indices of Elements Under Destruction", s, prev_s
939        )
940        self.visit_checker_messages_in_state(s, prev_s)
941
942    def visit_node(self, node):
943        self._dump("%s [shape=record," % (node.node_name()))
944        if self._dark_mode:
945            self._dump('color="white",fontcolor="gray80",')
946        self._dump('label=<<table border="0">')
947
948        self._dump(
949            '<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
950            % (
951                "gray20" if self._dark_mode else "gray70",
952                node.state.state_id if node.state is not None else "Unspecified",
953            )
954        )
955        if not self._topo_mode:
956            self._dump('<tr><td align="left" width="0">')
957            if len(node.points) > 1:
958                self._dump("<b>Program points:</b></td></tr>")
959            else:
960                self._dump("<b>Program point:</b></td></tr>")
961        self._dump(
962            '<tr><td align="left" width="0">'
963            '<table border="0" align="left" width="0">'
964        )
965        for p in node.points:
966            self.visit_program_point(p)
967        self._dump("</table></td></tr>")
968
969        if node.state is not None and not self._topo_mode:
970            prev_s = None
971            # Do diffs only when we have a unique predecessor.
972            # Don't do diffs on the leaf nodes because they're
973            # the important ones.
974            if (
975                self._do_diffs
976                and len(node.predecessors) == 1
977                and len(node.successors) > 0
978            ):
979                prev_s = self._graph.nodes[node.predecessors[0]].state
980            self.visit_state(node.state, prev_s)
981        self._dump_raw("</table>>];\n")
982
983    def visit_edge(self, pred, succ):
984        self._dump_raw(
985            "%s -> %s%s;\n"
986            % (
987                pred.node_name(),
988                succ.node_name(),
989                ' [color="white"]' if self._dark_mode else "",
990            )
991        )
992
993    def visit_end_of_graph(self):
994        self._dump_raw("}\n")
995
996        if not self._dump_dot_only:
997            import sys
998            import tempfile
999
1000            def write_temp_file(suffix, prefix, data):
1001                fd, filename = tempfile.mkstemp(suffix, prefix, ".", True)
1002                print('Writing "%s"...' % filename)
1003                with os.fdopen(fd, "w") as fp:
1004                    fp.write(data)
1005                print("Done! Please remember to remove the file.")
1006                return filename
1007
1008            try:
1009                import graphviz
1010            except ImportError:
1011                # The fallback behavior if graphviz is not installed!
1012                print("Python graphviz not found. Please invoke")
1013                print("  $ pip install graphviz")
1014                print("in order to enable automatic conversion to HTML.")
1015                print()
1016                print("You may also convert DOT to SVG manually via")
1017                print("  $ dot -Tsvg input.dot -o output.svg")
1018                print()
1019                write_temp_file(".dot", "egraph-", self.output())
1020                return
1021
1022            svg = graphviz.pipe("dot", "svg", self.output().encode()).decode()
1023
1024            filename = write_temp_file(
1025                ".html",
1026                "egraph-",
1027                '<html><body bgcolor="%s">%s</body></html>'
1028                % ("#1a1a1a" if self._dark_mode else "white", svg),
1029            )
1030            if self._dump_html_only:
1031                return
1032            if sys.platform == "win32":
1033                os.startfile(filename)
1034            elif sys.platform == "darwin":
1035                os.system('open "%s"' % filename)
1036            else:
1037                os.system('xdg-open "%s"' % filename)
1038
1039
1040# ===-----------------------------------------------------------------------===#
1041# Explorers know how to traverse the ExplodedGraph in a certain order.
1042# They would invoke a Visitor on every node or edge they encounter.
1043# ===-----------------------------------------------------------------------===#
1044
1045
1046# BasicExplorer explores the whole graph in no particular order.
1047class BasicExplorer:
1048    def explore(self, graph, visitor):
1049        visitor.visit_begin_graph(graph)
1050        for node in sorted(graph.nodes):
1051            logging.debug("Visiting " + node)
1052            visitor.visit_node(graph.nodes[node])
1053            for succ in sorted(graph.nodes[node].successors):
1054                logging.debug("Visiting edge: %s -> %s " % (node, succ))
1055                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
1056        visitor.visit_end_of_graph()
1057
1058
1059# ===-----------------------------------------------------------------------===#
1060# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
1061# Trimmers can be combined together by applying them sequentially.
1062# ===-----------------------------------------------------------------------===#
1063
1064
1065# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
1066# Useful when the trimmed graph is still too large.
1067class SinglePathTrimmer:
1068    def trim(self, graph):
1069        visited_nodes = set()
1070        node_id = graph.root_id
1071        while True:
1072            visited_nodes.add(node_id)
1073            node = graph.nodes[node_id]
1074            if len(node.successors) > 0:
1075                succ_id = node.successors[0]
1076                succ = graph.nodes[succ_id]
1077                node.successors = [succ_id]
1078                succ.predecessors = [node_id]
1079                if succ_id in visited_nodes:
1080                    break
1081                node_id = succ_id
1082            else:
1083                break
1084        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1085
1086
1087# TargetedTrimmer keeps paths that lead to specific nodes and discards all
1088# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
1089# a crash).
1090class TargetedTrimmer:
1091    def __init__(self, target_nodes):
1092        self._target_nodes = target_nodes
1093
1094    @staticmethod
1095    def parse_target_node(node, graph):
1096        if node.startswith("0x"):
1097            ret = "Node" + node
1098            assert ret in graph.nodes
1099            return ret
1100        else:
1101            for other_id in graph.nodes:
1102                other = graph.nodes[other_id]
1103                if other.node_id == int(node):
1104                    return other_id
1105
1106    @staticmethod
1107    def parse_target_nodes(target_nodes, graph):
1108        return [
1109            TargetedTrimmer.parse_target_node(node, graph)
1110            for node in target_nodes.split(",")
1111        ]
1112
1113    def trim(self, graph):
1114        queue = self._target_nodes
1115        visited_nodes = set()
1116
1117        while len(queue) > 0:
1118            node_id = queue.pop()
1119            visited_nodes.add(node_id)
1120            node = graph.nodes[node_id]
1121            for pred_id in node.predecessors:
1122                if pred_id not in visited_nodes:
1123                    queue.append(pred_id)
1124        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1125        for node_id in graph.nodes:
1126            node = graph.nodes[node_id]
1127            node.successors = [
1128                succ_id for succ_id in node.successors if succ_id in visited_nodes
1129            ]
1130            node.predecessors = [
1131                succ_id for succ_id in node.predecessors if succ_id in visited_nodes
1132            ]
1133
1134
1135# ===-----------------------------------------------------------------------===#
1136# The entry point to the script.
1137# ===-----------------------------------------------------------------------===#
1138
1139
1140def main():
1141    parser = argparse.ArgumentParser(
1142        description="Display and manipulate Exploded Graph dumps."
1143    )
1144    parser.add_argument(
1145        "filename", type=str, help="the .dot file produced by the Static Analyzer"
1146    )
1147    parser.add_argument(
1148        "-v",
1149        "--verbose",
1150        action="store_const",
1151        dest="loglevel",
1152        const=logging.DEBUG,
1153        default=logging.WARNING,
1154        help="enable info prints",
1155    )
1156    parser.add_argument(
1157        "-d",
1158        "--diff",
1159        action="store_const",
1160        dest="diff",
1161        const=True,
1162        default=False,
1163        help="display differences between states",
1164    )
1165    parser.add_argument(
1166        "-t",
1167        "--topology",
1168        action="store_const",
1169        dest="topology",
1170        const=True,
1171        default=False,
1172        help="only display program points, omit states",
1173    )
1174    parser.add_argument(
1175        "-s",
1176        "--single-path",
1177        action="store_const",
1178        dest="single_path",
1179        const=True,
1180        default=False,
1181        help="only display the leftmost path in the graph "
1182        "(useful for trimmed graphs that still "
1183        "branch too much)",
1184    )
1185    parser.add_argument(
1186        "--to",
1187        type=str,
1188        default=None,
1189        help="only display execution paths from the root "
1190        "to the given comma-separated list of nodes "
1191        "identified by a pointer or a stable ID; "
1192        "compatible with --single-path",
1193    )
1194    parser.add_argument(
1195        "--dark",
1196        action="store_const",
1197        dest="dark",
1198        const=True,
1199        default=False,
1200        help="dark mode",
1201    )
1202    parser.add_argument(
1203        "--gray",
1204        action="store_const",
1205        dest="gray",
1206        const=True,
1207        default=False,
1208        help="black-and-white mode",
1209    )
1210    dump_conflict = parser.add_mutually_exclusive_group()
1211    dump_conflict.add_argument(
1212        "--dump-html-only",
1213        action="store_const",
1214        dest="dump_html_only",
1215        const=True,
1216        default=False,
1217        help="dump the rewritten egraph to a temporary HTML file, "
1218        "but do not open it immediately as by default",
1219    )
1220    dump_conflict.add_argument(
1221        "--dump-dot-only",
1222        action="store_const",
1223        dest="dump_dot_only",
1224        const=True,
1225        default=False,
1226        help="instead of writing an HTML file and immediately "
1227        "displaying it, dump the rewritten dot file "
1228        "to stdout",
1229    )
1230    args = parser.parse_args()
1231    logging.basicConfig(level=args.loglevel)
1232
1233    graph = ExplodedGraph()
1234    with open(args.filename) as fd:
1235        for raw_line in fd:
1236            raw_line = raw_line.strip()
1237            graph.add_raw_line(raw_line)
1238
1239    trimmers = []
1240    if args.to is not None:
1241        trimmers.append(
1242            TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph))
1243        )
1244    if args.single_path:
1245        trimmers.append(SinglePathTrimmer())
1246
1247    explorer = BasicExplorer()
1248
1249    visitor = DotDumpVisitor(
1250        args.diff,
1251        args.dark,
1252        args.gray,
1253        args.topology,
1254        args.dump_html_only,
1255        args.dump_dot_only,
1256    )
1257
1258    for trimmer in trimmers:
1259        trimmer.trim(graph)
1260
1261    explorer.explore(graph, visitor)
1262
1263
1264if __name__ == "__main__":
1265    main()
1266