xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 9cbf2dd6f3900045f1bbbdf44142f572d8f3b339)
1#!/usr/bin/env python
2#
3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23# ===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25# ===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
29def diff_dicts(curr, prev):
30    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
31    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
32    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
36class GenericMap:
37    def __init__(self, items):
38        self.generic_map = collections.OrderedDict(items)
39
40    def diff(self, prev):
41        return diff_dicts(self.generic_map, prev.generic_map)
42
43    def is_different(self, prev):
44        removed, added = self.diff(prev)
45        return len(removed) != 0 or len(added) != 0
46
47
48# A deserialized source location.
49class SourceLocation:
50    def __init__(self, json_loc):
51        logging.debug("json: %s" % json_loc)
52        self.line = json_loc["line"]
53        self.col = json_loc["column"]
54        self.filename = (
55            os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)"
56        )
57        self.spelling = (
58            SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None
59        )
60
61    def is_macro(self):
62        return self.spelling is not None
63
64
65# A deserialized program point.
66class ProgramPoint:
67    def __init__(self, json_pp):
68        self.kind = json_pp["kind"]
69        self.tag = json_pp["tag"]
70        self.node_id = json_pp["node_id"]
71        self.is_sink = bool(json_pp["is_sink"])
72        self.has_report = bool(json_pp["has_report"])
73        if self.kind == "Edge":
74            self.src_id = json_pp["src_id"]
75            self.dst_id = json_pp["dst_id"]
76        elif self.kind == "Statement":
77            logging.debug(json_pp)
78            self.stmt_kind = json_pp["stmt_kind"]
79            self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None
80            self.stmt_point_kind = json_pp["stmt_point_kind"]
81            self.stmt_id = json_pp["stmt_id"]
82            self.pointer = json_pp["pointer"]
83            self.pretty = json_pp["pretty"]
84            self.loc = (
85                SourceLocation(json_pp["location"])
86                if json_pp["location"] is not None
87                else None
88            )
89        elif self.kind == "CallEnter":
90            self.callee_decl = json_pp.get("callee_decl", "None")
91        elif self.kind == "BlockEntrance":
92            self.block_id = json_pp["block_id"]
93
94
95# A single expression acting as a key in a deserialized Environment.
96class EnvironmentBindingKey:
97    def __init__(self, json_ek):
98        # CXXCtorInitializer is not a Stmt!
99        self.stmt_id = (
100            json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"]
101        )
102        self.pretty = json_ek["pretty"]
103        self.kind = json_ek["kind"] if "kind" in json_ek else None
104
105    def _key(self):
106        return self.stmt_id
107
108    def __eq__(self, other):
109        return self._key() == other._key()
110
111    def __hash__(self):
112        return hash(self._key())
113
114
115# Deserialized description of a location context.
116class LocationContext:
117    def __init__(self, json_frame):
118        self.lctx_id = json_frame["lctx_id"]
119        self.caption = json_frame["location_context"]
120        self.decl = json_frame["calling"]
121        self.loc = (
122            SourceLocation(json_frame["location"])
123            if json_frame["location"] is not None
124            else None
125        )
126
127    def _key(self):
128        return self.lctx_id
129
130    def __eq__(self, other):
131        return self._key() == other._key()
132
133    def __hash__(self):
134        return hash(self._key())
135
136
137# A group of deserialized Environment bindings that correspond to a specific
138# location context.
139class EnvironmentFrame:
140    def __init__(self, json_frame):
141        self.location_context = LocationContext(json_frame)
142        self.bindings = collections.OrderedDict(
143            [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]]
144            if json_frame["items"] is not None
145            else []
146        )
147
148    def diff_bindings(self, prev):
149        return diff_dicts(self.bindings, prev.bindings)
150
151    def is_different(self, prev):
152        removed, added = self.diff_bindings(prev)
153        return len(removed) != 0 or len(added) != 0
154
155
156# A deserialized Environment. This class can also hold other entities that
157# are similar to Environment, such as Objects Under Construction or
158# Indices Of Elements Under Construction.
159class GenericEnvironment:
160    def __init__(self, json_e):
161        self.frames = [EnvironmentFrame(f) for f in json_e]
162
163    def diff_frames(self, prev):
164        # TODO: It's difficult to display a good diff when frame numbers shift.
165        if len(self.frames) != len(prev.frames):
166            return None
167
168        updated = []
169        for i in range(len(self.frames)):
170            f = self.frames[i]
171            prev_f = prev.frames[i]
172            if f.location_context == prev_f.location_context:
173                if f.is_different(prev_f):
174                    updated.append(i)
175            else:
176                # We have the whole frame replaced with another frame.
177                # TODO: Produce a nice diff.
178                return None
179
180        # TODO: Add support for added/removed.
181        return updated
182
183    def is_different(self, prev):
184        updated = self.diff_frames(prev)
185        return updated is None or len(updated) > 0
186
187
188# A single binding key in a deserialized RegionStore cluster.
189class StoreBindingKey:
190    def __init__(self, json_sk):
191        self.kind = json_sk["kind"]
192        self.offset = json_sk["offset"]
193
194    def _key(self):
195        return (self.kind, self.offset)
196
197    def __eq__(self, other):
198        return self._key() == other._key()
199
200    def __hash__(self):
201        return hash(self._key())
202
203
204# A single cluster of the deserialized RegionStore.
205class StoreCluster:
206    def __init__(self, json_sc):
207        self.base_region = json_sc["cluster"]
208        self.bindings = collections.OrderedDict(
209            [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]]
210        )
211
212    def diff_bindings(self, prev):
213        return diff_dicts(self.bindings, prev.bindings)
214
215    def is_different(self, prev):
216        removed, added = self.diff_bindings(prev)
217        return len(removed) != 0 or len(added) != 0
218
219
220# A deserialized RegionStore.
221class Store:
222    def __init__(self, json_s):
223        self.ptr = json_s["pointer"]
224        self.clusters = collections.OrderedDict(
225            [(c["pointer"], StoreCluster(c)) for c in json_s["items"]]
226        )
227
228    def diff_clusters(self, prev):
229        removed = [k for k in prev.clusters if k not in self.clusters]
230        added = [k for k in self.clusters if k not in prev.clusters]
231        updated = [
232            k
233            for k in prev.clusters
234            if k in self.clusters and prev.clusters[k].is_different(self.clusters[k])
235        ]
236        return (removed, added, updated)
237
238    def is_different(self, prev):
239        removed, added, updated = self.diff_clusters(prev)
240        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
241
242
243# Deserialized messages from a single checker in a single program state.
244# Basically a list of raw strings.
245class CheckerLines:
246    def __init__(self, json_lines):
247        self.lines = json_lines
248
249    def diff_lines(self, prev):
250        lines = difflib.ndiff(prev.lines, self.lines)
251        return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")]
252
253    def is_different(self, prev):
254        return len(self.diff_lines(prev)) > 0
255
256
257# Deserialized messages of all checkers, separated by checker.
258class CheckerMessages:
259    def __init__(self, json_m):
260        self.items = collections.OrderedDict(
261            [(m["checker"], CheckerLines(m["messages"])) for m in json_m]
262        )
263
264    def diff_messages(self, prev):
265        removed = [k for k in prev.items if k not in self.items]
266        added = [k for k in self.items if k not in prev.items]
267        updated = [
268            k
269            for k in prev.items
270            if k in self.items and prev.items[k].is_different(self.items[k])
271        ]
272        return (removed, added, updated)
273
274    def is_different(self, prev):
275        removed, added, updated = self.diff_messages(prev)
276        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
277
278
279# A deserialized program state.
280class ProgramState:
281    def __init__(self, state_id, json_ps):
282        logging.debug("Adding ProgramState " + str(state_id))
283
284        store_key = "store"
285        env_key = "environment"
286        constraints_key = "constraints"
287        dyn_ty_key = "dynamic_types"
288        ctor_key = "constructing_objects"
289        ind_key = "index_of_element"
290        init_loop_key = "pending_init_loops"
291        dtor_key = "pending_destructors"
292        msg_key = "checker_messages"
293
294        if json_ps is None:
295            json_ps = {
296                store_key: None,
297                env_key: None,
298                constraints_key: None,
299                dyn_ty_key: None,
300                ctor_key: None,
301                ind_key: None,
302                init_loop_key: None,
303                dtor_key: None,
304                msg_key: None,
305            }
306
307        self.state_id = state_id
308
309        self.store = (
310            Store(json_ps[store_key]) if json_ps[store_key] is not None else None
311        )
312
313        self.environment = (
314            GenericEnvironment(json_ps[env_key]["items"])
315            if json_ps[env_key] is not None
316            else None
317        )
318
319        self.constraints = (
320            GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]])
321            if json_ps[constraints_key] is not None
322            else None
323        )
324
325        self.dynamic_types = (
326            GenericMap(
327                [
328                    (
329                        t["region"],
330                        "%s%s"
331                        % (
332                            t["dyn_type"],
333                            " (or a sub-class)" if t["sub_classable"] else "",
334                        ),
335                    )
336                    for t in json_ps[dyn_ty_key]
337                ]
338            )
339            if json_ps[dyn_ty_key] is not None
340            else None
341        )
342
343        self.checker_messages = (
344            CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None
345        )
346
347        # State traits
348        #
349        # For traits we always check if a key exists because if a trait
350        # has no imformation, nothing will be printed in the .dot file
351        # we parse.
352
353        self.constructing_objects = (
354            GenericEnvironment(json_ps[ctor_key])
355            if ctor_key in json_ps and json_ps[ctor_key] is not None
356            else None
357        )
358
359        self.index_of_element = (
360            GenericEnvironment(json_ps[ind_key])
361            if ind_key in json_ps and json_ps[ind_key] is not None
362            else None
363        )
364
365        self.pending_init_loops = (
366            GenericEnvironment(json_ps[init_loop_key])
367            if init_loop_key in json_ps and json_ps[init_loop_key] is not None
368            else None
369        )
370
371        self.pending_destructors = (
372            GenericEnvironment(json_ps[dtor_key])
373            if dtor_key in json_ps and json_ps[dtor_key] is not None
374            else None
375        )
376
377
378# A deserialized exploded graph node. Has a default constructor because it
379# may be referenced as part of an edge before its contents are deserialized,
380# and in this moment we already need a room for predecessors and successors.
381class ExplodedNode:
382    def __init__(self):
383        self.predecessors = []
384        self.successors = []
385
386    def construct(self, node_id, json_node):
387        logging.debug("Adding " + node_id)
388        self.ptr = node_id[4:]
389        self.points = [ProgramPoint(p) for p in json_node["program_points"]]
390        self.node_id = self.points[-1].node_id
391        self.state = ProgramState(
392            json_node["state_id"],
393            json_node["program_state"]
394            if json_node["program_state"] is not None
395            else None,
396        )
397
398        assert self.node_name() == node_id
399
400    def node_name(self):
401        return "Node" + self.ptr
402
403
404# A deserialized ExplodedGraph. Constructed by consuming a .dot file
405# line-by-line.
406class ExplodedGraph:
407    # Parse .dot files with regular expressions.
408    node_re = re.compile(
409        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$'
410    )
411    edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$")
412
413    def __init__(self):
414        self.nodes = collections.defaultdict(ExplodedNode)
415        self.root_id = None
416        self.incomplete_line = ""
417
418    def add_raw_line(self, raw_line):
419        if raw_line.startswith("//"):
420            return
421
422        # Allow line breaks by waiting for ';'. This is not valid in
423        # a .dot file, but it is useful for writing tests.
424        if len(raw_line) > 0 and raw_line[-1] != ";":
425            self.incomplete_line += raw_line
426            return
427        raw_line = self.incomplete_line + raw_line
428        self.incomplete_line = ""
429
430        # Apply regexps one by one to see if it's a node or an edge
431        # and extract contents if necessary.
432        logging.debug("Line: " + raw_line)
433        result = self.edge_re.match(raw_line)
434        if result is not None:
435            logging.debug("Classified as edge line.")
436            pred = result.group(1)
437            succ = result.group(2)
438            self.nodes[pred].successors.append(succ)
439            self.nodes[succ].predecessors.append(pred)
440            return
441        result = self.node_re.match(raw_line)
442        if result is not None:
443            logging.debug("Classified as node line.")
444            node_id = result.group(1)
445            if len(self.nodes) == 0:
446                self.root_id = node_id
447            # Note: when writing tests you don't need to escape everything,
448            # even though in a valid dot file everything is escaped.
449            node_label = (
450                result.group(2)
451                .replace(" ", "")
452                .replace('\\"', '"')
453                .replace("\\{", "{")
454                .replace("\\}", "}")
455                .replace("\\\\", "\\")
456                .replace("\\|", "|")
457                .replace("\\<", "\\\\<")
458                .replace("\\>", "\\\\>")
459                .rstrip(",")
460            )
461            # Handle `\l` separately because a string literal can be in code
462            # like "string\\literal" with the `\l` inside.
463            # Also on Windows macros __FILE__ produces specific delimiters `\`
464            # and a directory or file may starts with the letter `l`.
465            # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
466            # because the literal as a rule contains multiple `\` before `\l`.
467            node_label = re.sub(r"(?<!\\)\\l", "", node_label)
468            logging.debug(node_label)
469            json_node = json.loads(node_label)
470            self.nodes[node_id].construct(node_id, json_node)
471            return
472        logging.debug("Skipping.")
473
474
475# ===-----------------------------------------------------------------------===#
476# Visitors traverse a deserialized ExplodedGraph and do different things
477# with every node and edge.
478# ===-----------------------------------------------------------------------===#
479
480
481# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
482# syntax highlighing.
483class DotDumpVisitor:
484    def __init__(
485        self, do_diffs, dark_mode, gray_mode, topo_mode, dump_html_only, dump_dot_only
486    ):
487        assert not (dump_html_only and dump_dot_only), (
488            "Option dump_html_only and dump_dot_only are conflict, "
489            "they cannot be true at the same time."
490        )
491
492        self._do_diffs = do_diffs
493        self._dark_mode = dark_mode
494        self._gray_mode = gray_mode
495        self._topo_mode = topo_mode
496        self._dump_html_only = dump_html_only
497        self._dump_dot_only = dump_dot_only
498        self._output = []
499
500    def _dump_raw(self, s):
501        if self._dump_dot_only:
502            print(s, end="")
503        else:
504            self._output.append(s)
505
506    def output(self):
507        assert not self._dump_dot_only
508        return "".join(self._output)
509
510    def _dump(self, s):
511        s = (
512            s.replace("&", "&amp;")
513            .replace("{", "\\{")
514            .replace("}", "\\}")
515            .replace("\\<", "&lt;")
516            .replace("\\>", "&gt;")
517            .replace("|", "\\|")
518        )
519        s = re.sub(r"(?<!\\)\\l", "<br />", s)
520        if self._gray_mode:
521            s = re.sub(r'<font color="[a-z0-9]*">', "", s)
522            s = re.sub(r"</font>", "", s)
523        self._dump_raw(s)
524
525    @staticmethod
526    def _diff_plus_minus(is_added):
527        if is_added is None:
528            return ""
529        if is_added:
530            return '<font color="forestgreen">+</font>'
531        return '<font color="red">-</font>'
532
533    @staticmethod
534    def _short_pretty(s):
535        if s is None:
536            return None
537        if len(s) < 20:
538            return s
539        left = s.find("{")
540        right = s.rfind("}")
541        if left == -1 or right == -1 or left >= right:
542            return s
543        candidate = s[0 : left + 1] + " ... " + s[right:]
544        if len(candidate) >= len(s):
545            return s
546        return candidate
547
548    @staticmethod
549    def _make_sloc(loc):
550        if loc is None:
551            return "<i>Invalid Source Location</i>"
552
553        def make_plain_loc(loc):
554            return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col)
555
556        if loc.is_macro():
557            return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % (
558                make_plain_loc(loc),
559                make_plain_loc(loc.spelling),
560            )
561
562        return make_plain_loc(loc)
563
564    def visit_begin_graph(self, graph):
565        self._graph = graph
566        self._dump_raw('digraph "ExplodedGraph" {\n')
567        if self._dark_mode:
568            self._dump_raw('bgcolor="gray10";\n')
569        self._dump_raw('label="";\n')
570
571    def visit_program_point(self, p):
572        if p.kind in ["Edge", "BlockEntrance", "BlockExit"]:
573            color = "gold3"
574        elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]:
575            color = "red"
576        elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]:
577            color = "dodgerblue" if self._dark_mode else "blue"
578        elif p.kind in ["Statement"]:
579            color = "cyan4"
580        else:
581            color = "forestgreen"
582
583        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
584
585        if p.kind == "Statement":
586            # This avoids pretty-printing huge statements such as CompoundStmt.
587            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
588            skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind
589            stmt_color = "cyan3"
590            self._dump(
591                '<td align="left" width="0">%s:</td>'
592                '<td align="left" width="0"><font color="%s">'
593                "%s</font> </td>"
594                '<td align="left"><i>S%s</i></td>'
595                '<td align="left"><font color="%s">%s</font></td>'
596                '<td align="left">%s</td></tr>'
597                % (
598                    self._make_sloc(p.loc),
599                    color,
600                    "%s (%s)" % (p.stmt_kind, p.cast_kind)
601                    if p.cast_kind is not None
602                    else p.stmt_kind,
603                    p.stmt_id,
604                    stmt_color,
605                    p.stmt_point_kind,
606                    self._short_pretty(p.pretty) if not skip_pretty else "",
607                )
608            )
609        elif p.kind == "Edge":
610            self._dump(
611                '<td width="0"></td>'
612                '<td align="left" width="0">'
613                '<font color="%s">%s</font></td><td align="left">'
614                "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id)
615            )
616        elif p.kind == "BlockEntrance":
617            self._dump(
618                '<td width="0"></td>'
619                '<td align="left" width="0">'
620                '<font color="%s">%s</font></td>'
621                '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id)
622            )
623        elif p.kind == "CallEnter":
624            self._dump(
625                '<td width="0"></td>'
626                '<td align="left" width="0">'
627                '<font color="%s">%s</font></td>'
628                '<td align="left">%s</td></tr>' % (color, p.kind, p.callee_decl)
629            )
630        else:
631            # TODO: Print more stuff for other kinds of points.
632            self._dump(
633                '<td width="0"></td>'
634                '<td align="left" width="0" colspan="2">'
635                '<font color="%s">%s</font></td></tr>' % (color, p.kind)
636            )
637
638        if p.tag is not None:
639            self._dump(
640                '<tr><td width="0"></td><td width="0"></td>'
641                '<td colspan="3" align="left">'
642                '<b>Tag: </b> <font color="crimson">'
643                "%s</font></td></tr>" % p.tag
644            )
645
646        if p.has_report:
647            self._dump(
648                '<tr><td width="0"></td><td width="0"></td>'
649                '<td colspan="3" align="left">'
650                '<font color="red"><b>Bug Report Attached'
651                "</b></font></td></tr>"
652            )
653        if p.is_sink:
654            self._dump(
655                '<tr><td width="0"></td><td width="0"></td>'
656                '<td colspan="3" align="left">'
657                '<font color="cornflowerblue"><b>Sink Node'
658                "</b></font></td></tr>"
659            )
660
661    def visit_environment(self, e, prev_e=None):
662        self._dump('<table border="0">')
663
664        def dump_location_context(lc, is_added=None):
665            self._dump(
666                "<tr><td>%s</td>"
667                '<td align="left"><b>%s</b></td>'
668                '<td align="left" colspan="2">'
669                '<font color="gray60">%s </font>'
670                "%s</td></tr>"
671                % (
672                    self._diff_plus_minus(is_added),
673                    lc.caption,
674                    lc.decl,
675                    ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "",
676                )
677            )
678
679        def dump_binding(f, b, is_added=None):
680            self._dump(
681                "<tr><td>%s</td>"
682                '<td align="left"><i>S%s</i></td>'
683                "%s"
684                '<td align="left">%s</td>'
685                '<td align="left">%s</td></tr>'
686                % (
687                    self._diff_plus_minus(is_added),
688                    b.stmt_id,
689                    '<td align="left"><font color="%s"><i>'
690                    "%s</i></font></td>"
691                    % (
692                        "lavender" if self._dark_mode else "darkgreen",
693                        ("(%s)" % b.kind) if b.kind is not None else " ",
694                    ),
695                    self._short_pretty(b.pretty),
696                    f.bindings[b],
697                )
698            )
699
700        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
701        if frames_updated:
702            for i in frames_updated:
703                f = e.frames[i]
704                prev_f = prev_e.frames[i]
705                dump_location_context(f.location_context)
706                bindings_removed, bindings_added = f.diff_bindings(prev_f)
707                for b in bindings_removed:
708                    dump_binding(prev_f, b, False)
709                for b in bindings_added:
710                    dump_binding(f, b, True)
711        else:
712            for f in e.frames:
713                dump_location_context(f.location_context)
714                for b in f.bindings:
715                    dump_binding(f, b)
716
717        self._dump("</table>")
718
719    def visit_environment_in_state(self, selector, title, s, prev_s=None):
720        e = getattr(s, selector)
721        prev_e = getattr(prev_s, selector) if prev_s is not None else None
722        if e is None and prev_e is None:
723            return
724
725        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
726        if e is None:
727            self._dump("<i> Nothing!</i>")
728        else:
729            if prev_e is not None:
730                if e.is_different(prev_e):
731                    self._dump('</td></tr><tr><td align="left">')
732                    self.visit_environment(e, prev_e)
733                else:
734                    self._dump("<i> No changes!</i>")
735            else:
736                self._dump('</td></tr><tr><td align="left">')
737                self.visit_environment(e)
738
739        self._dump("</td></tr>")
740
741    def visit_store(self, s, prev_s=None):
742        self._dump('<table border="0">')
743
744        def dump_binding(s, c, b, is_added=None):
745            self._dump(
746                "<tr><td>%s</td>"
747                '<td align="left">%s</td>'
748                '<td align="left">%s</td>'
749                '<td align="left">%s</td>'
750                '<td align="left">%s</td></tr>'
751                % (
752                    self._diff_plus_minus(is_added),
753                    s.clusters[c].base_region,
754                    b.offset,
755                    "(<i>Default</i>)" if b.kind == "Default" else "",
756                    s.clusters[c].bindings[b],
757                )
758            )
759
760        if prev_s is not None:
761            clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s)
762            for c in clusters_removed:
763                for b in prev_s.clusters[c].bindings:
764                    dump_binding(prev_s, c, b, False)
765            for c in clusters_updated:
766                bindings_removed, bindings_added = s.clusters[c].diff_bindings(
767                    prev_s.clusters[c]
768                )
769                for b in bindings_removed:
770                    dump_binding(prev_s, c, b, False)
771                for b in bindings_added:
772                    dump_binding(s, c, b, True)
773            for c in clusters_added:
774                for b in s.clusters[c].bindings:
775                    dump_binding(s, c, b, True)
776        else:
777            for c in s.clusters:
778                for b in s.clusters[c].bindings:
779                    dump_binding(s, c, b)
780
781        self._dump("</table>")
782
783    def visit_store_in_state(self, s, prev_s=None):
784        st = s.store
785        prev_st = prev_s.store if prev_s is not None else None
786        if st is None and prev_st is None:
787            return
788
789        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
790        if st is None:
791            self._dump("<i> Nothing!</i>")
792        else:
793            if self._dark_mode:
794                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
795            else:
796                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
797            if prev_st is not None:
798                if s.store.is_different(prev_st):
799                    self._dump('</td></tr><tr><td align="left">')
800                    self.visit_store(st, prev_st)
801                else:
802                    self._dump("<i> No changes!</i>")
803            else:
804                self._dump('</td></tr><tr><td align="left">')
805                self.visit_store(st)
806        self._dump("</td></tr>")
807
808    def visit_generic_map(self, m, prev_m=None):
809        self._dump('<table border="0">')
810
811        def dump_pair(m, k, is_added=None):
812            self._dump(
813                "<tr><td>%s</td>"
814                '<td align="left">%s</td>'
815                '<td align="left">%s</td></tr>'
816                % (self._diff_plus_minus(is_added), k, m.generic_map[k])
817            )
818
819        if prev_m is not None:
820            removed, added = m.diff(prev_m)
821            for k in removed:
822                dump_pair(prev_m, k, False)
823            for k in added:
824                dump_pair(m, k, True)
825        else:
826            for k in m.generic_map:
827                dump_pair(m, k, None)
828
829        self._dump("</table>")
830
831    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
832        m = getattr(s, selector)
833        prev_m = getattr(prev_s, selector) if prev_s is not None else None
834        if m is None and prev_m is None:
835            return
836
837        self._dump("<hr />")
838        self._dump('<tr><td align="left">' "<b>%s: </b>" % title)
839        if m is None:
840            self._dump("<i> Nothing!</i>")
841        else:
842            if prev_m is not None:
843                if m.is_different(prev_m):
844                    self._dump('</td></tr><tr><td align="left">')
845                    self.visit_generic_map(m, prev_m)
846                else:
847                    self._dump("<i> No changes!</i>")
848            else:
849                self._dump('</td></tr><tr><td align="left">')
850                self.visit_generic_map(m)
851
852        self._dump("</td></tr>")
853
854    def visit_checker_messages(self, m, prev_m=None):
855        self._dump('<table border="0">')
856
857        def dump_line(l, is_added=None):
858            self._dump(
859                "<tr><td>%s</td>"
860                '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l)
861            )
862
863        def dump_chk(chk, is_added=None):
864            dump_line("<i>%s</i>:" % chk, is_added)
865
866        if prev_m is not None:
867            removed, added, updated = m.diff_messages(prev_m)
868            for chk in removed:
869                dump_chk(chk, False)
870                for l in prev_m.items[chk].lines:
871                    dump_line(l, False)
872            for chk in updated:
873                dump_chk(chk)
874                for l in m.items[chk].diff_lines(prev_m.items[chk]):
875                    dump_line(l[1:], l.startswith("+"))
876            for chk in added:
877                dump_chk(chk, True)
878                for l in m.items[chk].lines:
879                    dump_line(l, True)
880        else:
881            for chk in m.items:
882                dump_chk(chk)
883                for l in m.items[chk].lines:
884                    dump_line(l)
885
886        self._dump("</table>")
887
888    def visit_checker_messages_in_state(self, s, prev_s=None):
889        m = s.checker_messages
890        prev_m = prev_s.checker_messages if prev_s is not None else None
891        if m is None and prev_m is None:
892            return
893
894        self._dump("<hr />")
895        self._dump('<tr><td align="left">' "<b>Checker State: </b>")
896        if m is None:
897            self._dump("<i> Nothing!</i>")
898        else:
899            if prev_m is not None:
900                if m.is_different(prev_m):
901                    self._dump('</td></tr><tr><td align="left">')
902                    self.visit_checker_messages(m, prev_m)
903                else:
904                    self._dump("<i> No changes!</i>")
905            else:
906                self._dump('</td></tr><tr><td align="left">')
907                self.visit_checker_messages(m)
908
909        self._dump("</td></tr>")
910
911    def visit_state(self, s, prev_s):
912        self.visit_store_in_state(s, prev_s)
913        self.visit_environment_in_state("environment", "Expressions", s, prev_s)
914        self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s)
915        self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s)
916        self.visit_environment_in_state(
917            "constructing_objects", "Objects Under Construction", s, prev_s
918        )
919        self.visit_environment_in_state(
920            "index_of_element", "Indices Of Elements Under Construction", s, prev_s
921        )
922        self.visit_environment_in_state(
923            "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s
924        )
925        self.visit_environment_in_state(
926            "pending_destructors", "Indices of Elements Under Destruction", s, prev_s
927        )
928        self.visit_checker_messages_in_state(s, prev_s)
929
930    def visit_node(self, node):
931        self._dump("%s [shape=record," % (node.node_name()))
932        if self._dark_mode:
933            self._dump('color="white",fontcolor="gray80",')
934        self._dump('label=<<table border="0">')
935
936        self._dump(
937            '<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
938            % (
939                "gray20" if self._dark_mode else "gray70",
940                node.state.state_id if node.state is not None else "Unspecified",
941            )
942        )
943        if not self._topo_mode:
944            self._dump('<tr><td align="left" width="0">')
945            if len(node.points) > 1:
946                self._dump("<b>Program points:</b></td></tr>")
947            else:
948                self._dump("<b>Program point:</b></td></tr>")
949        self._dump(
950            '<tr><td align="left" width="0">'
951            '<table border="0" align="left" width="0">'
952        )
953        for p in node.points:
954            self.visit_program_point(p)
955        self._dump("</table></td></tr>")
956
957        if node.state is not None and not self._topo_mode:
958            prev_s = None
959            # Do diffs only when we have a unique predecessor.
960            # Don't do diffs on the leaf nodes because they're
961            # the important ones.
962            if (
963                self._do_diffs
964                and len(node.predecessors) == 1
965                and len(node.successors) > 0
966            ):
967                prev_s = self._graph.nodes[node.predecessors[0]].state
968            self.visit_state(node.state, prev_s)
969        self._dump_raw("</table>>];\n")
970
971    def visit_edge(self, pred, succ):
972        self._dump_raw(
973            "%s -> %s%s;\n"
974            % (
975                pred.node_name(),
976                succ.node_name(),
977                ' [color="white"]' if self._dark_mode else "",
978            )
979        )
980
981    def visit_end_of_graph(self):
982        self._dump_raw("}\n")
983
984        if not self._dump_dot_only:
985            import sys
986            import tempfile
987
988            def write_temp_file(suffix, prefix, data):
989                fd, filename = tempfile.mkstemp(suffix, prefix, ".", True)
990                print('Writing "%s"...' % filename)
991                with os.fdopen(fd, "w") as fp:
992                    fp.write(data)
993                print("Done! Please remember to remove the file.")
994                return filename
995
996            try:
997                import graphviz
998            except ImportError:
999                # The fallback behavior if graphviz is not installed!
1000                print("Python graphviz not found. Please invoke")
1001                print("  $ pip install graphviz")
1002                print("in order to enable automatic conversion to HTML.")
1003                print()
1004                print("You may also convert DOT to SVG manually via")
1005                print("  $ dot -Tsvg input.dot -o output.svg")
1006                print()
1007                write_temp_file(".dot", "egraph-", self.output())
1008                return
1009
1010            svg = graphviz.pipe("dot", "svg", self.output().encode()).decode()
1011
1012            filename = write_temp_file(
1013                ".html",
1014                "egraph-",
1015                '<html><body bgcolor="%s">%s</body></html>'
1016                % ("#1a1a1a" if self._dark_mode else "white", svg),
1017            )
1018            if self._dump_html_only:
1019                return
1020            if sys.platform == "win32":
1021                os.startfile(filename)
1022            elif sys.platform == "darwin":
1023                os.system('open "%s"' % filename)
1024            else:
1025                os.system('xdg-open "%s"' % filename)
1026
1027
1028# ===-----------------------------------------------------------------------===#
1029# Explorers know how to traverse the ExplodedGraph in a certain order.
1030# They would invoke a Visitor on every node or edge they encounter.
1031# ===-----------------------------------------------------------------------===#
1032
1033
1034# BasicExplorer explores the whole graph in no particular order.
1035class BasicExplorer:
1036    def explore(self, graph, visitor):
1037        visitor.visit_begin_graph(graph)
1038        for node in sorted(graph.nodes):
1039            logging.debug("Visiting " + node)
1040            visitor.visit_node(graph.nodes[node])
1041            for succ in sorted(graph.nodes[node].successors):
1042                logging.debug("Visiting edge: %s -> %s " % (node, succ))
1043                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
1044        visitor.visit_end_of_graph()
1045
1046
1047# ===-----------------------------------------------------------------------===#
1048# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
1049# Trimmers can be combined together by applying them sequentially.
1050# ===-----------------------------------------------------------------------===#
1051
1052
1053# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
1054# Useful when the trimmed graph is still too large.
1055class SinglePathTrimmer:
1056    def trim(self, graph):
1057        visited_nodes = set()
1058        node_id = graph.root_id
1059        while True:
1060            visited_nodes.add(node_id)
1061            node = graph.nodes[node_id]
1062            if len(node.successors) > 0:
1063                succ_id = node.successors[0]
1064                succ = graph.nodes[succ_id]
1065                node.successors = [succ_id]
1066                succ.predecessors = [node_id]
1067                if succ_id in visited_nodes:
1068                    break
1069                node_id = succ_id
1070            else:
1071                break
1072        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1073
1074
1075# TargetedTrimmer keeps paths that lead to specific nodes and discards all
1076# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
1077# a crash).
1078class TargetedTrimmer:
1079    def __init__(self, target_nodes):
1080        self._target_nodes = target_nodes
1081
1082    @staticmethod
1083    def parse_target_node(node, graph):
1084        if node.startswith("0x"):
1085            ret = "Node" + node
1086            assert ret in graph.nodes
1087            return ret
1088        else:
1089            for other_id in graph.nodes:
1090                other = graph.nodes[other_id]
1091                if other.node_id == int(node):
1092                    return other_id
1093
1094    @staticmethod
1095    def parse_target_nodes(target_nodes, graph):
1096        return [
1097            TargetedTrimmer.parse_target_node(node, graph)
1098            for node in target_nodes.split(",")
1099        ]
1100
1101    def trim(self, graph):
1102        queue = self._target_nodes
1103        visited_nodes = set()
1104
1105        while len(queue) > 0:
1106            node_id = queue.pop()
1107            visited_nodes.add(node_id)
1108            node = graph.nodes[node_id]
1109            for pred_id in node.predecessors:
1110                if pred_id not in visited_nodes:
1111                    queue.append(pred_id)
1112        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1113        for node_id in graph.nodes:
1114            node = graph.nodes[node_id]
1115            node.successors = [
1116                succ_id for succ_id in node.successors if succ_id in visited_nodes
1117            ]
1118            node.predecessors = [
1119                succ_id for succ_id in node.predecessors if succ_id in visited_nodes
1120            ]
1121
1122
1123# ===-----------------------------------------------------------------------===#
1124# The entry point to the script.
1125# ===-----------------------------------------------------------------------===#
1126
1127
1128def main():
1129    parser = argparse.ArgumentParser(
1130        description="Display and manipulate Exploded Graph dumps."
1131    )
1132    parser.add_argument(
1133        "filename", type=str, help="the .dot file produced by the Static Analyzer"
1134    )
1135    parser.add_argument(
1136        "-v",
1137        "--verbose",
1138        action="store_const",
1139        dest="loglevel",
1140        const=logging.DEBUG,
1141        default=logging.WARNING,
1142        help="enable info prints",
1143    )
1144    parser.add_argument(
1145        "-d",
1146        "--diff",
1147        action="store_const",
1148        dest="diff",
1149        const=True,
1150        default=False,
1151        help="display differences between states",
1152    )
1153    parser.add_argument(
1154        "-t",
1155        "--topology",
1156        action="store_const",
1157        dest="topology",
1158        const=True,
1159        default=False,
1160        help="only display program points, omit states",
1161    )
1162    parser.add_argument(
1163        "-s",
1164        "--single-path",
1165        action="store_const",
1166        dest="single_path",
1167        const=True,
1168        default=False,
1169        help="only display the leftmost path in the graph "
1170        "(useful for trimmed graphs that still "
1171        "branch too much)",
1172    )
1173    parser.add_argument(
1174        "--to",
1175        type=str,
1176        default=None,
1177        help="only display execution paths from the root "
1178        "to the given comma-separated list of nodes "
1179        "identified by a pointer or a stable ID; "
1180        "compatible with --single-path",
1181    )
1182    parser.add_argument(
1183        "--dark",
1184        action="store_const",
1185        dest="dark",
1186        const=True,
1187        default=False,
1188        help="dark mode",
1189    )
1190    parser.add_argument(
1191        "--gray",
1192        action="store_const",
1193        dest="gray",
1194        const=True,
1195        default=False,
1196        help="black-and-white mode",
1197    )
1198    dump_conflict = parser.add_mutually_exclusive_group()
1199    dump_conflict.add_argument(
1200        "--dump-html-only",
1201        action="store_const",
1202        dest="dump_html_only",
1203        const=True,
1204        default=False,
1205        help="dump the rewritten egraph to a temporary HTML file, "
1206        "but do not open it immediately as by default",
1207    )
1208    dump_conflict.add_argument(
1209        "--dump-dot-only",
1210        action="store_const",
1211        dest="dump_dot_only",
1212        const=True,
1213        default=False,
1214        help="instead of writing an HTML file and immediately "
1215        "displaying it, dump the rewritten dot file "
1216        "to stdout",
1217    )
1218    args = parser.parse_args()
1219    logging.basicConfig(level=args.loglevel)
1220
1221    graph = ExplodedGraph()
1222    with open(args.filename) as fd:
1223        for raw_line in fd:
1224            raw_line = raw_line.strip()
1225            graph.add_raw_line(raw_line)
1226
1227    trimmers = []
1228    if args.to is not None:
1229        trimmers.append(
1230            TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph))
1231        )
1232    if args.single_path:
1233        trimmers.append(SinglePathTrimmer())
1234
1235    explorer = BasicExplorer()
1236
1237    visitor = DotDumpVisitor(
1238        args.diff,
1239        args.dark,
1240        args.gray,
1241        args.topology,
1242        args.dump_html_only,
1243        args.dump_dot_only,
1244    )
1245
1246    for trimmer in trimmers:
1247        trimmer.trim(graph)
1248
1249    explorer.explore(graph, visitor)
1250
1251
1252if __name__ == "__main__":
1253    main()
1254