xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision 0c3e24f7c90243040fedcdfbbb417505f7cc0102)
1#!/usr/bin/env python
2#
3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21
22
23# ===-----------------------------------------------------------------------===#
24# These data structures represent a deserialized ExplodedGraph.
25# ===-----------------------------------------------------------------------===#
26
27
28# A helper function for finding the difference between two dictionaries.
29def diff_dicts(curr, prev):
30    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
31    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
32    return (removed, added)
33
34
35# Represents any program state trait that is a dictionary of key-value pairs.
36class GenericMap:
37    def __init__(self, items):
38        self.generic_map = collections.OrderedDict(items)
39
40    def diff(self, prev):
41        return diff_dicts(self.generic_map, prev.generic_map)
42
43    def is_different(self, prev):
44        removed, added = self.diff(prev)
45        return len(removed) != 0 or len(added) != 0
46
47
48# A deserialized source location.
49class SourceLocation:
50    def __init__(self, json_loc):
51        logging.debug("json: %s" % json_loc)
52        self.line = json_loc["line"]
53        self.col = json_loc["column"]
54        self.filename = (
55            os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)"
56        )
57        self.spelling = (
58            SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None
59        )
60
61    def is_macro(self):
62        return self.spelling is not None
63
64
65# A deserialized program point.
66class ProgramPoint:
67    def __init__(self, json_pp):
68        self.kind = json_pp["kind"]
69        self.tag = json_pp["tag"]
70        self.node_id = json_pp["node_id"]
71        self.is_sink = bool(json_pp["is_sink"])
72        self.has_report = bool(json_pp["has_report"])
73        if self.kind == "Edge":
74            self.src_id = json_pp["src_id"]
75            self.dst_id = json_pp["dst_id"]
76        elif self.kind == "Statement":
77            logging.debug(json_pp)
78            self.stmt_kind = json_pp["stmt_kind"]
79            self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None
80            self.stmt_point_kind = json_pp["stmt_point_kind"]
81            self.stmt_id = json_pp["stmt_id"]
82            self.pointer = json_pp["pointer"]
83            self.pretty = json_pp["pretty"]
84            self.loc = (
85                SourceLocation(json_pp["location"])
86                if json_pp["location"] is not None
87                else None
88            )
89        elif self.kind == "BlockEntrance":
90            self.block_id = json_pp["block_id"]
91
92
93# A single expression acting as a key in a deserialized Environment.
94class EnvironmentBindingKey:
95    def __init__(self, json_ek):
96        # CXXCtorInitializer is not a Stmt!
97        self.stmt_id = (
98            json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"]
99        )
100        self.pretty = json_ek["pretty"]
101        self.kind = json_ek["kind"] if "kind" in json_ek else None
102
103    def _key(self):
104        return self.stmt_id
105
106    def __eq__(self, other):
107        return self._key() == other._key()
108
109    def __hash__(self):
110        return hash(self._key())
111
112
113# Deserialized description of a location context.
114class LocationContext:
115    def __init__(self, json_frame):
116        self.lctx_id = json_frame["lctx_id"]
117        self.caption = json_frame["location_context"]
118        self.decl = json_frame["calling"]
119        self.loc = (
120            SourceLocation(json_frame["location"])
121            if json_frame["location"] is not None
122            else None
123        )
124
125    def _key(self):
126        return self.lctx_id
127
128    def __eq__(self, other):
129        return self._key() == other._key()
130
131    def __hash__(self):
132        return hash(self._key())
133
134
135# A group of deserialized Environment bindings that correspond to a specific
136# location context.
137class EnvironmentFrame:
138    def __init__(self, json_frame):
139        self.location_context = LocationContext(json_frame)
140        self.bindings = collections.OrderedDict(
141            [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]]
142            if json_frame["items"] is not None
143            else []
144        )
145
146    def diff_bindings(self, prev):
147        return diff_dicts(self.bindings, prev.bindings)
148
149    def is_different(self, prev):
150        removed, added = self.diff_bindings(prev)
151        return len(removed) != 0 or len(added) != 0
152
153
154# A deserialized Environment. This class can also hold other entities that
155# are similar to Environment, such as Objects Under Construction or
156# Indices Of Elements Under Construction.
157class GenericEnvironment:
158    def __init__(self, json_e):
159        self.frames = [EnvironmentFrame(f) for f in json_e]
160
161    def diff_frames(self, prev):
162        # TODO: It's difficult to display a good diff when frame numbers shift.
163        if len(self.frames) != len(prev.frames):
164            return None
165
166        updated = []
167        for i in range(len(self.frames)):
168            f = self.frames[i]
169            prev_f = prev.frames[i]
170            if f.location_context == prev_f.location_context:
171                if f.is_different(prev_f):
172                    updated.append(i)
173            else:
174                # We have the whole frame replaced with another frame.
175                # TODO: Produce a nice diff.
176                return None
177
178        # TODO: Add support for added/removed.
179        return updated
180
181    def is_different(self, prev):
182        updated = self.diff_frames(prev)
183        return updated is None or len(updated) > 0
184
185
186# A single binding key in a deserialized RegionStore cluster.
187class StoreBindingKey:
188    def __init__(self, json_sk):
189        self.kind = json_sk["kind"]
190        self.offset = json_sk["offset"]
191
192    def _key(self):
193        return (self.kind, self.offset)
194
195    def __eq__(self, other):
196        return self._key() == other._key()
197
198    def __hash__(self):
199        return hash(self._key())
200
201
202# A single cluster of the deserialized RegionStore.
203class StoreCluster:
204    def __init__(self, json_sc):
205        self.base_region = json_sc["cluster"]
206        self.bindings = collections.OrderedDict(
207            [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]]
208        )
209
210    def diff_bindings(self, prev):
211        return diff_dicts(self.bindings, prev.bindings)
212
213    def is_different(self, prev):
214        removed, added = self.diff_bindings(prev)
215        return len(removed) != 0 or len(added) != 0
216
217
218# A deserialized RegionStore.
219class Store:
220    def __init__(self, json_s):
221        self.ptr = json_s["pointer"]
222        self.clusters = collections.OrderedDict(
223            [(c["pointer"], StoreCluster(c)) for c in json_s["items"]]
224        )
225
226    def diff_clusters(self, prev):
227        removed = [k for k in prev.clusters if k not in self.clusters]
228        added = [k for k in self.clusters if k not in prev.clusters]
229        updated = [
230            k
231            for k in prev.clusters
232            if k in self.clusters and prev.clusters[k].is_different(self.clusters[k])
233        ]
234        return (removed, added, updated)
235
236    def is_different(self, prev):
237        removed, added, updated = self.diff_clusters(prev)
238        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
239
240
241# Deserialized messages from a single checker in a single program state.
242# Basically a list of raw strings.
243class CheckerLines:
244    def __init__(self, json_lines):
245        self.lines = json_lines
246
247    def diff_lines(self, prev):
248        lines = difflib.ndiff(prev.lines, self.lines)
249        return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")]
250
251    def is_different(self, prev):
252        return len(self.diff_lines(prev)) > 0
253
254
255# Deserialized messages of all checkers, separated by checker.
256class CheckerMessages:
257    def __init__(self, json_m):
258        self.items = collections.OrderedDict(
259            [(m["checker"], CheckerLines(m["messages"])) for m in json_m]
260        )
261
262    def diff_messages(self, prev):
263        removed = [k for k in prev.items if k not in self.items]
264        added = [k for k in self.items if k not in prev.items]
265        updated = [
266            k
267            for k in prev.items
268            if k in self.items and prev.items[k].is_different(self.items[k])
269        ]
270        return (removed, added, updated)
271
272    def is_different(self, prev):
273        removed, added, updated = self.diff_messages(prev)
274        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
275
276
277# A deserialized program state.
278class ProgramState:
279    def __init__(self, state_id, json_ps):
280        logging.debug("Adding ProgramState " + str(state_id))
281
282        store_key = "store"
283        env_key = "environment"
284        constraints_key = "constraints"
285        dyn_ty_key = "dynamic_types"
286        ctor_key = "constructing_objects"
287        ind_key = "index_of_element"
288        init_loop_key = "pending_init_loops"
289        dtor_key = "pending_destructors"
290        msg_key = "checker_messages"
291
292        if json_ps is None:
293            json_ps = {
294                store_key: None,
295                env_key: None,
296                constraints_key: None,
297                dyn_ty_key: None,
298                ctor_key: None,
299                ind_key: None,
300                init_loop_key: None,
301                dtor_key: None,
302                msg_key: None,
303            }
304
305        self.state_id = state_id
306
307        self.store = (
308            Store(json_ps[store_key]) if json_ps[store_key] is not None else None
309        )
310
311        self.environment = (
312            GenericEnvironment(json_ps[env_key]["items"])
313            if json_ps[env_key] is not None
314            else None
315        )
316
317        self.constraints = (
318            GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]])
319            if json_ps[constraints_key] is not None
320            else None
321        )
322
323        self.dynamic_types = (
324            GenericMap(
325                [
326                    (
327                        t["region"],
328                        "%s%s"
329                        % (
330                            t["dyn_type"],
331                            " (or a sub-class)" if t["sub_classable"] else "",
332                        ),
333                    )
334                    for t in json_ps[dyn_ty_key]
335                ]
336            )
337            if json_ps[dyn_ty_key] is not None
338            else None
339        )
340
341        self.checker_messages = (
342            CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None
343        )
344
345        # State traits
346        #
347        # For traits we always check if a key exists because if a trait
348        # has no imformation, nothing will be printed in the .dot file
349        # we parse.
350
351        self.constructing_objects = (
352            GenericEnvironment(json_ps[ctor_key])
353            if ctor_key in json_ps and json_ps[ctor_key] is not None
354            else None
355        )
356
357        self.index_of_element = (
358            GenericEnvironment(json_ps[ind_key])
359            if ind_key in json_ps and json_ps[ind_key] is not None
360            else None
361        )
362
363        self.pending_init_loops = (
364            GenericEnvironment(json_ps[init_loop_key])
365            if init_loop_key in json_ps and json_ps[init_loop_key] is not None
366            else None
367        )
368
369        self.pending_destructors = (
370            GenericEnvironment(json_ps[dtor_key])
371            if dtor_key in json_ps and json_ps[dtor_key] is not None
372            else None
373        )
374
375
376# A deserialized exploded graph node. Has a default constructor because it
377# may be referenced as part of an edge before its contents are deserialized,
378# and in this moment we already need a room for predecessors and successors.
379class ExplodedNode:
380    def __init__(self):
381        self.predecessors = []
382        self.successors = []
383
384    def construct(self, node_id, json_node):
385        logging.debug("Adding " + node_id)
386        self.ptr = node_id[4:]
387        self.points = [ProgramPoint(p) for p in json_node["program_points"]]
388        self.node_id = self.points[-1].node_id
389        self.state = ProgramState(
390            json_node["state_id"],
391            json_node["program_state"]
392            if json_node["program_state"] is not None
393            else None,
394        )
395
396        assert self.node_name() == node_id
397
398    def node_name(self):
399        return "Node" + self.ptr
400
401
402# A deserialized ExplodedGraph. Constructed by consuming a .dot file
403# line-by-line.
404class ExplodedGraph:
405    # Parse .dot files with regular expressions.
406    node_re = re.compile(
407        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$'
408    )
409    edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$")
410
411    def __init__(self):
412        self.nodes = collections.defaultdict(ExplodedNode)
413        self.root_id = None
414        self.incomplete_line = ""
415
416    def add_raw_line(self, raw_line):
417        if raw_line.startswith("//"):
418            return
419
420        # Allow line breaks by waiting for ';'. This is not valid in
421        # a .dot file, but it is useful for writing tests.
422        if len(raw_line) > 0 and raw_line[-1] != ";":
423            self.incomplete_line += raw_line
424            return
425        raw_line = self.incomplete_line + raw_line
426        self.incomplete_line = ""
427
428        # Apply regexps one by one to see if it's a node or an edge
429        # and extract contents if necessary.
430        logging.debug("Line: " + raw_line)
431        result = self.edge_re.match(raw_line)
432        if result is not None:
433            logging.debug("Classified as edge line.")
434            pred = result.group(1)
435            succ = result.group(2)
436            self.nodes[pred].successors.append(succ)
437            self.nodes[succ].predecessors.append(pred)
438            return
439        result = self.node_re.match(raw_line)
440        if result is not None:
441            logging.debug("Classified as node line.")
442            node_id = result.group(1)
443            if len(self.nodes) == 0:
444                self.root_id = node_id
445            # Note: when writing tests you don't need to escape everything,
446            # even though in a valid dot file everything is escaped.
447            node_label = (
448                result.group(2)
449                .replace(" ", "")
450                .replace('\\"', '"')
451                .replace("\\{", "{")
452                .replace("\\}", "}")
453                .replace("\\\\", "\\")
454                .replace("\\|", "|")
455                .replace("\\<", "\\\\<")
456                .replace("\\>", "\\\\>")
457                .rstrip(",")
458            )
459            # Handle `\l` separately because a string literal can be in code
460            # like "string\\literal" with the `\l` inside.
461            # Also on Windows macros __FILE__ produces specific delimiters `\`
462            # and a directory or file may starts with the letter `l`.
463            # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
464            # because the literal as a rule contains multiple `\` before `\l`.
465            node_label = re.sub(r"(?<!\\)\\l", "", node_label)
466            logging.debug(node_label)
467            json_node = json.loads(node_label)
468            self.nodes[node_id].construct(node_id, json_node)
469            return
470        logging.debug("Skipping.")
471
472
473# ===-----------------------------------------------------------------------===#
474# Visitors traverse a deserialized ExplodedGraph and do different things
475# with every node and edge.
476# ===-----------------------------------------------------------------------===#
477
478
479# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
480# syntax highlighing.
481class DotDumpVisitor:
482    def __init__(
483        self, do_diffs, dark_mode, gray_mode, topo_mode, dump_html_only, dump_dot_only
484    ):
485        assert not (dump_html_only and dump_dot_only), (
486            "Option dump_html_only and dump_dot_only are conflict, "
487            "they cannot be true at the same time."
488        )
489
490        self._do_diffs = do_diffs
491        self._dark_mode = dark_mode
492        self._gray_mode = gray_mode
493        self._topo_mode = topo_mode
494        self._dump_html_only = dump_html_only
495        self._dump_dot_only = dump_dot_only
496        self._output = []
497
498    def _dump_raw(self, s):
499        if self._dump_dot_only:
500            print(s, end="")
501        else:
502            self._output.append(s)
503
504    def output(self):
505        assert not self._dump_dot_only
506        return "".join(self._output)
507
508    def _dump(self, s):
509        s = (
510            s.replace("&", "&amp;")
511            .replace("{", "\\{")
512            .replace("}", "\\}")
513            .replace("\\<", "&lt;")
514            .replace("\\>", "&gt;")
515            .replace("|", "\\|")
516        )
517        s = re.sub(r"(?<!\\)\\l", "<br />", s)
518        if self._gray_mode:
519            s = re.sub(r'<font color="[a-z0-9]*">', "", s)
520            s = re.sub(r"</font>", "", s)
521        self._dump_raw(s)
522
523    @staticmethod
524    def _diff_plus_minus(is_added):
525        if is_added is None:
526            return ""
527        if is_added:
528            return '<font color="forestgreen">+</font>'
529        return '<font color="red">-</font>'
530
531    @staticmethod
532    def _short_pretty(s):
533        if s is None:
534            return None
535        if len(s) < 20:
536            return s
537        left = s.find("{")
538        right = s.rfind("}")
539        if left == -1 or right == -1 or left >= right:
540            return s
541        candidate = s[0 : left + 1] + " ... " + s[right:]
542        if len(candidate) >= len(s):
543            return s
544        return candidate
545
546    @staticmethod
547    def _make_sloc(loc):
548        if loc is None:
549            return "<i>Invalid Source Location</i>"
550
551        def make_plain_loc(loc):
552            return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col)
553
554        if loc.is_macro():
555            return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % (
556                make_plain_loc(loc),
557                make_plain_loc(loc.spelling),
558            )
559
560        return make_plain_loc(loc)
561
562    def visit_begin_graph(self, graph):
563        self._graph = graph
564        self._dump_raw('digraph "ExplodedGraph" {\n')
565        if self._dark_mode:
566            self._dump_raw('bgcolor="gray10";\n')
567        self._dump_raw('label="";\n')
568
569    def visit_program_point(self, p):
570        if p.kind in ["Edge", "BlockEntrance", "BlockExit"]:
571            color = "gold3"
572        elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]:
573            color = "red"
574        elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]:
575            color = "dodgerblue" if self._dark_mode else "blue"
576        elif p.kind in ["Statement"]:
577            color = "cyan4"
578        else:
579            color = "forestgreen"
580
581        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
582
583        if p.kind == "Statement":
584            # This avoids pretty-printing huge statements such as CompoundStmt.
585            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
586            skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind
587            stmt_color = "cyan3"
588            self._dump(
589                '<td align="left" width="0">%s:</td>'
590                '<td align="left" width="0"><font color="%s">'
591                "%s</font> </td>"
592                '<td align="left"><i>S%s</i></td>'
593                '<td align="left"><font color="%s">%s</font></td>'
594                '<td align="left">%s</td></tr>'
595                % (
596                    self._make_sloc(p.loc),
597                    color,
598                    "%s (%s)" % (p.stmt_kind, p.cast_kind)
599                    if p.cast_kind is not None
600                    else p.stmt_kind,
601                    p.stmt_id,
602                    stmt_color,
603                    p.stmt_point_kind,
604                    self._short_pretty(p.pretty) if not skip_pretty else "",
605                )
606            )
607        elif p.kind == "Edge":
608            self._dump(
609                '<td width="0"></td>'
610                '<td align="left" width="0">'
611                '<font color="%s">%s</font></td><td align="left">'
612                "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id)
613            )
614        elif p.kind == "BlockEntrance":
615            self._dump(
616                '<td width="0"></td>'
617                '<td align="left" width="0">'
618                '<font color="%s">%s</font></td>'
619                '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id)
620            )
621        else:
622            # TODO: Print more stuff for other kinds of points.
623            self._dump(
624                '<td width="0"></td>'
625                '<td align="left" width="0" colspan="2">'
626                '<font color="%s">%s</font></td></tr>' % (color, p.kind)
627            )
628
629        if p.tag is not None:
630            self._dump(
631                '<tr><td width="0"></td><td width="0"></td>'
632                '<td colspan="3" align="left">'
633                '<b>Tag: </b> <font color="crimson">'
634                "%s</font></td></tr>" % p.tag
635            )
636
637        if p.has_report:
638            self._dump(
639                '<tr><td width="0"></td><td width="0"></td>'
640                '<td colspan="3" align="left">'
641                '<font color="red"><b>Bug Report Attached'
642                "</b></font></td></tr>"
643            )
644        if p.is_sink:
645            self._dump(
646                '<tr><td width="0"></td><td width="0"></td>'
647                '<td colspan="3" align="left">'
648                '<font color="cornflowerblue"><b>Sink Node'
649                "</b></font></td></tr>"
650            )
651
652    def visit_environment(self, e, prev_e=None):
653        self._dump('<table border="0">')
654
655        def dump_location_context(lc, is_added=None):
656            self._dump(
657                "<tr><td>%s</td>"
658                '<td align="left"><b>%s</b></td>'
659                '<td align="left" colspan="2">'
660                '<font color="gray60">%s </font>'
661                "%s</td></tr>"
662                % (
663                    self._diff_plus_minus(is_added),
664                    lc.caption,
665                    lc.decl,
666                    ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "",
667                )
668            )
669
670        def dump_binding(f, b, is_added=None):
671            self._dump(
672                "<tr><td>%s</td>"
673                '<td align="left"><i>S%s</i></td>'
674                "%s"
675                '<td align="left">%s</td>'
676                '<td align="left">%s</td></tr>'
677                % (
678                    self._diff_plus_minus(is_added),
679                    b.stmt_id,
680                    '<td align="left"><font color="%s"><i>'
681                    "%s</i></font></td>"
682                    % (
683                        "lavender" if self._dark_mode else "darkgreen",
684                        ("(%s)" % b.kind) if b.kind is not None else " ",
685                    ),
686                    self._short_pretty(b.pretty),
687                    f.bindings[b],
688                )
689            )
690
691        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
692        if frames_updated:
693            for i in frames_updated:
694                f = e.frames[i]
695                prev_f = prev_e.frames[i]
696                dump_location_context(f.location_context)
697                bindings_removed, bindings_added = f.diff_bindings(prev_f)
698                for b in bindings_removed:
699                    dump_binding(prev_f, b, False)
700                for b in bindings_added:
701                    dump_binding(f, b, True)
702        else:
703            for f in e.frames:
704                dump_location_context(f.location_context)
705                for b in f.bindings:
706                    dump_binding(f, b)
707
708        self._dump("</table>")
709
710    def visit_environment_in_state(self, selector, title, s, prev_s=None):
711        e = getattr(s, selector)
712        prev_e = getattr(prev_s, selector) if prev_s is not None else None
713        if e is None and prev_e is None:
714            return
715
716        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
717        if e is None:
718            self._dump("<i> Nothing!</i>")
719        else:
720            if prev_e is not None:
721                if e.is_different(prev_e):
722                    self._dump('</td></tr><tr><td align="left">')
723                    self.visit_environment(e, prev_e)
724                else:
725                    self._dump("<i> No changes!</i>")
726            else:
727                self._dump('</td></tr><tr><td align="left">')
728                self.visit_environment(e)
729
730        self._dump("</td></tr>")
731
732    def visit_store(self, s, prev_s=None):
733        self._dump('<table border="0">')
734
735        def dump_binding(s, c, b, is_added=None):
736            self._dump(
737                "<tr><td>%s</td>"
738                '<td align="left">%s</td>'
739                '<td align="left">%s</td>'
740                '<td align="left">%s</td>'
741                '<td align="left">%s</td></tr>'
742                % (
743                    self._diff_plus_minus(is_added),
744                    s.clusters[c].base_region,
745                    b.offset,
746                    "(<i>Default</i>)" if b.kind == "Default" else "",
747                    s.clusters[c].bindings[b],
748                )
749            )
750
751        if prev_s is not None:
752            clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s)
753            for c in clusters_removed:
754                for b in prev_s.clusters[c].bindings:
755                    dump_binding(prev_s, c, b, False)
756            for c in clusters_updated:
757                bindings_removed, bindings_added = s.clusters[c].diff_bindings(
758                    prev_s.clusters[c]
759                )
760                for b in bindings_removed:
761                    dump_binding(prev_s, c, b, False)
762                for b in bindings_added:
763                    dump_binding(s, c, b, True)
764            for c in clusters_added:
765                for b in s.clusters[c].bindings:
766                    dump_binding(s, c, b, True)
767        else:
768            for c in s.clusters:
769                for b in s.clusters[c].bindings:
770                    dump_binding(s, c, b)
771
772        self._dump("</table>")
773
774    def visit_store_in_state(self, s, prev_s=None):
775        st = s.store
776        prev_st = prev_s.store if prev_s is not None else None
777        if st is None and prev_st is None:
778            return
779
780        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
781        if st is None:
782            self._dump("<i> Nothing!</i>")
783        else:
784            if self._dark_mode:
785                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
786            else:
787                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
788            if prev_st is not None:
789                if s.store.is_different(prev_st):
790                    self._dump('</td></tr><tr><td align="left">')
791                    self.visit_store(st, prev_st)
792                else:
793                    self._dump("<i> No changes!</i>")
794            else:
795                self._dump('</td></tr><tr><td align="left">')
796                self.visit_store(st)
797        self._dump("</td></tr>")
798
799    def visit_generic_map(self, m, prev_m=None):
800        self._dump('<table border="0">')
801
802        def dump_pair(m, k, is_added=None):
803            self._dump(
804                "<tr><td>%s</td>"
805                '<td align="left">%s</td>'
806                '<td align="left">%s</td></tr>'
807                % (self._diff_plus_minus(is_added), k, m.generic_map[k])
808            )
809
810        if prev_m is not None:
811            removed, added = m.diff(prev_m)
812            for k in removed:
813                dump_pair(prev_m, k, False)
814            for k in added:
815                dump_pair(m, k, True)
816        else:
817            for k in m.generic_map:
818                dump_pair(m, k, None)
819
820        self._dump("</table>")
821
822    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
823        m = getattr(s, selector)
824        prev_m = getattr(prev_s, selector) if prev_s is not None else None
825        if m is None and prev_m is None:
826            return
827
828        self._dump("<hr />")
829        self._dump('<tr><td align="left">' "<b>%s: </b>" % title)
830        if m is None:
831            self._dump("<i> Nothing!</i>")
832        else:
833            if prev_m is not None:
834                if m.is_different(prev_m):
835                    self._dump('</td></tr><tr><td align="left">')
836                    self.visit_generic_map(m, prev_m)
837                else:
838                    self._dump("<i> No changes!</i>")
839            else:
840                self._dump('</td></tr><tr><td align="left">')
841                self.visit_generic_map(m)
842
843        self._dump("</td></tr>")
844
845    def visit_checker_messages(self, m, prev_m=None):
846        self._dump('<table border="0">')
847
848        def dump_line(l, is_added=None):
849            self._dump(
850                "<tr><td>%s</td>"
851                '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l)
852            )
853
854        def dump_chk(chk, is_added=None):
855            dump_line("<i>%s</i>:" % chk, is_added)
856
857        if prev_m is not None:
858            removed, added, updated = m.diff_messages(prev_m)
859            for chk in removed:
860                dump_chk(chk, False)
861                for l in prev_m.items[chk].lines:
862                    dump_line(l, False)
863            for chk in updated:
864                dump_chk(chk)
865                for l in m.items[chk].diff_lines(prev_m.items[chk]):
866                    dump_line(l[1:], l.startswith("+"))
867            for chk in added:
868                dump_chk(chk, True)
869                for l in m.items[chk].lines:
870                    dump_line(l, True)
871        else:
872            for chk in m.items:
873                dump_chk(chk)
874                for l in m.items[chk].lines:
875                    dump_line(l)
876
877        self._dump("</table>")
878
879    def visit_checker_messages_in_state(self, s, prev_s=None):
880        m = s.checker_messages
881        prev_m = prev_s.checker_messages if prev_s is not None else None
882        if m is None and prev_m is None:
883            return
884
885        self._dump("<hr />")
886        self._dump('<tr><td align="left">' "<b>Checker State: </b>")
887        if m is None:
888            self._dump("<i> Nothing!</i>")
889        else:
890            if prev_m is not None:
891                if m.is_different(prev_m):
892                    self._dump('</td></tr><tr><td align="left">')
893                    self.visit_checker_messages(m, prev_m)
894                else:
895                    self._dump("<i> No changes!</i>")
896            else:
897                self._dump('</td></tr><tr><td align="left">')
898                self.visit_checker_messages(m)
899
900        self._dump("</td></tr>")
901
902    def visit_state(self, s, prev_s):
903        self.visit_store_in_state(s, prev_s)
904        self.visit_environment_in_state("environment", "Expressions", s, prev_s)
905        self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s)
906        self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s)
907        self.visit_environment_in_state(
908            "constructing_objects", "Objects Under Construction", s, prev_s
909        )
910        self.visit_environment_in_state(
911            "index_of_element", "Indices Of Elements Under Construction", s, prev_s
912        )
913        self.visit_environment_in_state(
914            "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s
915        )
916        self.visit_environment_in_state(
917            "pending_destructors", "Indices of Elements Under Destruction", s, prev_s
918        )
919        self.visit_checker_messages_in_state(s, prev_s)
920
921    def visit_node(self, node):
922        self._dump("%s [shape=record," % (node.node_name()))
923        if self._dark_mode:
924            self._dump('color="white",fontcolor="gray80",')
925        self._dump('label=<<table border="0">')
926
927        self._dump(
928            '<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
929            % (
930                "gray20" if self._dark_mode else "gray70",
931                node.state.state_id if node.state is not None else "Unspecified",
932            )
933        )
934        if not self._topo_mode:
935            self._dump('<tr><td align="left" width="0">')
936            if len(node.points) > 1:
937                self._dump("<b>Program points:</b></td></tr>")
938            else:
939                self._dump("<b>Program point:</b></td></tr>")
940        self._dump(
941            '<tr><td align="left" width="0">'
942            '<table border="0" align="left" width="0">'
943        )
944        for p in node.points:
945            self.visit_program_point(p)
946        self._dump("</table></td></tr>")
947
948        if node.state is not None and not self._topo_mode:
949            prev_s = None
950            # Do diffs only when we have a unique predecessor.
951            # Don't do diffs on the leaf nodes because they're
952            # the important ones.
953            if (
954                self._do_diffs
955                and len(node.predecessors) == 1
956                and len(node.successors) > 0
957            ):
958                prev_s = self._graph.nodes[node.predecessors[0]].state
959            self.visit_state(node.state, prev_s)
960        self._dump_raw("</table>>];\n")
961
962    def visit_edge(self, pred, succ):
963        self._dump_raw(
964            "%s -> %s%s;\n"
965            % (
966                pred.node_name(),
967                succ.node_name(),
968                ' [color="white"]' if self._dark_mode else "",
969            )
970        )
971
972    def visit_end_of_graph(self):
973        self._dump_raw("}\n")
974
975        if not self._dump_dot_only:
976            import sys
977            import tempfile
978
979            def write_temp_file(suffix, prefix, data):
980                fd, filename = tempfile.mkstemp(suffix, prefix, ".", True)
981                print('Writing "%s"...' % filename)
982                with os.fdopen(fd, "w") as fp:
983                    fp.write(data)
984                print("Done! Please remember to remove the file.")
985                return filename
986
987            try:
988                import graphviz
989            except ImportError:
990                # The fallback behavior if graphviz is not installed!
991                print("Python graphviz not found. Please invoke")
992                print("  $ pip install graphviz")
993                print("in order to enable automatic conversion to HTML.")
994                print()
995                print("You may also convert DOT to SVG manually via")
996                print("  $ dot -Tsvg input.dot -o output.svg")
997                print()
998                write_temp_file(".dot", "egraph-", self.output())
999                return
1000
1001            svg = graphviz.pipe("dot", "svg", self.output().encode()).decode()
1002
1003            filename = write_temp_file(
1004                ".html",
1005                "egraph-",
1006                '<html><body bgcolor="%s">%s</body></html>'
1007                % ("#1a1a1a" if self._dark_mode else "white", svg),
1008            )
1009            if self._dump_html_only:
1010                return
1011            if sys.platform == "win32":
1012                os.startfile(filename)
1013            elif sys.platform == "darwin":
1014                os.system('open "%s"' % filename)
1015            else:
1016                os.system('xdg-open "%s"' % filename)
1017
1018
1019# ===-----------------------------------------------------------------------===#
1020# Explorers know how to traverse the ExplodedGraph in a certain order.
1021# They would invoke a Visitor on every node or edge they encounter.
1022# ===-----------------------------------------------------------------------===#
1023
1024
1025# BasicExplorer explores the whole graph in no particular order.
1026class BasicExplorer:
1027    def explore(self, graph, visitor):
1028        visitor.visit_begin_graph(graph)
1029        for node in sorted(graph.nodes):
1030            logging.debug("Visiting " + node)
1031            visitor.visit_node(graph.nodes[node])
1032            for succ in sorted(graph.nodes[node].successors):
1033                logging.debug("Visiting edge: %s -> %s " % (node, succ))
1034                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
1035        visitor.visit_end_of_graph()
1036
1037
1038# ===-----------------------------------------------------------------------===#
1039# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
1040# Trimmers can be combined together by applying them sequentially.
1041# ===-----------------------------------------------------------------------===#
1042
1043
1044# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
1045# Useful when the trimmed graph is still too large.
1046class SinglePathTrimmer:
1047    def trim(self, graph):
1048        visited_nodes = set()
1049        node_id = graph.root_id
1050        while True:
1051            visited_nodes.add(node_id)
1052            node = graph.nodes[node_id]
1053            if len(node.successors) > 0:
1054                succ_id = node.successors[0]
1055                succ = graph.nodes[succ_id]
1056                node.successors = [succ_id]
1057                succ.predecessors = [node_id]
1058                if succ_id in visited_nodes:
1059                    break
1060                node_id = succ_id
1061            else:
1062                break
1063        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1064
1065
1066# TargetedTrimmer keeps paths that lead to specific nodes and discards all
1067# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
1068# a crash).
1069class TargetedTrimmer:
1070    def __init__(self, target_nodes):
1071        self._target_nodes = target_nodes
1072
1073    @staticmethod
1074    def parse_target_node(node, graph):
1075        if node.startswith("0x"):
1076            ret = "Node" + node
1077            assert ret in graph.nodes
1078            return ret
1079        else:
1080            for other_id in graph.nodes:
1081                other = graph.nodes[other_id]
1082                if other.node_id == int(node):
1083                    return other_id
1084
1085    @staticmethod
1086    def parse_target_nodes(target_nodes, graph):
1087        return [
1088            TargetedTrimmer.parse_target_node(node, graph)
1089            for node in target_nodes.split(",")
1090        ]
1091
1092    def trim(self, graph):
1093        queue = self._target_nodes
1094        visited_nodes = set()
1095
1096        while len(queue) > 0:
1097            node_id = queue.pop()
1098            visited_nodes.add(node_id)
1099            node = graph.nodes[node_id]
1100            for pred_id in node.predecessors:
1101                if pred_id not in visited_nodes:
1102                    queue.append(pred_id)
1103        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
1104        for node_id in graph.nodes:
1105            node = graph.nodes[node_id]
1106            node.successors = [
1107                succ_id for succ_id in node.successors if succ_id in visited_nodes
1108            ]
1109            node.predecessors = [
1110                succ_id for succ_id in node.predecessors if succ_id in visited_nodes
1111            ]
1112
1113
1114# ===-----------------------------------------------------------------------===#
1115# The entry point to the script.
1116# ===-----------------------------------------------------------------------===#
1117
1118
1119def main():
1120    parser = argparse.ArgumentParser(
1121        description="Display and manipulate Exploded Graph dumps."
1122    )
1123    parser.add_argument(
1124        "filename", type=str, help="the .dot file produced by the Static Analyzer"
1125    )
1126    parser.add_argument(
1127        "-v",
1128        "--verbose",
1129        action="store_const",
1130        dest="loglevel",
1131        const=logging.DEBUG,
1132        default=logging.WARNING,
1133        help="enable info prints",
1134    )
1135    parser.add_argument(
1136        "-d",
1137        "--diff",
1138        action="store_const",
1139        dest="diff",
1140        const=True,
1141        default=False,
1142        help="display differences between states",
1143    )
1144    parser.add_argument(
1145        "-t",
1146        "--topology",
1147        action="store_const",
1148        dest="topology",
1149        const=True,
1150        default=False,
1151        help="only display program points, omit states",
1152    )
1153    parser.add_argument(
1154        "-s",
1155        "--single-path",
1156        action="store_const",
1157        dest="single_path",
1158        const=True,
1159        default=False,
1160        help="only display the leftmost path in the graph "
1161        "(useful for trimmed graphs that still "
1162        "branch too much)",
1163    )
1164    parser.add_argument(
1165        "--to",
1166        type=str,
1167        default=None,
1168        help="only display execution paths from the root "
1169        "to the given comma-separated list of nodes "
1170        "identified by a pointer or a stable ID; "
1171        "compatible with --single-path",
1172    )
1173    parser.add_argument(
1174        "--dark",
1175        action="store_const",
1176        dest="dark",
1177        const=True,
1178        default=False,
1179        help="dark mode",
1180    )
1181    parser.add_argument(
1182        "--gray",
1183        action="store_const",
1184        dest="gray",
1185        const=True,
1186        default=False,
1187        help="black-and-white mode",
1188    )
1189    dump_conflict = parser.add_mutually_exclusive_group()
1190    dump_conflict.add_argument(
1191        "--dump-html-only",
1192        action="store_const",
1193        dest="dump_html_only",
1194        const=True,
1195        default=False,
1196        help="dump the rewritten egraph to a temporary HTML file, "
1197        "but do not open it immediately as by default",
1198    )
1199    dump_conflict.add_argument(
1200        "--dump-dot-only",
1201        action="store_const",
1202        dest="dump_dot_only",
1203        const=True,
1204        default=False,
1205        help="instead of writing an HTML file and immediately "
1206        "displaying it, dump the rewritten dot file "
1207        "to stdout",
1208    )
1209    args = parser.parse_args()
1210    logging.basicConfig(level=args.loglevel)
1211
1212    graph = ExplodedGraph()
1213    with open(args.filename) as fd:
1214        for raw_line in fd:
1215            raw_line = raw_line.strip()
1216            graph.add_raw_line(raw_line)
1217
1218    trimmers = []
1219    if args.to is not None:
1220        trimmers.append(
1221            TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph))
1222        )
1223    if args.single_path:
1224        trimmers.append(SinglePathTrimmer())
1225
1226    explorer = BasicExplorer()
1227
1228    visitor = DotDumpVisitor(
1229        args.diff,
1230        args.dark,
1231        args.gray,
1232        args.topology,
1233        args.dump_html_only,
1234        args.dump_dot_only,
1235    )
1236
1237    for trimmer in trimmers:
1238        trimmer.trim(graph)
1239
1240    explorer.explore(graph, visitor)
1241
1242
1243if __name__ == "__main__":
1244    main()
1245