1#!/usr/bin/env python 2# 3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--# 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9# ===-----------------------------------------------------------------------===# 10 11 12from __future__ import print_function 13 14import argparse 15import collections 16import difflib 17import json 18import logging 19import os 20import re 21 22 23# ===-----------------------------------------------------------------------===# 24# These data structures represent a deserialized ExplodedGraph. 25# ===-----------------------------------------------------------------------===# 26 27 28# A helper function for finding the difference between two dictionaries. 29def diff_dicts(curr, prev): 30 removed = [k for k in prev if k not in curr or curr[k] != prev[k]] 31 added = [k for k in curr if k not in prev or curr[k] != prev[k]] 32 return (removed, added) 33 34 35# Represents any program state trait that is a dictionary of key-value pairs. 36class GenericMap: 37 def __init__(self, items): 38 self.generic_map = collections.OrderedDict(items) 39 40 def diff(self, prev): 41 return diff_dicts(self.generic_map, prev.generic_map) 42 43 def is_different(self, prev): 44 removed, added = self.diff(prev) 45 return len(removed) != 0 or len(added) != 0 46 47 48# A deserialized source location. 49class SourceLocation: 50 def __init__(self, json_loc): 51 logging.debug("json: %s" % json_loc) 52 self.line = json_loc["line"] 53 self.col = json_loc["column"] 54 self.filename = ( 55 os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)" 56 ) 57 self.spelling = ( 58 SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None 59 ) 60 61 def is_macro(self): 62 return self.spelling is not None 63 64 65# A deserialized program point. 66class ProgramPoint: 67 def __init__(self, json_pp): 68 self.kind = json_pp["kind"] 69 self.tag = json_pp["tag"] 70 self.node_id = json_pp["node_id"] 71 self.is_sink = bool(json_pp["is_sink"]) 72 self.has_report = bool(json_pp["has_report"]) 73 if self.kind == "Edge": 74 self.src_id = json_pp["src_id"] 75 self.dst_id = json_pp["dst_id"] 76 elif self.kind == "Statement": 77 logging.debug(json_pp) 78 self.stmt_kind = json_pp["stmt_kind"] 79 self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None 80 self.stmt_point_kind = json_pp["stmt_point_kind"] 81 self.stmt_id = json_pp["stmt_id"] 82 self.pointer = json_pp["pointer"] 83 self.pretty = json_pp["pretty"] 84 self.loc = ( 85 SourceLocation(json_pp["location"]) 86 if json_pp["location"] is not None 87 else None 88 ) 89 elif self.kind == "BlockEntrance": 90 self.block_id = json_pp["block_id"] 91 92 93# A single expression acting as a key in a deserialized Environment. 94class EnvironmentBindingKey: 95 def __init__(self, json_ek): 96 # CXXCtorInitializer is not a Stmt! 97 self.stmt_id = ( 98 json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"] 99 ) 100 self.pretty = json_ek["pretty"] 101 self.kind = json_ek["kind"] if "kind" in json_ek else None 102 103 def _key(self): 104 return self.stmt_id 105 106 def __eq__(self, other): 107 return self._key() == other._key() 108 109 def __hash__(self): 110 return hash(self._key()) 111 112 113# Deserialized description of a location context. 114class LocationContext: 115 def __init__(self, json_frame): 116 self.lctx_id = json_frame["lctx_id"] 117 self.caption = json_frame["location_context"] 118 self.decl = json_frame["calling"] 119 self.loc = ( 120 SourceLocation(json_frame["location"]) 121 if json_frame["location"] is not None 122 else None 123 ) 124 125 def _key(self): 126 return self.lctx_id 127 128 def __eq__(self, other): 129 return self._key() == other._key() 130 131 def __hash__(self): 132 return hash(self._key()) 133 134 135# A group of deserialized Environment bindings that correspond to a specific 136# location context. 137class EnvironmentFrame: 138 def __init__(self, json_frame): 139 self.location_context = LocationContext(json_frame) 140 self.bindings = collections.OrderedDict( 141 [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]] 142 if json_frame["items"] is not None 143 else [] 144 ) 145 146 def diff_bindings(self, prev): 147 return diff_dicts(self.bindings, prev.bindings) 148 149 def is_different(self, prev): 150 removed, added = self.diff_bindings(prev) 151 return len(removed) != 0 or len(added) != 0 152 153 154# A deserialized Environment. This class can also hold other entities that 155# are similar to Environment, such as Objects Under Construction or 156# Indices Of Elements Under Construction. 157class GenericEnvironment: 158 def __init__(self, json_e): 159 self.frames = [EnvironmentFrame(f) for f in json_e] 160 161 def diff_frames(self, prev): 162 # TODO: It's difficult to display a good diff when frame numbers shift. 163 if len(self.frames) != len(prev.frames): 164 return None 165 166 updated = [] 167 for i in range(len(self.frames)): 168 f = self.frames[i] 169 prev_f = prev.frames[i] 170 if f.location_context == prev_f.location_context: 171 if f.is_different(prev_f): 172 updated.append(i) 173 else: 174 # We have the whole frame replaced with another frame. 175 # TODO: Produce a nice diff. 176 return None 177 178 # TODO: Add support for added/removed. 179 return updated 180 181 def is_different(self, prev): 182 updated = self.diff_frames(prev) 183 return updated is None or len(updated) > 0 184 185 186# A single binding key in a deserialized RegionStore cluster. 187class StoreBindingKey: 188 def __init__(self, json_sk): 189 self.kind = json_sk["kind"] 190 self.offset = json_sk["offset"] 191 192 def _key(self): 193 return (self.kind, self.offset) 194 195 def __eq__(self, other): 196 return self._key() == other._key() 197 198 def __hash__(self): 199 return hash(self._key()) 200 201 202# A single cluster of the deserialized RegionStore. 203class StoreCluster: 204 def __init__(self, json_sc): 205 self.base_region = json_sc["cluster"] 206 self.bindings = collections.OrderedDict( 207 [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]] 208 ) 209 210 def diff_bindings(self, prev): 211 return diff_dicts(self.bindings, prev.bindings) 212 213 def is_different(self, prev): 214 removed, added = self.diff_bindings(prev) 215 return len(removed) != 0 or len(added) != 0 216 217 218# A deserialized RegionStore. 219class Store: 220 def __init__(self, json_s): 221 self.ptr = json_s["pointer"] 222 self.clusters = collections.OrderedDict( 223 [(c["pointer"], StoreCluster(c)) for c in json_s["items"]] 224 ) 225 226 def diff_clusters(self, prev): 227 removed = [k for k in prev.clusters if k not in self.clusters] 228 added = [k for k in self.clusters if k not in prev.clusters] 229 updated = [ 230 k 231 for k in prev.clusters 232 if k in self.clusters and prev.clusters[k].is_different(self.clusters[k]) 233 ] 234 return (removed, added, updated) 235 236 def is_different(self, prev): 237 removed, added, updated = self.diff_clusters(prev) 238 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 239 240 241# Deserialized messages from a single checker in a single program state. 242# Basically a list of raw strings. 243class CheckerLines: 244 def __init__(self, json_lines): 245 self.lines = json_lines 246 247 def diff_lines(self, prev): 248 lines = difflib.ndiff(prev.lines, self.lines) 249 return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")] 250 251 def is_different(self, prev): 252 return len(self.diff_lines(prev)) > 0 253 254 255# Deserialized messages of all checkers, separated by checker. 256class CheckerMessages: 257 def __init__(self, json_m): 258 self.items = collections.OrderedDict( 259 [(m["checker"], CheckerLines(m["messages"])) for m in json_m] 260 ) 261 262 def diff_messages(self, prev): 263 removed = [k for k in prev.items if k not in self.items] 264 added = [k for k in self.items if k not in prev.items] 265 updated = [ 266 k 267 for k in prev.items 268 if k in self.items and prev.items[k].is_different(self.items[k]) 269 ] 270 return (removed, added, updated) 271 272 def is_different(self, prev): 273 removed, added, updated = self.diff_messages(prev) 274 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 275 276 277# A deserialized program state. 278class ProgramState: 279 def __init__(self, state_id, json_ps): 280 logging.debug("Adding ProgramState " + str(state_id)) 281 282 store_key = "store" 283 env_key = "environment" 284 constraints_key = "constraints" 285 dyn_ty_key = "dynamic_types" 286 ctor_key = "constructing_objects" 287 ind_key = "index_of_element" 288 init_loop_key = "pending_init_loops" 289 dtor_key = "pending_destructors" 290 msg_key = "checker_messages" 291 292 if json_ps is None: 293 json_ps = { 294 store_key: None, 295 env_key: None, 296 constraints_key: None, 297 dyn_ty_key: None, 298 ctor_key: None, 299 ind_key: None, 300 init_loop_key: None, 301 dtor_key: None, 302 msg_key: None, 303 } 304 305 self.state_id = state_id 306 307 self.store = ( 308 Store(json_ps[store_key]) if json_ps[store_key] is not None else None 309 ) 310 311 self.environment = ( 312 GenericEnvironment(json_ps[env_key]["items"]) 313 if json_ps[env_key] is not None 314 else None 315 ) 316 317 self.constraints = ( 318 GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]]) 319 if json_ps[constraints_key] is not None 320 else None 321 ) 322 323 self.dynamic_types = ( 324 GenericMap( 325 [ 326 ( 327 t["region"], 328 "%s%s" 329 % ( 330 t["dyn_type"], 331 " (or a sub-class)" if t["sub_classable"] else "", 332 ), 333 ) 334 for t in json_ps[dyn_ty_key] 335 ] 336 ) 337 if json_ps[dyn_ty_key] is not None 338 else None 339 ) 340 341 self.checker_messages = ( 342 CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None 343 ) 344 345 # State traits 346 # 347 # For traits we always check if a key exists because if a trait 348 # has no imformation, nothing will be printed in the .dot file 349 # we parse. 350 351 self.constructing_objects = ( 352 GenericEnvironment(json_ps[ctor_key]) 353 if ctor_key in json_ps and json_ps[ctor_key] is not None 354 else None 355 ) 356 357 self.index_of_element = ( 358 GenericEnvironment(json_ps[ind_key]) 359 if ind_key in json_ps and json_ps[ind_key] is not None 360 else None 361 ) 362 363 self.pending_init_loops = ( 364 GenericEnvironment(json_ps[init_loop_key]) 365 if init_loop_key in json_ps and json_ps[init_loop_key] is not None 366 else None 367 ) 368 369 self.pending_destructors = ( 370 GenericEnvironment(json_ps[dtor_key]) 371 if dtor_key in json_ps and json_ps[dtor_key] is not None 372 else None 373 ) 374 375 376# A deserialized exploded graph node. Has a default constructor because it 377# may be referenced as part of an edge before its contents are deserialized, 378# and in this moment we already need a room for predecessors and successors. 379class ExplodedNode: 380 def __init__(self): 381 self.predecessors = [] 382 self.successors = [] 383 384 def construct(self, node_id, json_node): 385 logging.debug("Adding " + node_id) 386 self.ptr = node_id[4:] 387 self.points = [ProgramPoint(p) for p in json_node["program_points"]] 388 self.node_id = self.points[-1].node_id 389 self.state = ProgramState( 390 json_node["state_id"], 391 json_node["program_state"] 392 if json_node["program_state"] is not None 393 else None, 394 ) 395 396 assert self.node_name() == node_id 397 398 def node_name(self): 399 return "Node" + self.ptr 400 401 402# A deserialized ExplodedGraph. Constructed by consuming a .dot file 403# line-by-line. 404class ExplodedGraph: 405 # Parse .dot files with regular expressions. 406 node_re = re.compile( 407 '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$' 408 ) 409 edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$") 410 411 def __init__(self): 412 self.nodes = collections.defaultdict(ExplodedNode) 413 self.root_id = None 414 self.incomplete_line = "" 415 416 def add_raw_line(self, raw_line): 417 if raw_line.startswith("//"): 418 return 419 420 # Allow line breaks by waiting for ';'. This is not valid in 421 # a .dot file, but it is useful for writing tests. 422 if len(raw_line) > 0 and raw_line[-1] != ";": 423 self.incomplete_line += raw_line 424 return 425 raw_line = self.incomplete_line + raw_line 426 self.incomplete_line = "" 427 428 # Apply regexps one by one to see if it's a node or an edge 429 # and extract contents if necessary. 430 logging.debug("Line: " + raw_line) 431 result = self.edge_re.match(raw_line) 432 if result is not None: 433 logging.debug("Classified as edge line.") 434 pred = result.group(1) 435 succ = result.group(2) 436 self.nodes[pred].successors.append(succ) 437 self.nodes[succ].predecessors.append(pred) 438 return 439 result = self.node_re.match(raw_line) 440 if result is not None: 441 logging.debug("Classified as node line.") 442 node_id = result.group(1) 443 if len(self.nodes) == 0: 444 self.root_id = node_id 445 # Note: when writing tests you don't need to escape everything, 446 # even though in a valid dot file everything is escaped. 447 node_label = ( 448 result.group(2) 449 .replace(" ", "") 450 .replace('\\"', '"') 451 .replace("\\{", "{") 452 .replace("\\}", "}") 453 .replace("\\\\", "\\") 454 .replace("\\|", "|") 455 .replace("\\<", "\\\\<") 456 .replace("\\>", "\\\\>") 457 .rstrip(",") 458 ) 459 # Handle `\l` separately because a string literal can be in code 460 # like "string\\literal" with the `\l` inside. 461 # Also on Windows macros __FILE__ produces specific delimiters `\` 462 # and a directory or file may starts with the letter `l`. 463 # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`, 464 # because the literal as a rule contains multiple `\` before `\l`. 465 node_label = re.sub(r"(?<!\\)\\l", "", node_label) 466 logging.debug(node_label) 467 json_node = json.loads(node_label) 468 self.nodes[node_id].construct(node_id, json_node) 469 return 470 logging.debug("Skipping.") 471 472 473# ===-----------------------------------------------------------------------===# 474# Visitors traverse a deserialized ExplodedGraph and do different things 475# with every node and edge. 476# ===-----------------------------------------------------------------------===# 477 478 479# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based 480# syntax highlighing. 481class DotDumpVisitor: 482 def __init__( 483 self, do_diffs, dark_mode, gray_mode, topo_mode, dump_html_only, dump_dot_only 484 ): 485 assert not (dump_html_only and dump_dot_only), ( 486 "Option dump_html_only and dump_dot_only are conflict, " 487 "they cannot be true at the same time." 488 ) 489 490 self._do_diffs = do_diffs 491 self._dark_mode = dark_mode 492 self._gray_mode = gray_mode 493 self._topo_mode = topo_mode 494 self._dump_html_only = dump_html_only 495 self._dump_dot_only = dump_dot_only 496 self._output = [] 497 498 def _dump_raw(self, s): 499 if self._dump_dot_only: 500 print(s, end="") 501 else: 502 self._output.append(s) 503 504 def output(self): 505 assert not self._dump_dot_only 506 return "".join(self._output) 507 508 def _dump(self, s): 509 s = ( 510 s.replace("&", "&") 511 .replace("{", "\\{") 512 .replace("}", "\\}") 513 .replace("\\<", "<") 514 .replace("\\>", ">") 515 .replace("|", "\\|") 516 ) 517 s = re.sub(r"(?<!\\)\\l", "<br />", s) 518 if self._gray_mode: 519 s = re.sub(r'<font color="[a-z0-9]*">', "", s) 520 s = re.sub(r"</font>", "", s) 521 self._dump_raw(s) 522 523 @staticmethod 524 def _diff_plus_minus(is_added): 525 if is_added is None: 526 return "" 527 if is_added: 528 return '<font color="forestgreen">+</font>' 529 return '<font color="red">-</font>' 530 531 @staticmethod 532 def _short_pretty(s): 533 if s is None: 534 return None 535 if len(s) < 20: 536 return s 537 left = s.find("{") 538 right = s.rfind("}") 539 if left == -1 or right == -1 or left >= right: 540 return s 541 candidate = s[0 : left + 1] + " ... " + s[right:] 542 if len(candidate) >= len(s): 543 return s 544 return candidate 545 546 @staticmethod 547 def _make_sloc(loc): 548 if loc is None: 549 return "<i>Invalid Source Location</i>" 550 551 def make_plain_loc(loc): 552 return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col) 553 554 if loc.is_macro(): 555 return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % ( 556 make_plain_loc(loc), 557 make_plain_loc(loc.spelling), 558 ) 559 560 return make_plain_loc(loc) 561 562 def visit_begin_graph(self, graph): 563 self._graph = graph 564 self._dump_raw('digraph "ExplodedGraph" {\n') 565 if self._dark_mode: 566 self._dump_raw('bgcolor="gray10";\n') 567 self._dump_raw('label="";\n') 568 569 def visit_program_point(self, p): 570 if p.kind in ["Edge", "BlockEntrance", "BlockExit"]: 571 color = "gold3" 572 elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]: 573 color = "red" 574 elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]: 575 color = "dodgerblue" if self._dark_mode else "blue" 576 elif p.kind in ["Statement"]: 577 color = "cyan4" 578 else: 579 color = "forestgreen" 580 581 self._dump('<tr><td align="left">%s.</td>' % p.node_id) 582 583 if p.kind == "Statement": 584 # This avoids pretty-printing huge statements such as CompoundStmt. 585 # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols 586 skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind 587 stmt_color = "cyan3" 588 self._dump( 589 '<td align="left" width="0">%s:</td>' 590 '<td align="left" width="0"><font color="%s">' 591 "%s</font> </td>" 592 '<td align="left"><i>S%s</i></td>' 593 '<td align="left"><font color="%s">%s</font></td>' 594 '<td align="left">%s</td></tr>' 595 % ( 596 self._make_sloc(p.loc), 597 color, 598 "%s (%s)" % (p.stmt_kind, p.cast_kind) 599 if p.cast_kind is not None 600 else p.stmt_kind, 601 p.stmt_id, 602 stmt_color, 603 p.stmt_point_kind, 604 self._short_pretty(p.pretty) if not skip_pretty else "", 605 ) 606 ) 607 elif p.kind == "Edge": 608 self._dump( 609 '<td width="0"></td>' 610 '<td align="left" width="0">' 611 '<font color="%s">%s</font></td><td align="left">' 612 "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id) 613 ) 614 elif p.kind == "BlockEntrance": 615 self._dump( 616 '<td width="0"></td>' 617 '<td align="left" width="0">' 618 '<font color="%s">%s</font></td>' 619 '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id) 620 ) 621 else: 622 # TODO: Print more stuff for other kinds of points. 623 self._dump( 624 '<td width="0"></td>' 625 '<td align="left" width="0" colspan="2">' 626 '<font color="%s">%s</font></td></tr>' % (color, p.kind) 627 ) 628 629 if p.tag is not None: 630 self._dump( 631 '<tr><td width="0"></td><td width="0"></td>' 632 '<td colspan="3" align="left">' 633 '<b>Tag: </b> <font color="crimson">' 634 "%s</font></td></tr>" % p.tag 635 ) 636 637 if p.has_report: 638 self._dump( 639 '<tr><td width="0"></td><td width="0"></td>' 640 '<td colspan="3" align="left">' 641 '<font color="red"><b>Bug Report Attached' 642 "</b></font></td></tr>" 643 ) 644 if p.is_sink: 645 self._dump( 646 '<tr><td width="0"></td><td width="0"></td>' 647 '<td colspan="3" align="left">' 648 '<font color="cornflowerblue"><b>Sink Node' 649 "</b></font></td></tr>" 650 ) 651 652 def visit_environment(self, e, prev_e=None): 653 self._dump('<table border="0">') 654 655 def dump_location_context(lc, is_added=None): 656 self._dump( 657 "<tr><td>%s</td>" 658 '<td align="left"><b>%s</b></td>' 659 '<td align="left" colspan="2">' 660 '<font color="gray60">%s </font>' 661 "%s</td></tr>" 662 % ( 663 self._diff_plus_minus(is_added), 664 lc.caption, 665 lc.decl, 666 ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "", 667 ) 668 ) 669 670 def dump_binding(f, b, is_added=None): 671 self._dump( 672 "<tr><td>%s</td>" 673 '<td align="left"><i>S%s</i></td>' 674 "%s" 675 '<td align="left">%s</td>' 676 '<td align="left">%s</td></tr>' 677 % ( 678 self._diff_plus_minus(is_added), 679 b.stmt_id, 680 '<td align="left"><font color="%s"><i>' 681 "%s</i></font></td>" 682 % ( 683 "lavender" if self._dark_mode else "darkgreen", 684 ("(%s)" % b.kind) if b.kind is not None else " ", 685 ), 686 self._short_pretty(b.pretty), 687 f.bindings[b], 688 ) 689 ) 690 691 frames_updated = e.diff_frames(prev_e) if prev_e is not None else None 692 if frames_updated: 693 for i in frames_updated: 694 f = e.frames[i] 695 prev_f = prev_e.frames[i] 696 dump_location_context(f.location_context) 697 bindings_removed, bindings_added = f.diff_bindings(prev_f) 698 for b in bindings_removed: 699 dump_binding(prev_f, b, False) 700 for b in bindings_added: 701 dump_binding(f, b, True) 702 else: 703 for f in e.frames: 704 dump_location_context(f.location_context) 705 for b in f.bindings: 706 dump_binding(f, b) 707 708 self._dump("</table>") 709 710 def visit_environment_in_state(self, selector, title, s, prev_s=None): 711 e = getattr(s, selector) 712 prev_e = getattr(prev_s, selector) if prev_s is not None else None 713 if e is None and prev_e is None: 714 return 715 716 self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title) 717 if e is None: 718 self._dump("<i> Nothing!</i>") 719 else: 720 if prev_e is not None: 721 if e.is_different(prev_e): 722 self._dump('</td></tr><tr><td align="left">') 723 self.visit_environment(e, prev_e) 724 else: 725 self._dump("<i> No changes!</i>") 726 else: 727 self._dump('</td></tr><tr><td align="left">') 728 self.visit_environment(e) 729 730 self._dump("</td></tr>") 731 732 def visit_store(self, s, prev_s=None): 733 self._dump('<table border="0">') 734 735 def dump_binding(s, c, b, is_added=None): 736 self._dump( 737 "<tr><td>%s</td>" 738 '<td align="left">%s</td>' 739 '<td align="left">%s</td>' 740 '<td align="left">%s</td>' 741 '<td align="left">%s</td></tr>' 742 % ( 743 self._diff_plus_minus(is_added), 744 s.clusters[c].base_region, 745 b.offset, 746 "(<i>Default</i>)" if b.kind == "Default" else "", 747 s.clusters[c].bindings[b], 748 ) 749 ) 750 751 if prev_s is not None: 752 clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s) 753 for c in clusters_removed: 754 for b in prev_s.clusters[c].bindings: 755 dump_binding(prev_s, c, b, False) 756 for c in clusters_updated: 757 bindings_removed, bindings_added = s.clusters[c].diff_bindings( 758 prev_s.clusters[c] 759 ) 760 for b in bindings_removed: 761 dump_binding(prev_s, c, b, False) 762 for b in bindings_added: 763 dump_binding(s, c, b, True) 764 for c in clusters_added: 765 for b in s.clusters[c].bindings: 766 dump_binding(s, c, b, True) 767 else: 768 for c in s.clusters: 769 for b in s.clusters[c].bindings: 770 dump_binding(s, c, b) 771 772 self._dump("</table>") 773 774 def visit_store_in_state(self, s, prev_s=None): 775 st = s.store 776 prev_st = prev_s.store if prev_s is not None else None 777 if st is None and prev_st is None: 778 return 779 780 self._dump('<hr /><tr><td align="left"><b>Store: </b>') 781 if st is None: 782 self._dump("<i> Nothing!</i>") 783 else: 784 if self._dark_mode: 785 self._dump(' <font color="gray30">(%s)</font>' % st.ptr) 786 else: 787 self._dump(' <font color="gray">(%s)</font>' % st.ptr) 788 if prev_st is not None: 789 if s.store.is_different(prev_st): 790 self._dump('</td></tr><tr><td align="left">') 791 self.visit_store(st, prev_st) 792 else: 793 self._dump("<i> No changes!</i>") 794 else: 795 self._dump('</td></tr><tr><td align="left">') 796 self.visit_store(st) 797 self._dump("</td></tr>") 798 799 def visit_generic_map(self, m, prev_m=None): 800 self._dump('<table border="0">') 801 802 def dump_pair(m, k, is_added=None): 803 self._dump( 804 "<tr><td>%s</td>" 805 '<td align="left">%s</td>' 806 '<td align="left">%s</td></tr>' 807 % (self._diff_plus_minus(is_added), k, m.generic_map[k]) 808 ) 809 810 if prev_m is not None: 811 removed, added = m.diff(prev_m) 812 for k in removed: 813 dump_pair(prev_m, k, False) 814 for k in added: 815 dump_pair(m, k, True) 816 else: 817 for k in m.generic_map: 818 dump_pair(m, k, None) 819 820 self._dump("</table>") 821 822 def visit_generic_map_in_state(self, selector, title, s, prev_s=None): 823 m = getattr(s, selector) 824 prev_m = getattr(prev_s, selector) if prev_s is not None else None 825 if m is None and prev_m is None: 826 return 827 828 self._dump("<hr />") 829 self._dump('<tr><td align="left">' "<b>%s: </b>" % title) 830 if m is None: 831 self._dump("<i> Nothing!</i>") 832 else: 833 if prev_m is not None: 834 if m.is_different(prev_m): 835 self._dump('</td></tr><tr><td align="left">') 836 self.visit_generic_map(m, prev_m) 837 else: 838 self._dump("<i> No changes!</i>") 839 else: 840 self._dump('</td></tr><tr><td align="left">') 841 self.visit_generic_map(m) 842 843 self._dump("</td></tr>") 844 845 def visit_checker_messages(self, m, prev_m=None): 846 self._dump('<table border="0">') 847 848 def dump_line(l, is_added=None): 849 self._dump( 850 "<tr><td>%s</td>" 851 '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l) 852 ) 853 854 def dump_chk(chk, is_added=None): 855 dump_line("<i>%s</i>:" % chk, is_added) 856 857 if prev_m is not None: 858 removed, added, updated = m.diff_messages(prev_m) 859 for chk in removed: 860 dump_chk(chk, False) 861 for l in prev_m.items[chk].lines: 862 dump_line(l, False) 863 for chk in updated: 864 dump_chk(chk) 865 for l in m.items[chk].diff_lines(prev_m.items[chk]): 866 dump_line(l[1:], l.startswith("+")) 867 for chk in added: 868 dump_chk(chk, True) 869 for l in m.items[chk].lines: 870 dump_line(l, True) 871 else: 872 for chk in m.items: 873 dump_chk(chk) 874 for l in m.items[chk].lines: 875 dump_line(l) 876 877 self._dump("</table>") 878 879 def visit_checker_messages_in_state(self, s, prev_s=None): 880 m = s.checker_messages 881 prev_m = prev_s.checker_messages if prev_s is not None else None 882 if m is None and prev_m is None: 883 return 884 885 self._dump("<hr />") 886 self._dump('<tr><td align="left">' "<b>Checker State: </b>") 887 if m is None: 888 self._dump("<i> Nothing!</i>") 889 else: 890 if prev_m is not None: 891 if m.is_different(prev_m): 892 self._dump('</td></tr><tr><td align="left">') 893 self.visit_checker_messages(m, prev_m) 894 else: 895 self._dump("<i> No changes!</i>") 896 else: 897 self._dump('</td></tr><tr><td align="left">') 898 self.visit_checker_messages(m) 899 900 self._dump("</td></tr>") 901 902 def visit_state(self, s, prev_s): 903 self.visit_store_in_state(s, prev_s) 904 self.visit_environment_in_state("environment", "Expressions", s, prev_s) 905 self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s) 906 self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s) 907 self.visit_environment_in_state( 908 "constructing_objects", "Objects Under Construction", s, prev_s 909 ) 910 self.visit_environment_in_state( 911 "index_of_element", "Indices Of Elements Under Construction", s, prev_s 912 ) 913 self.visit_environment_in_state( 914 "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s 915 ) 916 self.visit_environment_in_state( 917 "pending_destructors", "Indices of Elements Under Destruction", s, prev_s 918 ) 919 self.visit_checker_messages_in_state(s, prev_s) 920 921 def visit_node(self, node): 922 self._dump("%s [shape=record," % (node.node_name())) 923 if self._dark_mode: 924 self._dump('color="white",fontcolor="gray80",') 925 self._dump('label=<<table border="0">') 926 927 self._dump( 928 '<tr><td bgcolor="%s"><b>State %s</b></td></tr>' 929 % ( 930 "gray20" if self._dark_mode else "gray70", 931 node.state.state_id if node.state is not None else "Unspecified", 932 ) 933 ) 934 if not self._topo_mode: 935 self._dump('<tr><td align="left" width="0">') 936 if len(node.points) > 1: 937 self._dump("<b>Program points:</b></td></tr>") 938 else: 939 self._dump("<b>Program point:</b></td></tr>") 940 self._dump( 941 '<tr><td align="left" width="0">' 942 '<table border="0" align="left" width="0">' 943 ) 944 for p in node.points: 945 self.visit_program_point(p) 946 self._dump("</table></td></tr>") 947 948 if node.state is not None and not self._topo_mode: 949 prev_s = None 950 # Do diffs only when we have a unique predecessor. 951 # Don't do diffs on the leaf nodes because they're 952 # the important ones. 953 if ( 954 self._do_diffs 955 and len(node.predecessors) == 1 956 and len(node.successors) > 0 957 ): 958 prev_s = self._graph.nodes[node.predecessors[0]].state 959 self.visit_state(node.state, prev_s) 960 self._dump_raw("</table>>];\n") 961 962 def visit_edge(self, pred, succ): 963 self._dump_raw( 964 "%s -> %s%s;\n" 965 % ( 966 pred.node_name(), 967 succ.node_name(), 968 ' [color="white"]' if self._dark_mode else "", 969 ) 970 ) 971 972 def visit_end_of_graph(self): 973 self._dump_raw("}\n") 974 975 if not self._dump_dot_only: 976 import sys 977 import tempfile 978 979 def write_temp_file(suffix, prefix, data): 980 fd, filename = tempfile.mkstemp(suffix, prefix, ".", True) 981 print('Writing "%s"...' % filename) 982 with os.fdopen(fd, "w") as fp: 983 fp.write(data) 984 print("Done! Please remember to remove the file.") 985 return filename 986 987 try: 988 import graphviz 989 except ImportError: 990 # The fallback behavior if graphviz is not installed! 991 print("Python graphviz not found. Please invoke") 992 print(" $ pip install graphviz") 993 print("in order to enable automatic conversion to HTML.") 994 print() 995 print("You may also convert DOT to SVG manually via") 996 print(" $ dot -Tsvg input.dot -o output.svg") 997 print() 998 write_temp_file(".dot", "egraph-", self.output()) 999 return 1000 1001 svg = graphviz.pipe("dot", "svg", self.output().encode()).decode() 1002 1003 filename = write_temp_file( 1004 ".html", 1005 "egraph-", 1006 '<html><body bgcolor="%s">%s</body></html>' 1007 % ("#1a1a1a" if self._dark_mode else "white", svg), 1008 ) 1009 if self._dump_html_only: 1010 return 1011 if sys.platform == "win32": 1012 os.startfile(filename) 1013 elif sys.platform == "darwin": 1014 os.system('open "%s"' % filename) 1015 else: 1016 os.system('xdg-open "%s"' % filename) 1017 1018 1019# ===-----------------------------------------------------------------------===# 1020# Explorers know how to traverse the ExplodedGraph in a certain order. 1021# They would invoke a Visitor on every node or edge they encounter. 1022# ===-----------------------------------------------------------------------===# 1023 1024 1025# BasicExplorer explores the whole graph in no particular order. 1026class BasicExplorer: 1027 def explore(self, graph, visitor): 1028 visitor.visit_begin_graph(graph) 1029 for node in sorted(graph.nodes): 1030 logging.debug("Visiting " + node) 1031 visitor.visit_node(graph.nodes[node]) 1032 for succ in sorted(graph.nodes[node].successors): 1033 logging.debug("Visiting edge: %s -> %s " % (node, succ)) 1034 visitor.visit_edge(graph.nodes[node], graph.nodes[succ]) 1035 visitor.visit_end_of_graph() 1036 1037 1038# ===-----------------------------------------------------------------------===# 1039# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts. 1040# Trimmers can be combined together by applying them sequentially. 1041# ===-----------------------------------------------------------------------===# 1042 1043 1044# SinglePathTrimmer keeps only a single path - the leftmost path from the root. 1045# Useful when the trimmed graph is still too large. 1046class SinglePathTrimmer: 1047 def trim(self, graph): 1048 visited_nodes = set() 1049 node_id = graph.root_id 1050 while True: 1051 visited_nodes.add(node_id) 1052 node = graph.nodes[node_id] 1053 if len(node.successors) > 0: 1054 succ_id = node.successors[0] 1055 succ = graph.nodes[succ_id] 1056 node.successors = [succ_id] 1057 succ.predecessors = [node_id] 1058 if succ_id in visited_nodes: 1059 break 1060 node_id = succ_id 1061 else: 1062 break 1063 graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} 1064 1065 1066# TargetedTrimmer keeps paths that lead to specific nodes and discards all 1067# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging 1068# a crash). 1069class TargetedTrimmer: 1070 def __init__(self, target_nodes): 1071 self._target_nodes = target_nodes 1072 1073 @staticmethod 1074 def parse_target_node(node, graph): 1075 if node.startswith("0x"): 1076 ret = "Node" + node 1077 assert ret in graph.nodes 1078 return ret 1079 else: 1080 for other_id in graph.nodes: 1081 other = graph.nodes[other_id] 1082 if other.node_id == int(node): 1083 return other_id 1084 1085 @staticmethod 1086 def parse_target_nodes(target_nodes, graph): 1087 return [ 1088 TargetedTrimmer.parse_target_node(node, graph) 1089 for node in target_nodes.split(",") 1090 ] 1091 1092 def trim(self, graph): 1093 queue = self._target_nodes 1094 visited_nodes = set() 1095 1096 while len(queue) > 0: 1097 node_id = queue.pop() 1098 visited_nodes.add(node_id) 1099 node = graph.nodes[node_id] 1100 for pred_id in node.predecessors: 1101 if pred_id not in visited_nodes: 1102 queue.append(pred_id) 1103 graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} 1104 for node_id in graph.nodes: 1105 node = graph.nodes[node_id] 1106 node.successors = [ 1107 succ_id for succ_id in node.successors if succ_id in visited_nodes 1108 ] 1109 node.predecessors = [ 1110 succ_id for succ_id in node.predecessors if succ_id in visited_nodes 1111 ] 1112 1113 1114# ===-----------------------------------------------------------------------===# 1115# The entry point to the script. 1116# ===-----------------------------------------------------------------------===# 1117 1118 1119def main(): 1120 parser = argparse.ArgumentParser( 1121 description="Display and manipulate Exploded Graph dumps." 1122 ) 1123 parser.add_argument( 1124 "filename", type=str, help="the .dot file produced by the Static Analyzer" 1125 ) 1126 parser.add_argument( 1127 "-v", 1128 "--verbose", 1129 action="store_const", 1130 dest="loglevel", 1131 const=logging.DEBUG, 1132 default=logging.WARNING, 1133 help="enable info prints", 1134 ) 1135 parser.add_argument( 1136 "-d", 1137 "--diff", 1138 action="store_const", 1139 dest="diff", 1140 const=True, 1141 default=False, 1142 help="display differences between states", 1143 ) 1144 parser.add_argument( 1145 "-t", 1146 "--topology", 1147 action="store_const", 1148 dest="topology", 1149 const=True, 1150 default=False, 1151 help="only display program points, omit states", 1152 ) 1153 parser.add_argument( 1154 "-s", 1155 "--single-path", 1156 action="store_const", 1157 dest="single_path", 1158 const=True, 1159 default=False, 1160 help="only display the leftmost path in the graph " 1161 "(useful for trimmed graphs that still " 1162 "branch too much)", 1163 ) 1164 parser.add_argument( 1165 "--to", 1166 type=str, 1167 default=None, 1168 help="only display execution paths from the root " 1169 "to the given comma-separated list of nodes " 1170 "identified by a pointer or a stable ID; " 1171 "compatible with --single-path", 1172 ) 1173 parser.add_argument( 1174 "--dark", 1175 action="store_const", 1176 dest="dark", 1177 const=True, 1178 default=False, 1179 help="dark mode", 1180 ) 1181 parser.add_argument( 1182 "--gray", 1183 action="store_const", 1184 dest="gray", 1185 const=True, 1186 default=False, 1187 help="black-and-white mode", 1188 ) 1189 dump_conflict = parser.add_mutually_exclusive_group() 1190 dump_conflict.add_argument( 1191 "--dump-html-only", 1192 action="store_const", 1193 dest="dump_html_only", 1194 const=True, 1195 default=False, 1196 help="dump the rewritten egraph to a temporary HTML file, " 1197 "but do not open it immediately as by default", 1198 ) 1199 dump_conflict.add_argument( 1200 "--dump-dot-only", 1201 action="store_const", 1202 dest="dump_dot_only", 1203 const=True, 1204 default=False, 1205 help="instead of writing an HTML file and immediately " 1206 "displaying it, dump the rewritten dot file " 1207 "to stdout", 1208 ) 1209 args = parser.parse_args() 1210 logging.basicConfig(level=args.loglevel) 1211 1212 graph = ExplodedGraph() 1213 with open(args.filename) as fd: 1214 for raw_line in fd: 1215 raw_line = raw_line.strip() 1216 graph.add_raw_line(raw_line) 1217 1218 trimmers = [] 1219 if args.to is not None: 1220 trimmers.append( 1221 TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph)) 1222 ) 1223 if args.single_path: 1224 trimmers.append(SinglePathTrimmer()) 1225 1226 explorer = BasicExplorer() 1227 1228 visitor = DotDumpVisitor( 1229 args.diff, 1230 args.dark, 1231 args.gray, 1232 args.topology, 1233 args.dump_html_only, 1234 args.dump_dot_only, 1235 ) 1236 1237 for trimmer in trimmers: 1238 trimmer.trim(graph) 1239 1240 explorer.explore(graph, visitor) 1241 1242 1243if __name__ == "__main__": 1244 main() 1245