1#!/usr/bin/env python 2# 3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--# 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9# ===-----------------------------------------------------------------------===# 10 11 12from __future__ import print_function 13 14import argparse 15import collections 16import difflib 17import json 18import logging 19import os 20import re 21 22 23# ===-----------------------------------------------------------------------===# 24# These data structures represent a deserialized ExplodedGraph. 25# ===-----------------------------------------------------------------------===# 26 27 28# A helper function for finding the difference between two dictionaries. 29def diff_dicts(curr, prev): 30 removed = [k for k in prev if k not in curr or curr[k] != prev[k]] 31 added = [k for k in curr if k not in prev or curr[k] != prev[k]] 32 return (removed, added) 33 34 35# Represents any program state trait that is a dictionary of key-value pairs. 36class GenericMap: 37 def __init__(self, items): 38 self.generic_map = collections.OrderedDict(items) 39 40 def diff(self, prev): 41 return diff_dicts(self.generic_map, prev.generic_map) 42 43 def is_different(self, prev): 44 removed, added = self.diff(prev) 45 return len(removed) != 0 or len(added) != 0 46 47 48# A deserialized source location. 49class SourceLocation: 50 def __init__(self, json_loc): 51 logging.debug("json: %s" % json_loc) 52 self.line = json_loc["line"] 53 self.col = json_loc["column"] 54 self.filename = ( 55 os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)" 56 ) 57 self.spelling = ( 58 SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None 59 ) 60 61 def is_macro(self): 62 return self.spelling is not None 63 64 65# A deserialized program point. 66class ProgramPoint: 67 def __init__(self, json_pp): 68 self.kind = json_pp["kind"] 69 self.tag = json_pp["tag"] 70 self.node_id = json_pp["node_id"] 71 self.is_sink = bool(json_pp["is_sink"]) 72 self.has_report = bool(json_pp["has_report"]) 73 if self.kind == "Edge": 74 self.src_id = json_pp["src_id"] 75 self.dst_id = json_pp["dst_id"] 76 elif self.kind == "Statement": 77 logging.debug(json_pp) 78 self.stmt_kind = json_pp["stmt_kind"] 79 self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None 80 self.stmt_point_kind = json_pp["stmt_point_kind"] 81 self.stmt_id = json_pp["stmt_id"] 82 self.pointer = json_pp["pointer"] 83 self.pretty = json_pp["pretty"] 84 self.loc = ( 85 SourceLocation(json_pp["location"]) 86 if json_pp["location"] is not None 87 else None 88 ) 89 elif self.kind == "CallEnter": 90 self.callee_decl = json_pp.get("callee_decl", "None") 91 elif self.kind == "BlockEntrance": 92 self.block_id = json_pp["block_id"] 93 94 95# A single expression acting as a key in a deserialized Environment. 96class EnvironmentBindingKey: 97 def __init__(self, json_ek): 98 # CXXCtorInitializer is not a Stmt! 99 self.stmt_id = ( 100 json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"] 101 ) 102 self.pretty = json_ek["pretty"] 103 self.kind = json_ek["kind"] if "kind" in json_ek else None 104 105 def _key(self): 106 return self.stmt_id 107 108 def __eq__(self, other): 109 return self._key() == other._key() 110 111 def __hash__(self): 112 return hash(self._key()) 113 114 115# Deserialized description of a location context. 116class LocationContext: 117 def __init__(self, json_frame): 118 self.lctx_id = json_frame["lctx_id"] 119 self.caption = json_frame["location_context"] 120 self.decl = json_frame["calling"] 121 self.loc = ( 122 SourceLocation(json_frame["location"]) 123 if json_frame["location"] is not None 124 else None 125 ) 126 127 def _key(self): 128 return self.lctx_id 129 130 def __eq__(self, other): 131 return self._key() == other._key() 132 133 def __hash__(self): 134 return hash(self._key()) 135 136 137# A group of deserialized Environment bindings that correspond to a specific 138# location context. 139class EnvironmentFrame: 140 def __init__(self, json_frame): 141 self.location_context = LocationContext(json_frame) 142 self.bindings = collections.OrderedDict( 143 [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]] 144 if json_frame["items"] is not None 145 else [] 146 ) 147 148 def diff_bindings(self, prev): 149 return diff_dicts(self.bindings, prev.bindings) 150 151 def is_different(self, prev): 152 removed, added = self.diff_bindings(prev) 153 return len(removed) != 0 or len(added) != 0 154 155 156# A deserialized Environment. This class can also hold other entities that 157# are similar to Environment, such as Objects Under Construction or 158# Indices Of Elements Under Construction. 159class GenericEnvironment: 160 def __init__(self, json_e): 161 self.frames = [EnvironmentFrame(f) for f in json_e] 162 163 def diff_frames(self, prev): 164 # TODO: It's difficult to display a good diff when frame numbers shift. 165 if len(self.frames) != len(prev.frames): 166 return None 167 168 updated = [] 169 for i in range(len(self.frames)): 170 f = self.frames[i] 171 prev_f = prev.frames[i] 172 if f.location_context == prev_f.location_context: 173 if f.is_different(prev_f): 174 updated.append(i) 175 else: 176 # We have the whole frame replaced with another frame. 177 # TODO: Produce a nice diff. 178 return None 179 180 # TODO: Add support for added/removed. 181 return updated 182 183 def is_different(self, prev): 184 updated = self.diff_frames(prev) 185 return updated is None or len(updated) > 0 186 187 188# A single binding key in a deserialized RegionStore cluster. 189class StoreBindingKey: 190 def __init__(self, json_sk): 191 self.kind = json_sk["kind"] 192 self.offset = json_sk["offset"] 193 194 def _key(self): 195 return (self.kind, self.offset) 196 197 def __eq__(self, other): 198 return self._key() == other._key() 199 200 def __hash__(self): 201 return hash(self._key()) 202 203 204# A single cluster of the deserialized RegionStore. 205class StoreCluster: 206 def __init__(self, json_sc): 207 self.base_region = json_sc["cluster"] 208 self.bindings = collections.OrderedDict( 209 [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]] 210 ) 211 212 def diff_bindings(self, prev): 213 return diff_dicts(self.bindings, prev.bindings) 214 215 def is_different(self, prev): 216 removed, added = self.diff_bindings(prev) 217 return len(removed) != 0 or len(added) != 0 218 219 220# A deserialized RegionStore. 221class Store: 222 def __init__(self, json_s): 223 self.ptr = json_s["pointer"] 224 self.clusters = collections.OrderedDict( 225 [(c["pointer"], StoreCluster(c)) for c in json_s["items"]] 226 ) 227 228 def diff_clusters(self, prev): 229 removed = [k for k in prev.clusters if k not in self.clusters] 230 added = [k for k in self.clusters if k not in prev.clusters] 231 updated = [ 232 k 233 for k in prev.clusters 234 if k in self.clusters and prev.clusters[k].is_different(self.clusters[k]) 235 ] 236 return (removed, added, updated) 237 238 def is_different(self, prev): 239 removed, added, updated = self.diff_clusters(prev) 240 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 241 242 243# Deserialized messages from a single checker in a single program state. 244# Basically a list of raw strings. 245class CheckerLines: 246 def __init__(self, json_lines): 247 self.lines = json_lines 248 249 def diff_lines(self, prev): 250 lines = difflib.ndiff(prev.lines, self.lines) 251 return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")] 252 253 def is_different(self, prev): 254 return len(self.diff_lines(prev)) > 0 255 256 257# Deserialized messages of all checkers, separated by checker. 258class CheckerMessages: 259 def __init__(self, json_m): 260 self.items = collections.OrderedDict( 261 [(m["checker"], CheckerLines(m["messages"])) for m in json_m] 262 ) 263 264 def diff_messages(self, prev): 265 removed = [k for k in prev.items if k not in self.items] 266 added = [k for k in self.items if k not in prev.items] 267 updated = [ 268 k 269 for k in prev.items 270 if k in self.items and prev.items[k].is_different(self.items[k]) 271 ] 272 return (removed, added, updated) 273 274 def is_different(self, prev): 275 removed, added, updated = self.diff_messages(prev) 276 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 277 278 279# A deserialized program state. 280class ProgramState: 281 def __init__(self, state_id, json_ps): 282 logging.debug("Adding ProgramState " + str(state_id)) 283 284 store_key = "store" 285 env_key = "environment" 286 constraints_key = "constraints" 287 dyn_ty_key = "dynamic_types" 288 ctor_key = "constructing_objects" 289 ind_key = "index_of_element" 290 init_loop_key = "pending_init_loops" 291 dtor_key = "pending_destructors" 292 msg_key = "checker_messages" 293 294 if json_ps is None: 295 json_ps = { 296 store_key: None, 297 env_key: None, 298 constraints_key: None, 299 dyn_ty_key: None, 300 ctor_key: None, 301 ind_key: None, 302 init_loop_key: None, 303 dtor_key: None, 304 msg_key: None, 305 } 306 307 self.state_id = state_id 308 309 self.store = ( 310 Store(json_ps[store_key]) if json_ps[store_key] is not None else None 311 ) 312 313 self.environment = ( 314 GenericEnvironment(json_ps[env_key]["items"]) 315 if json_ps[env_key] is not None 316 else None 317 ) 318 319 self.constraints = ( 320 GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]]) 321 if json_ps[constraints_key] is not None 322 else None 323 ) 324 325 self.dynamic_types = ( 326 GenericMap( 327 [ 328 ( 329 t["region"], 330 "%s%s" 331 % ( 332 t["dyn_type"], 333 " (or a sub-class)" if t["sub_classable"] else "", 334 ), 335 ) 336 for t in json_ps[dyn_ty_key] 337 ] 338 ) 339 if json_ps[dyn_ty_key] is not None 340 else None 341 ) 342 343 self.checker_messages = ( 344 CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None 345 ) 346 347 # State traits 348 # 349 # For traits we always check if a key exists because if a trait 350 # has no imformation, nothing will be printed in the .dot file 351 # we parse. 352 353 self.constructing_objects = ( 354 GenericEnvironment(json_ps[ctor_key]) 355 if ctor_key in json_ps and json_ps[ctor_key] is not None 356 else None 357 ) 358 359 self.index_of_element = ( 360 GenericEnvironment(json_ps[ind_key]) 361 if ind_key in json_ps and json_ps[ind_key] is not None 362 else None 363 ) 364 365 self.pending_init_loops = ( 366 GenericEnvironment(json_ps[init_loop_key]) 367 if init_loop_key in json_ps and json_ps[init_loop_key] is not None 368 else None 369 ) 370 371 self.pending_destructors = ( 372 GenericEnvironment(json_ps[dtor_key]) 373 if dtor_key in json_ps and json_ps[dtor_key] is not None 374 else None 375 ) 376 377 378# A deserialized exploded graph node. Has a default constructor because it 379# may be referenced as part of an edge before its contents are deserialized, 380# and in this moment we already need a room for predecessors and successors. 381class ExplodedNode: 382 def __init__(self): 383 self.predecessors = [] 384 self.successors = [] 385 386 def construct(self, node_id, json_node): 387 logging.debug("Adding " + node_id) 388 self.ptr = node_id[4:] 389 self.points = [ProgramPoint(p) for p in json_node["program_points"]] 390 self.node_id = self.points[-1].node_id 391 self.state = ProgramState( 392 json_node["state_id"], 393 json_node["program_state"] 394 if json_node["program_state"] is not None 395 else None, 396 ) 397 398 assert self.node_name() == node_id 399 400 def node_name(self): 401 return "Node" + self.ptr 402 403 404# A deserialized ExplodedGraph. Constructed by consuming a .dot file 405# line-by-line. 406class ExplodedGraph: 407 # Parse .dot files with regular expressions. 408 node_re = re.compile( 409 '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$' 410 ) 411 edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$") 412 413 def __init__(self): 414 self.nodes = collections.defaultdict(ExplodedNode) 415 self.root_id = None 416 self.incomplete_line = "" 417 418 def add_raw_line(self, raw_line): 419 if raw_line.startswith("//"): 420 return 421 422 # Allow line breaks by waiting for ';'. This is not valid in 423 # a .dot file, but it is useful for writing tests. 424 if len(raw_line) > 0 and raw_line[-1] != ";": 425 self.incomplete_line += raw_line 426 return 427 raw_line = self.incomplete_line + raw_line 428 self.incomplete_line = "" 429 430 # Apply regexps one by one to see if it's a node or an edge 431 # and extract contents if necessary. 432 logging.debug("Line: " + raw_line) 433 result = self.edge_re.match(raw_line) 434 if result is not None: 435 logging.debug("Classified as edge line.") 436 pred = result.group(1) 437 succ = result.group(2) 438 self.nodes[pred].successors.append(succ) 439 self.nodes[succ].predecessors.append(pred) 440 return 441 result = self.node_re.match(raw_line) 442 if result is not None: 443 logging.debug("Classified as node line.") 444 node_id = result.group(1) 445 if len(self.nodes) == 0: 446 self.root_id = node_id 447 # Note: when writing tests you don't need to escape everything, 448 # even though in a valid dot file everything is escaped. 449 node_label = ( 450 result.group(2) 451 .replace(" ", "") 452 .replace('\\"', '"') 453 .replace("\\{", "{") 454 .replace("\\}", "}") 455 .replace("\\\\", "\\") 456 .replace("\\|", "|") 457 .replace("\\<", "\\\\<") 458 .replace("\\>", "\\\\>") 459 .rstrip(",") 460 ) 461 # Handle `\l` separately because a string literal can be in code 462 # like "string\\literal" with the `\l` inside. 463 # Also on Windows macros __FILE__ produces specific delimiters `\` 464 # and a directory or file may starts with the letter `l`. 465 # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`, 466 # because the literal as a rule contains multiple `\` before `\l`. 467 node_label = re.sub(r"(?<!\\)\\l", "", node_label) 468 logging.debug(node_label) 469 json_node = json.loads(node_label) 470 self.nodes[node_id].construct(node_id, json_node) 471 return 472 logging.debug("Skipping.") 473 474 475# ===-----------------------------------------------------------------------===# 476# Visitors traverse a deserialized ExplodedGraph and do different things 477# with every node and edge. 478# ===-----------------------------------------------------------------------===# 479 480 481# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based 482# syntax highlighing. 483class DotDumpVisitor: 484 def __init__( 485 self, do_diffs, dark_mode, gray_mode, topo_mode, dump_html_only, dump_dot_only 486 ): 487 assert not (dump_html_only and dump_dot_only), ( 488 "Option dump_html_only and dump_dot_only are conflict, " 489 "they cannot be true at the same time." 490 ) 491 492 self._do_diffs = do_diffs 493 self._dark_mode = dark_mode 494 self._gray_mode = gray_mode 495 self._topo_mode = topo_mode 496 self._dump_html_only = dump_html_only 497 self._dump_dot_only = dump_dot_only 498 self._output = [] 499 500 def _dump_raw(self, s): 501 if self._dump_dot_only: 502 print(s, end="") 503 else: 504 self._output.append(s) 505 506 def output(self): 507 assert not self._dump_dot_only 508 return "".join(self._output) 509 510 def _dump(self, s): 511 s = ( 512 s.replace("&", "&") 513 .replace("{", "\\{") 514 .replace("}", "\\}") 515 .replace("\\<", "<") 516 .replace("\\>", ">") 517 .replace("|", "\\|") 518 ) 519 s = re.sub(r"(?<!\\)\\l", "<br />", s) 520 if self._gray_mode: 521 s = re.sub(r'<font color="[a-z0-9]*">', "", s) 522 s = re.sub(r"</font>", "", s) 523 self._dump_raw(s) 524 525 @staticmethod 526 def _diff_plus_minus(is_added): 527 if is_added is None: 528 return "" 529 if is_added: 530 return '<font color="forestgreen">+</font>' 531 return '<font color="red">-</font>' 532 533 @staticmethod 534 def _short_pretty(s): 535 if s is None: 536 return None 537 if len(s) < 20: 538 return s 539 left = s.find("{") 540 right = s.rfind("}") 541 if left == -1 or right == -1 or left >= right: 542 return s 543 candidate = s[0 : left + 1] + " ... " + s[right:] 544 if len(candidate) >= len(s): 545 return s 546 return candidate 547 548 @staticmethod 549 def _make_sloc(loc): 550 if loc is None: 551 return "<i>Invalid Source Location</i>" 552 553 def make_plain_loc(loc): 554 return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col) 555 556 if loc.is_macro(): 557 return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % ( 558 make_plain_loc(loc), 559 make_plain_loc(loc.spelling), 560 ) 561 562 return make_plain_loc(loc) 563 564 def visit_begin_graph(self, graph): 565 self._graph = graph 566 self._dump_raw('digraph "ExplodedGraph" {\n') 567 if self._dark_mode: 568 self._dump_raw('bgcolor="gray10";\n') 569 self._dump_raw('label="";\n') 570 571 def visit_program_point(self, p): 572 if p.kind in ["Edge", "BlockEntrance", "BlockExit"]: 573 color = "gold3" 574 elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]: 575 color = "red" 576 elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]: 577 color = "dodgerblue" if self._dark_mode else "blue" 578 elif p.kind in ["Statement"]: 579 color = "cyan4" 580 else: 581 color = "forestgreen" 582 583 self._dump('<tr><td align="left">%s.</td>' % p.node_id) 584 585 if p.kind == "Statement": 586 # This avoids pretty-printing huge statements such as CompoundStmt. 587 # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols 588 skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind 589 stmt_color = "cyan3" 590 self._dump( 591 '<td align="left" width="0">%s:</td>' 592 '<td align="left" width="0"><font color="%s">' 593 "%s</font> </td>" 594 '<td align="left"><i>S%s</i></td>' 595 '<td align="left"><font color="%s">%s</font></td>' 596 '<td align="left">%s</td></tr>' 597 % ( 598 self._make_sloc(p.loc), 599 color, 600 "%s (%s)" % (p.stmt_kind, p.cast_kind) 601 if p.cast_kind is not None 602 else p.stmt_kind, 603 p.stmt_id, 604 stmt_color, 605 p.stmt_point_kind, 606 self._short_pretty(p.pretty) if not skip_pretty else "", 607 ) 608 ) 609 elif p.kind == "Edge": 610 self._dump( 611 '<td width="0"></td>' 612 '<td align="left" width="0">' 613 '<font color="%s">%s</font></td><td align="left">' 614 "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id) 615 ) 616 elif p.kind == "BlockEntrance": 617 self._dump( 618 '<td width="0"></td>' 619 '<td align="left" width="0">' 620 '<font color="%s">%s</font></td>' 621 '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id) 622 ) 623 elif p.kind == "CallEnter": 624 self._dump( 625 '<td width="0"></td>' 626 '<td align="left" width="0">' 627 '<font color="%s">%s</font></td>' 628 '<td align="left">%s</td></tr>' % (color, p.kind, p.callee_decl) 629 ) 630 else: 631 # TODO: Print more stuff for other kinds of points. 632 self._dump( 633 '<td width="0"></td>' 634 '<td align="left" width="0" colspan="2">' 635 '<font color="%s">%s</font></td></tr>' % (color, p.kind) 636 ) 637 638 if p.tag is not None: 639 self._dump( 640 '<tr><td width="0"></td><td width="0"></td>' 641 '<td colspan="3" align="left">' 642 '<b>Tag: </b> <font color="crimson">' 643 "%s</font></td></tr>" % p.tag 644 ) 645 646 if p.has_report: 647 self._dump( 648 '<tr><td width="0"></td><td width="0"></td>' 649 '<td colspan="3" align="left">' 650 '<font color="red"><b>Bug Report Attached' 651 "</b></font></td></tr>" 652 ) 653 if p.is_sink: 654 self._dump( 655 '<tr><td width="0"></td><td width="0"></td>' 656 '<td colspan="3" align="left">' 657 '<font color="cornflowerblue"><b>Sink Node' 658 "</b></font></td></tr>" 659 ) 660 661 def visit_environment(self, e, prev_e=None): 662 self._dump('<table border="0">') 663 664 def dump_location_context(lc, is_added=None): 665 self._dump( 666 "<tr><td>%s</td>" 667 '<td align="left"><b>%s</b></td>' 668 '<td align="left" colspan="2">' 669 '<font color="gray60">%s </font>' 670 "%s</td></tr>" 671 % ( 672 self._diff_plus_minus(is_added), 673 lc.caption, 674 lc.decl, 675 ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "", 676 ) 677 ) 678 679 def dump_binding(f, b, is_added=None): 680 self._dump( 681 "<tr><td>%s</td>" 682 '<td align="left"><i>S%s</i></td>' 683 "%s" 684 '<td align="left">%s</td>' 685 '<td align="left">%s</td></tr>' 686 % ( 687 self._diff_plus_minus(is_added), 688 b.stmt_id, 689 '<td align="left"><font color="%s"><i>' 690 "%s</i></font></td>" 691 % ( 692 "lavender" if self._dark_mode else "darkgreen", 693 ("(%s)" % b.kind) if b.kind is not None else " ", 694 ), 695 self._short_pretty(b.pretty), 696 f.bindings[b], 697 ) 698 ) 699 700 frames_updated = e.diff_frames(prev_e) if prev_e is not None else None 701 if frames_updated: 702 for i in frames_updated: 703 f = e.frames[i] 704 prev_f = prev_e.frames[i] 705 dump_location_context(f.location_context) 706 bindings_removed, bindings_added = f.diff_bindings(prev_f) 707 for b in bindings_removed: 708 dump_binding(prev_f, b, False) 709 for b in bindings_added: 710 dump_binding(f, b, True) 711 else: 712 for f in e.frames: 713 dump_location_context(f.location_context) 714 for b in f.bindings: 715 dump_binding(f, b) 716 717 self._dump("</table>") 718 719 def visit_environment_in_state(self, selector, title, s, prev_s=None): 720 e = getattr(s, selector) 721 prev_e = getattr(prev_s, selector) if prev_s is not None else None 722 if e is None and prev_e is None: 723 return 724 725 self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title) 726 if e is None: 727 self._dump("<i> Nothing!</i>") 728 else: 729 if prev_e is not None: 730 if e.is_different(prev_e): 731 self._dump('</td></tr><tr><td align="left">') 732 self.visit_environment(e, prev_e) 733 else: 734 self._dump("<i> No changes!</i>") 735 else: 736 self._dump('</td></tr><tr><td align="left">') 737 self.visit_environment(e) 738 739 self._dump("</td></tr>") 740 741 def visit_store(self, s, prev_s=None): 742 self._dump('<table border="0">') 743 744 def dump_binding(s, c, b, is_added=None): 745 self._dump( 746 "<tr><td>%s</td>" 747 '<td align="left">%s</td>' 748 '<td align="left">%s</td>' 749 '<td align="left">%s</td>' 750 '<td align="left">%s</td></tr>' 751 % ( 752 self._diff_plus_minus(is_added), 753 s.clusters[c].base_region, 754 b.offset, 755 "(<i>Default</i>)" if b.kind == "Default" else "", 756 s.clusters[c].bindings[b], 757 ) 758 ) 759 760 if prev_s is not None: 761 clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s) 762 for c in clusters_removed: 763 for b in prev_s.clusters[c].bindings: 764 dump_binding(prev_s, c, b, False) 765 for c in clusters_updated: 766 bindings_removed, bindings_added = s.clusters[c].diff_bindings( 767 prev_s.clusters[c] 768 ) 769 for b in bindings_removed: 770 dump_binding(prev_s, c, b, False) 771 for b in bindings_added: 772 dump_binding(s, c, b, True) 773 for c in clusters_added: 774 for b in s.clusters[c].bindings: 775 dump_binding(s, c, b, True) 776 else: 777 for c in s.clusters: 778 for b in s.clusters[c].bindings: 779 dump_binding(s, c, b) 780 781 self._dump("</table>") 782 783 def visit_store_in_state(self, s, prev_s=None): 784 st = s.store 785 prev_st = prev_s.store if prev_s is not None else None 786 if st is None and prev_st is None: 787 return 788 789 self._dump('<hr /><tr><td align="left"><b>Store: </b>') 790 if st is None: 791 self._dump("<i> Nothing!</i>") 792 else: 793 if self._dark_mode: 794 self._dump(' <font color="gray30">(%s)</font>' % st.ptr) 795 else: 796 self._dump(' <font color="gray">(%s)</font>' % st.ptr) 797 if prev_st is not None: 798 if s.store.is_different(prev_st): 799 self._dump('</td></tr><tr><td align="left">') 800 self.visit_store(st, prev_st) 801 else: 802 self._dump("<i> No changes!</i>") 803 else: 804 self._dump('</td></tr><tr><td align="left">') 805 self.visit_store(st) 806 self._dump("</td></tr>") 807 808 def visit_generic_map(self, m, prev_m=None): 809 self._dump('<table border="0">') 810 811 def dump_pair(m, k, is_added=None): 812 self._dump( 813 "<tr><td>%s</td>" 814 '<td align="left">%s</td>' 815 '<td align="left">%s</td></tr>' 816 % (self._diff_plus_minus(is_added), k, m.generic_map[k]) 817 ) 818 819 if prev_m is not None: 820 removed, added = m.diff(prev_m) 821 for k in removed: 822 dump_pair(prev_m, k, False) 823 for k in added: 824 dump_pair(m, k, True) 825 else: 826 for k in m.generic_map: 827 dump_pair(m, k, None) 828 829 self._dump("</table>") 830 831 def visit_generic_map_in_state(self, selector, title, s, prev_s=None): 832 m = getattr(s, selector) 833 prev_m = getattr(prev_s, selector) if prev_s is not None else None 834 if m is None and prev_m is None: 835 return 836 837 self._dump("<hr />") 838 self._dump('<tr><td align="left">' "<b>%s: </b>" % title) 839 if m is None: 840 self._dump("<i> Nothing!</i>") 841 else: 842 if prev_m is not None: 843 if m.is_different(prev_m): 844 self._dump('</td></tr><tr><td align="left">') 845 self.visit_generic_map(m, prev_m) 846 else: 847 self._dump("<i> No changes!</i>") 848 else: 849 self._dump('</td></tr><tr><td align="left">') 850 self.visit_generic_map(m) 851 852 self._dump("</td></tr>") 853 854 def visit_checker_messages(self, m, prev_m=None): 855 self._dump('<table border="0">') 856 857 def dump_line(l, is_added=None): 858 self._dump( 859 "<tr><td>%s</td>" 860 '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l) 861 ) 862 863 def dump_chk(chk, is_added=None): 864 dump_line("<i>%s</i>:" % chk, is_added) 865 866 if prev_m is not None: 867 removed, added, updated = m.diff_messages(prev_m) 868 for chk in removed: 869 dump_chk(chk, False) 870 for l in prev_m.items[chk].lines: 871 dump_line(l, False) 872 for chk in updated: 873 dump_chk(chk) 874 for l in m.items[chk].diff_lines(prev_m.items[chk]): 875 dump_line(l[1:], l.startswith("+")) 876 for chk in added: 877 dump_chk(chk, True) 878 for l in m.items[chk].lines: 879 dump_line(l, True) 880 else: 881 for chk in m.items: 882 dump_chk(chk) 883 for l in m.items[chk].lines: 884 dump_line(l) 885 886 self._dump("</table>") 887 888 def visit_checker_messages_in_state(self, s, prev_s=None): 889 m = s.checker_messages 890 prev_m = prev_s.checker_messages if prev_s is not None else None 891 if m is None and prev_m is None: 892 return 893 894 self._dump("<hr />") 895 self._dump('<tr><td align="left">' "<b>Checker State: </b>") 896 if m is None: 897 self._dump("<i> Nothing!</i>") 898 else: 899 if prev_m is not None: 900 if m.is_different(prev_m): 901 self._dump('</td></tr><tr><td align="left">') 902 self.visit_checker_messages(m, prev_m) 903 else: 904 self._dump("<i> No changes!</i>") 905 else: 906 self._dump('</td></tr><tr><td align="left">') 907 self.visit_checker_messages(m) 908 909 self._dump("</td></tr>") 910 911 def visit_state(self, s, prev_s): 912 self.visit_store_in_state(s, prev_s) 913 self.visit_environment_in_state("environment", "Expressions", s, prev_s) 914 self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s) 915 self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s) 916 self.visit_environment_in_state( 917 "constructing_objects", "Objects Under Construction", s, prev_s 918 ) 919 self.visit_environment_in_state( 920 "index_of_element", "Indices Of Elements Under Construction", s, prev_s 921 ) 922 self.visit_environment_in_state( 923 "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s 924 ) 925 self.visit_environment_in_state( 926 "pending_destructors", "Indices of Elements Under Destruction", s, prev_s 927 ) 928 self.visit_checker_messages_in_state(s, prev_s) 929 930 def visit_node(self, node): 931 self._dump("%s [shape=record," % (node.node_name())) 932 if self._dark_mode: 933 self._dump('color="white",fontcolor="gray80",') 934 self._dump('label=<<table border="0">') 935 936 self._dump( 937 '<tr><td bgcolor="%s"><b>State %s</b></td></tr>' 938 % ( 939 "gray20" if self._dark_mode else "gray70", 940 node.state.state_id if node.state is not None else "Unspecified", 941 ) 942 ) 943 if not self._topo_mode: 944 self._dump('<tr><td align="left" width="0">') 945 if len(node.points) > 1: 946 self._dump("<b>Program points:</b></td></tr>") 947 else: 948 self._dump("<b>Program point:</b></td></tr>") 949 self._dump( 950 '<tr><td align="left" width="0">' 951 '<table border="0" align="left" width="0">' 952 ) 953 for p in node.points: 954 self.visit_program_point(p) 955 self._dump("</table></td></tr>") 956 957 if node.state is not None and not self._topo_mode: 958 prev_s = None 959 # Do diffs only when we have a unique predecessor. 960 # Don't do diffs on the leaf nodes because they're 961 # the important ones. 962 if ( 963 self._do_diffs 964 and len(node.predecessors) == 1 965 and len(node.successors) > 0 966 ): 967 prev_s = self._graph.nodes[node.predecessors[0]].state 968 self.visit_state(node.state, prev_s) 969 self._dump_raw("</table>>];\n") 970 971 def visit_edge(self, pred, succ): 972 self._dump_raw( 973 "%s -> %s%s;\n" 974 % ( 975 pred.node_name(), 976 succ.node_name(), 977 ' [color="white"]' if self._dark_mode else "", 978 ) 979 ) 980 981 def visit_end_of_graph(self): 982 self._dump_raw("}\n") 983 984 if not self._dump_dot_only: 985 import sys 986 import tempfile 987 988 def write_temp_file(suffix, prefix, data): 989 fd, filename = tempfile.mkstemp(suffix, prefix, ".", True) 990 print('Writing "%s"...' % filename) 991 with os.fdopen(fd, "w") as fp: 992 fp.write(data) 993 print("Done! Please remember to remove the file.") 994 return filename 995 996 try: 997 import graphviz 998 except ImportError: 999 # The fallback behavior if graphviz is not installed! 1000 print("Python graphviz not found. Please invoke") 1001 print(" $ pip install graphviz") 1002 print("in order to enable automatic conversion to HTML.") 1003 print() 1004 print("You may also convert DOT to SVG manually via") 1005 print(" $ dot -Tsvg input.dot -o output.svg") 1006 print() 1007 write_temp_file(".dot", "egraph-", self.output()) 1008 return 1009 1010 svg = graphviz.pipe("dot", "svg", self.output().encode()).decode() 1011 1012 filename = write_temp_file( 1013 ".html", 1014 "egraph-", 1015 '<html><body bgcolor="%s">%s</body></html>' 1016 % ("#1a1a1a" if self._dark_mode else "white", svg), 1017 ) 1018 if self._dump_html_only: 1019 return 1020 if sys.platform == "win32": 1021 os.startfile(filename) 1022 elif sys.platform == "darwin": 1023 os.system('open "%s"' % filename) 1024 else: 1025 os.system('xdg-open "%s"' % filename) 1026 1027 1028# ===-----------------------------------------------------------------------===# 1029# Explorers know how to traverse the ExplodedGraph in a certain order. 1030# They would invoke a Visitor on every node or edge they encounter. 1031# ===-----------------------------------------------------------------------===# 1032 1033 1034# BasicExplorer explores the whole graph in no particular order. 1035class BasicExplorer: 1036 def explore(self, graph, visitor): 1037 visitor.visit_begin_graph(graph) 1038 for node in sorted(graph.nodes): 1039 logging.debug("Visiting " + node) 1040 visitor.visit_node(graph.nodes[node]) 1041 for succ in sorted(graph.nodes[node].successors): 1042 logging.debug("Visiting edge: %s -> %s " % (node, succ)) 1043 visitor.visit_edge(graph.nodes[node], graph.nodes[succ]) 1044 visitor.visit_end_of_graph() 1045 1046 1047# ===-----------------------------------------------------------------------===# 1048# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts. 1049# Trimmers can be combined together by applying them sequentially. 1050# ===-----------------------------------------------------------------------===# 1051 1052 1053# SinglePathTrimmer keeps only a single path - the leftmost path from the root. 1054# Useful when the trimmed graph is still too large. 1055class SinglePathTrimmer: 1056 def trim(self, graph): 1057 visited_nodes = set() 1058 node_id = graph.root_id 1059 while True: 1060 visited_nodes.add(node_id) 1061 node = graph.nodes[node_id] 1062 if len(node.successors) > 0: 1063 succ_id = node.successors[0] 1064 succ = graph.nodes[succ_id] 1065 node.successors = [succ_id] 1066 succ.predecessors = [node_id] 1067 if succ_id in visited_nodes: 1068 break 1069 node_id = succ_id 1070 else: 1071 break 1072 graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} 1073 1074 1075# TargetedTrimmer keeps paths that lead to specific nodes and discards all 1076# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging 1077# a crash). 1078class TargetedTrimmer: 1079 def __init__(self, target_nodes): 1080 self._target_nodes = target_nodes 1081 1082 @staticmethod 1083 def parse_target_node(node, graph): 1084 if node.startswith("0x"): 1085 ret = "Node" + node 1086 assert ret in graph.nodes 1087 return ret 1088 else: 1089 for other_id in graph.nodes: 1090 other = graph.nodes[other_id] 1091 if other.node_id == int(node): 1092 return other_id 1093 1094 @staticmethod 1095 def parse_target_nodes(target_nodes, graph): 1096 return [ 1097 TargetedTrimmer.parse_target_node(node, graph) 1098 for node in target_nodes.split(",") 1099 ] 1100 1101 def trim(self, graph): 1102 queue = self._target_nodes 1103 visited_nodes = set() 1104 1105 while len(queue) > 0: 1106 node_id = queue.pop() 1107 visited_nodes.add(node_id) 1108 node = graph.nodes[node_id] 1109 for pred_id in node.predecessors: 1110 if pred_id not in visited_nodes: 1111 queue.append(pred_id) 1112 graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} 1113 for node_id in graph.nodes: 1114 node = graph.nodes[node_id] 1115 node.successors = [ 1116 succ_id for succ_id in node.successors if succ_id in visited_nodes 1117 ] 1118 node.predecessors = [ 1119 succ_id for succ_id in node.predecessors if succ_id in visited_nodes 1120 ] 1121 1122 1123# ===-----------------------------------------------------------------------===# 1124# The entry point to the script. 1125# ===-----------------------------------------------------------------------===# 1126 1127 1128def main(): 1129 parser = argparse.ArgumentParser( 1130 description="Display and manipulate Exploded Graph dumps." 1131 ) 1132 parser.add_argument( 1133 "filename", type=str, help="the .dot file produced by the Static Analyzer" 1134 ) 1135 parser.add_argument( 1136 "-v", 1137 "--verbose", 1138 action="store_const", 1139 dest="loglevel", 1140 const=logging.DEBUG, 1141 default=logging.WARNING, 1142 help="enable info prints", 1143 ) 1144 parser.add_argument( 1145 "-d", 1146 "--diff", 1147 action="store_const", 1148 dest="diff", 1149 const=True, 1150 default=False, 1151 help="display differences between states", 1152 ) 1153 parser.add_argument( 1154 "-t", 1155 "--topology", 1156 action="store_const", 1157 dest="topology", 1158 const=True, 1159 default=False, 1160 help="only display program points, omit states", 1161 ) 1162 parser.add_argument( 1163 "-s", 1164 "--single-path", 1165 action="store_const", 1166 dest="single_path", 1167 const=True, 1168 default=False, 1169 help="only display the leftmost path in the graph " 1170 "(useful for trimmed graphs that still " 1171 "branch too much)", 1172 ) 1173 parser.add_argument( 1174 "--to", 1175 type=str, 1176 default=None, 1177 help="only display execution paths from the root " 1178 "to the given comma-separated list of nodes " 1179 "identified by a pointer or a stable ID; " 1180 "compatible with --single-path", 1181 ) 1182 parser.add_argument( 1183 "--dark", 1184 action="store_const", 1185 dest="dark", 1186 const=True, 1187 default=False, 1188 help="dark mode", 1189 ) 1190 parser.add_argument( 1191 "--gray", 1192 action="store_const", 1193 dest="gray", 1194 const=True, 1195 default=False, 1196 help="black-and-white mode", 1197 ) 1198 dump_conflict = parser.add_mutually_exclusive_group() 1199 dump_conflict.add_argument( 1200 "--dump-html-only", 1201 action="store_const", 1202 dest="dump_html_only", 1203 const=True, 1204 default=False, 1205 help="dump the rewritten egraph to a temporary HTML file, " 1206 "but do not open it immediately as by default", 1207 ) 1208 dump_conflict.add_argument( 1209 "--dump-dot-only", 1210 action="store_const", 1211 dest="dump_dot_only", 1212 const=True, 1213 default=False, 1214 help="instead of writing an HTML file and immediately " 1215 "displaying it, dump the rewritten dot file " 1216 "to stdout", 1217 ) 1218 args = parser.parse_args() 1219 logging.basicConfig(level=args.loglevel) 1220 1221 graph = ExplodedGraph() 1222 with open(args.filename) as fd: 1223 for raw_line in fd: 1224 raw_line = raw_line.strip() 1225 graph.add_raw_line(raw_line) 1226 1227 trimmers = [] 1228 if args.to is not None: 1229 trimmers.append( 1230 TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph)) 1231 ) 1232 if args.single_path: 1233 trimmers.append(SinglePathTrimmer()) 1234 1235 explorer = BasicExplorer() 1236 1237 visitor = DotDumpVisitor( 1238 args.diff, 1239 args.dark, 1240 args.gray, 1241 args.topology, 1242 args.dump_html_only, 1243 args.dump_dot_only, 1244 ) 1245 1246 for trimmer in trimmers: 1247 trimmer.trim(graph) 1248 1249 explorer.explore(graph, visitor) 1250 1251 1252if __name__ == "__main__": 1253 main() 1254