1#!/usr/bin/env python 2# 3# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--# 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9# ===-----------------------------------------------------------------------===# 10 11 12from __future__ import print_function 13 14import argparse 15import collections 16import difflib 17import json 18import logging 19import os 20import re 21 22 23# ===-----------------------------------------------------------------------===# 24# These data structures represent a deserialized ExplodedGraph. 25# ===-----------------------------------------------------------------------===# 26 27 28# A helper function for finding the difference between two dictionaries. 29def diff_dicts(curr, prev): 30 removed = [k for k in prev if k not in curr or curr[k] != prev[k]] 31 added = [k for k in curr if k not in prev or curr[k] != prev[k]] 32 return (removed, added) 33 34 35# Represents any program state trait that is a dictionary of key-value pairs. 36class GenericMap: 37 def __init__(self, items): 38 self.generic_map = collections.OrderedDict(items) 39 40 def diff(self, prev): 41 return diff_dicts(self.generic_map, prev.generic_map) 42 43 def is_different(self, prev): 44 removed, added = self.diff(prev) 45 return len(removed) != 0 or len(added) != 0 46 47 48# A deserialized source location. 49class SourceLocation: 50 def __init__(self, json_loc): 51 logging.debug("json: %s" % json_loc) 52 self.line = json_loc["line"] 53 self.col = json_loc["column"] 54 self.filename = ( 55 os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)" 56 ) 57 self.spelling = ( 58 SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None 59 ) 60 61 def is_macro(self): 62 return self.spelling is not None 63 64 65# A deserialized program point. 66class ProgramPoint: 67 def __init__(self, json_pp): 68 self.kind = json_pp["kind"] 69 self.tag = json_pp["tag"] 70 self.node_id = json_pp["node_id"] 71 self.is_sink = bool(json_pp["is_sink"]) 72 self.has_report = bool(json_pp["has_report"]) 73 if self.kind == "Edge": 74 self.src_id = json_pp["src_id"] 75 self.dst_id = json_pp["dst_id"] 76 elif self.kind == "Statement": 77 logging.debug(json_pp) 78 self.stmt_kind = json_pp["stmt_kind"] 79 self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None 80 self.stmt_point_kind = json_pp["stmt_point_kind"] 81 self.stmt_id = json_pp["stmt_id"] 82 self.pointer = json_pp["pointer"] 83 self.pretty = json_pp["pretty"] 84 self.loc = ( 85 SourceLocation(json_pp["location"]) 86 if json_pp["location"] is not None 87 else None 88 ) 89 elif self.kind == "BlockEntrance": 90 self.block_id = json_pp["block_id"] 91 92 93# A single expression acting as a key in a deserialized Environment. 94class EnvironmentBindingKey: 95 def __init__(self, json_ek): 96 # CXXCtorInitializer is not a Stmt! 97 self.stmt_id = ( 98 json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"] 99 ) 100 self.pretty = json_ek["pretty"] 101 self.kind = json_ek["kind"] if "kind" in json_ek else None 102 103 def _key(self): 104 return self.stmt_id 105 106 def __eq__(self, other): 107 return self._key() == other._key() 108 109 def __hash__(self): 110 return hash(self._key()) 111 112 113# Deserialized description of a location context. 114class LocationContext: 115 def __init__(self, json_frame): 116 self.lctx_id = json_frame["lctx_id"] 117 self.caption = json_frame["location_context"] 118 self.decl = json_frame["calling"] 119 self.loc = ( 120 SourceLocation(json_frame["location"]) 121 if json_frame["location"] is not None 122 else None 123 ) 124 125 def _key(self): 126 return self.lctx_id 127 128 def __eq__(self, other): 129 return self._key() == other._key() 130 131 def __hash__(self): 132 return hash(self._key()) 133 134 135# A group of deserialized Environment bindings that correspond to a specific 136# location context. 137class EnvironmentFrame: 138 def __init__(self, json_frame): 139 self.location_context = LocationContext(json_frame) 140 self.bindings = collections.OrderedDict( 141 [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]] 142 if json_frame["items"] is not None 143 else [] 144 ) 145 146 def diff_bindings(self, prev): 147 return diff_dicts(self.bindings, prev.bindings) 148 149 def is_different(self, prev): 150 removed, added = self.diff_bindings(prev) 151 return len(removed) != 0 or len(added) != 0 152 153 154# A deserialized Environment. This class can also hold other entities that 155# are similar to Environment, such as Objects Under Construction or 156# Indices Of Elements Under Construction. 157class GenericEnvironment: 158 def __init__(self, json_e): 159 self.frames = [EnvironmentFrame(f) for f in json_e] 160 161 def diff_frames(self, prev): 162 # TODO: It's difficult to display a good diff when frame numbers shift. 163 if len(self.frames) != len(prev.frames): 164 return None 165 166 updated = [] 167 for i in range(len(self.frames)): 168 f = self.frames[i] 169 prev_f = prev.frames[i] 170 if f.location_context == prev_f.location_context: 171 if f.is_different(prev_f): 172 updated.append(i) 173 else: 174 # We have the whole frame replaced with another frame. 175 # TODO: Produce a nice diff. 176 return None 177 178 # TODO: Add support for added/removed. 179 return updated 180 181 def is_different(self, prev): 182 updated = self.diff_frames(prev) 183 return updated is None or len(updated) > 0 184 185 186# A single binding key in a deserialized RegionStore cluster. 187class StoreBindingKey: 188 def __init__(self, json_sk): 189 self.kind = json_sk["kind"] 190 self.offset = json_sk["offset"] 191 192 def _key(self): 193 return (self.kind, self.offset) 194 195 def __eq__(self, other): 196 return self._key() == other._key() 197 198 def __hash__(self): 199 return hash(self._key()) 200 201 202# A single cluster of the deserialized RegionStore. 203class StoreCluster: 204 def __init__(self, json_sc): 205 self.base_region = json_sc["cluster"] 206 self.bindings = collections.OrderedDict( 207 [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]] 208 ) 209 210 def diff_bindings(self, prev): 211 return diff_dicts(self.bindings, prev.bindings) 212 213 def is_different(self, prev): 214 removed, added = self.diff_bindings(prev) 215 return len(removed) != 0 or len(added) != 0 216 217 218# A deserialized RegionStore. 219class Store: 220 def __init__(self, json_s): 221 self.ptr = json_s["pointer"] 222 self.clusters = collections.OrderedDict( 223 [(c["pointer"], StoreCluster(c)) for c in json_s["items"]] 224 ) 225 226 def diff_clusters(self, prev): 227 removed = [k for k in prev.clusters if k not in self.clusters] 228 added = [k for k in self.clusters if k not in prev.clusters] 229 updated = [ 230 k 231 for k in prev.clusters 232 if k in self.clusters and prev.clusters[k].is_different(self.clusters[k]) 233 ] 234 return (removed, added, updated) 235 236 def is_different(self, prev): 237 removed, added, updated = self.diff_clusters(prev) 238 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 239 240 241# Deserialized messages from a single checker in a single program state. 242# Basically a list of raw strings. 243class CheckerLines: 244 def __init__(self, json_lines): 245 self.lines = json_lines 246 247 def diff_lines(self, prev): 248 lines = difflib.ndiff(prev.lines, self.lines) 249 return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")] 250 251 def is_different(self, prev): 252 return len(self.diff_lines(prev)) > 0 253 254 255# Deserialized messages of all checkers, separated by checker. 256class CheckerMessages: 257 def __init__(self, json_m): 258 self.items = collections.OrderedDict( 259 [(m["checker"], CheckerLines(m["messages"])) for m in json_m] 260 ) 261 262 def diff_messages(self, prev): 263 removed = [k for k in prev.items if k not in self.items] 264 added = [k for k in self.items if k not in prev.items] 265 updated = [ 266 k 267 for k in prev.items 268 if k in self.items and prev.items[k].is_different(self.items[k]) 269 ] 270 return (removed, added, updated) 271 272 def is_different(self, prev): 273 removed, added, updated = self.diff_messages(prev) 274 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 275 276 277# A deserialized program state. 278class ProgramState: 279 def __init__(self, state_id, json_ps): 280 logging.debug("Adding ProgramState " + str(state_id)) 281 282 store_key = "store" 283 env_key = "environment" 284 constraints_key = "constraints" 285 dyn_ty_key = "dynamic_types" 286 ctor_key = "constructing_objects" 287 ind_key = "index_of_element" 288 init_loop_key = "pending_init_loops" 289 dtor_key = "pending_destructors" 290 msg_key = "checker_messages" 291 292 if json_ps is None: 293 json_ps = { 294 store_key: None, 295 env_key: None, 296 constraints_key: None, 297 dyn_ty_key: None, 298 ctor_key: None, 299 ind_key: None, 300 init_loop_key: None, 301 dtor_key: None, 302 msg_key: None, 303 } 304 305 self.state_id = state_id 306 307 self.store = ( 308 Store(json_ps[store_key]) if json_ps[store_key] is not None else None 309 ) 310 311 self.environment = ( 312 GenericEnvironment(json_ps[env_key]["items"]) 313 if json_ps[env_key] is not None 314 else None 315 ) 316 317 self.constraints = ( 318 GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]]) 319 if json_ps[constraints_key] is not None 320 else None 321 ) 322 323 self.dynamic_types = ( 324 GenericMap( 325 [ 326 ( 327 t["region"], 328 "%s%s" 329 % ( 330 t["dyn_type"], 331 " (or a sub-class)" if t["sub_classable"] else "", 332 ), 333 ) 334 for t in json_ps[dyn_ty_key] 335 ] 336 ) 337 if json_ps[dyn_ty_key] is not None 338 else None 339 ) 340 341 self.checker_messages = ( 342 CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None 343 ) 344 345 # State traits 346 # 347 # For traits we always check if a key exists because if a trait 348 # has no imformation, nothing will be printed in the .dot file 349 # we parse. 350 351 self.constructing_objects = ( 352 GenericEnvironment(json_ps[ctor_key]) 353 if ctor_key in json_ps and json_ps[ctor_key] is not None 354 else None 355 ) 356 357 self.index_of_element = ( 358 GenericEnvironment(json_ps[ind_key]) 359 if ind_key in json_ps and json_ps[ind_key] is not None 360 else None 361 ) 362 363 self.pending_init_loops = ( 364 GenericEnvironment(json_ps[init_loop_key]) 365 if init_loop_key in json_ps and json_ps[init_loop_key] is not None 366 else None 367 ) 368 369 self.pending_destructors = ( 370 GenericEnvironment(json_ps[dtor_key]) 371 if dtor_key in json_ps and json_ps[dtor_key] is not None 372 else None 373 ) 374 375 376# A deserialized exploded graph node. Has a default constructor because it 377# may be referenced as part of an edge before its contents are deserialized, 378# and in this moment we already need a room for predecessors and successors. 379class ExplodedNode: 380 def __init__(self): 381 self.predecessors = [] 382 self.successors = [] 383 384 def construct(self, node_id, json_node): 385 logging.debug("Adding " + node_id) 386 self.ptr = node_id[4:] 387 self.points = [ProgramPoint(p) for p in json_node["program_points"]] 388 self.node_id = self.points[-1].node_id 389 self.state = ProgramState( 390 json_node["state_id"], 391 json_node["program_state"] 392 if json_node["program_state"] is not None 393 else None, 394 ) 395 396 assert self.node_name() == node_id 397 398 def node_name(self): 399 return "Node" + self.ptr 400 401 402# A deserialized ExplodedGraph. Constructed by consuming a .dot file 403# line-by-line. 404class ExplodedGraph: 405 # Parse .dot files with regular expressions. 406 node_re = re.compile( 407 '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$' 408 ) 409 edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$") 410 411 def __init__(self): 412 self.nodes = collections.defaultdict(ExplodedNode) 413 self.root_id = None 414 self.incomplete_line = "" 415 416 def add_raw_line(self, raw_line): 417 if raw_line.startswith("//"): 418 return 419 420 # Allow line breaks by waiting for ';'. This is not valid in 421 # a .dot file, but it is useful for writing tests. 422 if len(raw_line) > 0 and raw_line[-1] != ";": 423 self.incomplete_line += raw_line 424 return 425 raw_line = self.incomplete_line + raw_line 426 self.incomplete_line = "" 427 428 # Apply regexps one by one to see if it's a node or an edge 429 # and extract contents if necessary. 430 logging.debug("Line: " + raw_line) 431 result = self.edge_re.match(raw_line) 432 if result is not None: 433 logging.debug("Classified as edge line.") 434 pred = result.group(1) 435 succ = result.group(2) 436 self.nodes[pred].successors.append(succ) 437 self.nodes[succ].predecessors.append(pred) 438 return 439 result = self.node_re.match(raw_line) 440 if result is not None: 441 logging.debug("Classified as node line.") 442 node_id = result.group(1) 443 if len(self.nodes) == 0: 444 self.root_id = node_id 445 # Note: when writing tests you don't need to escape everything, 446 # even though in a valid dot file everything is escaped. 447 node_label = ( 448 result.group(2) 449 .replace(" ", "") 450 .replace('\\"', '"') 451 .replace("\\{", "{") 452 .replace("\\}", "}") 453 .replace("\\\\", "\\") 454 .replace("\\|", "|") 455 .replace("\\<", "\\\\<") 456 .replace("\\>", "\\\\>") 457 .rstrip(",") 458 ) 459 # Handle `\l` separately because a string literal can be in code 460 # like "string\\literal" with the `\l` inside. 461 # Also on Windows macros __FILE__ produces specific delimiters `\` 462 # and a directory or file may starts with the letter `l`. 463 # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`, 464 # because the literal as a rule contains multiple `\` before `\l`. 465 node_label = re.sub(r"(?<!\\)\\l", "", node_label) 466 logging.debug(node_label) 467 json_node = json.loads(node_label) 468 self.nodes[node_id].construct(node_id, json_node) 469 return 470 logging.debug("Skipping.") 471 472 473# ===-----------------------------------------------------------------------===# 474# Visitors traverse a deserialized ExplodedGraph and do different things 475# with every node and edge. 476# ===-----------------------------------------------------------------------===# 477 478 479# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based 480# syntax highlighing. 481class DotDumpVisitor: 482 def __init__(self, do_diffs, dark_mode, gray_mode, topo_mode, dump_dot_only): 483 self._do_diffs = do_diffs 484 self._dark_mode = dark_mode 485 self._gray_mode = gray_mode 486 self._topo_mode = topo_mode 487 self._dump_dot_only = dump_dot_only 488 self._output = [] 489 490 def _dump_raw(self, s): 491 if self._dump_dot_only: 492 print(s, end="") 493 else: 494 self._output.append(s) 495 496 def output(self): 497 assert not self._dump_dot_only 498 return "".join(self._output) 499 500 def _dump(self, s): 501 s = ( 502 s.replace("&", "&") 503 .replace("{", "\\{") 504 .replace("}", "\\}") 505 .replace("\\<", "<") 506 .replace("\\>", ">") 507 .replace("|", "\\|") 508 ) 509 s = re.sub(r"(?<!\\)\\l", "<br />", s) 510 if self._gray_mode: 511 s = re.sub(r'<font color="[a-z0-9]*">', "", s) 512 s = re.sub(r"</font>", "", s) 513 self._dump_raw(s) 514 515 @staticmethod 516 def _diff_plus_minus(is_added): 517 if is_added is None: 518 return "" 519 if is_added: 520 return '<font color="forestgreen">+</font>' 521 return '<font color="red">-</font>' 522 523 @staticmethod 524 def _short_pretty(s): 525 if s is None: 526 return None 527 if len(s) < 20: 528 return s 529 left = s.find("{") 530 right = s.rfind("}") 531 if left == -1 or right == -1 or left >= right: 532 return s 533 candidate = s[0 : left + 1] + " ... " + s[right:] 534 if len(candidate) >= len(s): 535 return s 536 return candidate 537 538 @staticmethod 539 def _make_sloc(loc): 540 if loc is None: 541 return "<i>Invalid Source Location</i>" 542 543 def make_plain_loc(loc): 544 return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col) 545 546 if loc.is_macro(): 547 return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % ( 548 make_plain_loc(loc), 549 make_plain_loc(loc.spelling), 550 ) 551 552 return make_plain_loc(loc) 553 554 def visit_begin_graph(self, graph): 555 self._graph = graph 556 self._dump_raw('digraph "ExplodedGraph" {\n') 557 if self._dark_mode: 558 self._dump_raw('bgcolor="gray10";\n') 559 self._dump_raw('label="";\n') 560 561 def visit_program_point(self, p): 562 if p.kind in ["Edge", "BlockEntrance", "BlockExit"]: 563 color = "gold3" 564 elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]: 565 color = "red" 566 elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]: 567 color = "dodgerblue" if self._dark_mode else "blue" 568 elif p.kind in ["Statement"]: 569 color = "cyan4" 570 else: 571 color = "forestgreen" 572 573 self._dump('<tr><td align="left">%s.</td>' % p.node_id) 574 575 if p.kind == "Statement": 576 # This avoids pretty-printing huge statements such as CompoundStmt. 577 # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols 578 skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind 579 stmt_color = "cyan3" 580 self._dump( 581 '<td align="left" width="0">%s:</td>' 582 '<td align="left" width="0"><font color="%s">' 583 "%s</font> </td>" 584 '<td align="left"><i>S%s</i></td>' 585 '<td align="left"><font color="%s">%s</font></td>' 586 '<td align="left">%s</td></tr>' 587 % ( 588 self._make_sloc(p.loc), 589 color, 590 "%s (%s)" % (p.stmt_kind, p.cast_kind) 591 if p.cast_kind is not None 592 else p.stmt_kind, 593 p.stmt_id, 594 stmt_color, 595 p.stmt_point_kind, 596 self._short_pretty(p.pretty) if not skip_pretty else "", 597 ) 598 ) 599 elif p.kind == "Edge": 600 self._dump( 601 '<td width="0"></td>' 602 '<td align="left" width="0">' 603 '<font color="%s">%s</font></td><td align="left">' 604 "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id) 605 ) 606 elif p.kind == "BlockEntrance": 607 self._dump( 608 '<td width="0"></td>' 609 '<td align="left" width="0">' 610 '<font color="%s">%s</font></td>' 611 '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id) 612 ) 613 else: 614 # TODO: Print more stuff for other kinds of points. 615 self._dump( 616 '<td width="0"></td>' 617 '<td align="left" width="0" colspan="2">' 618 '<font color="%s">%s</font></td></tr>' % (color, p.kind) 619 ) 620 621 if p.tag is not None: 622 self._dump( 623 '<tr><td width="0"></td><td width="0"></td>' 624 '<td colspan="3" align="left">' 625 '<b>Tag: </b> <font color="crimson">' 626 "%s</font></td></tr>" % p.tag 627 ) 628 629 if p.has_report: 630 self._dump( 631 '<tr><td width="0"></td><td width="0"></td>' 632 '<td colspan="3" align="left">' 633 '<font color="red"><b>Bug Report Attached' 634 "</b></font></td></tr>" 635 ) 636 if p.is_sink: 637 self._dump( 638 '<tr><td width="0"></td><td width="0"></td>' 639 '<td colspan="3" align="left">' 640 '<font color="cornflowerblue"><b>Sink Node' 641 "</b></font></td></tr>" 642 ) 643 644 def visit_environment(self, e, prev_e=None): 645 self._dump('<table border="0">') 646 647 def dump_location_context(lc, is_added=None): 648 self._dump( 649 "<tr><td>%s</td>" 650 '<td align="left"><b>%s</b></td>' 651 '<td align="left" colspan="2">' 652 '<font color="gray60">%s </font>' 653 "%s</td></tr>" 654 % ( 655 self._diff_plus_minus(is_added), 656 lc.caption, 657 lc.decl, 658 ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "", 659 ) 660 ) 661 662 def dump_binding(f, b, is_added=None): 663 self._dump( 664 "<tr><td>%s</td>" 665 '<td align="left"><i>S%s</i></td>' 666 "%s" 667 '<td align="left">%s</td>' 668 '<td align="left">%s</td></tr>' 669 % ( 670 self._diff_plus_minus(is_added), 671 b.stmt_id, 672 '<td align="left"><font color="%s"><i>' 673 "%s</i></font></td>" 674 % ( 675 "lavender" if self._dark_mode else "darkgreen", 676 ("(%s)" % b.kind) if b.kind is not None else " ", 677 ), 678 self._short_pretty(b.pretty), 679 f.bindings[b], 680 ) 681 ) 682 683 frames_updated = e.diff_frames(prev_e) if prev_e is not None else None 684 if frames_updated: 685 for i in frames_updated: 686 f = e.frames[i] 687 prev_f = prev_e.frames[i] 688 dump_location_context(f.location_context) 689 bindings_removed, bindings_added = f.diff_bindings(prev_f) 690 for b in bindings_removed: 691 dump_binding(prev_f, b, False) 692 for b in bindings_added: 693 dump_binding(f, b, True) 694 else: 695 for f in e.frames: 696 dump_location_context(f.location_context) 697 for b in f.bindings: 698 dump_binding(f, b) 699 700 self._dump("</table>") 701 702 def visit_environment_in_state(self, selector, title, s, prev_s=None): 703 e = getattr(s, selector) 704 prev_e = getattr(prev_s, selector) if prev_s is not None else None 705 if e is None and prev_e is None: 706 return 707 708 self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title) 709 if e is None: 710 self._dump("<i> Nothing!</i>") 711 else: 712 if prev_e is not None: 713 if e.is_different(prev_e): 714 self._dump('</td></tr><tr><td align="left">') 715 self.visit_environment(e, prev_e) 716 else: 717 self._dump("<i> No changes!</i>") 718 else: 719 self._dump('</td></tr><tr><td align="left">') 720 self.visit_environment(e) 721 722 self._dump("</td></tr>") 723 724 def visit_store(self, s, prev_s=None): 725 self._dump('<table border="0">') 726 727 def dump_binding(s, c, b, is_added=None): 728 self._dump( 729 "<tr><td>%s</td>" 730 '<td align="left">%s</td>' 731 '<td align="left">%s</td>' 732 '<td align="left">%s</td>' 733 '<td align="left">%s</td></tr>' 734 % ( 735 self._diff_plus_minus(is_added), 736 s.clusters[c].base_region, 737 b.offset, 738 "(<i>Default</i>)" if b.kind == "Default" else "", 739 s.clusters[c].bindings[b], 740 ) 741 ) 742 743 if prev_s is not None: 744 clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s) 745 for c in clusters_removed: 746 for b in prev_s.clusters[c].bindings: 747 dump_binding(prev_s, c, b, False) 748 for c in clusters_updated: 749 bindings_removed, bindings_added = s.clusters[c].diff_bindings( 750 prev_s.clusters[c] 751 ) 752 for b in bindings_removed: 753 dump_binding(prev_s, c, b, False) 754 for b in bindings_added: 755 dump_binding(s, c, b, True) 756 for c in clusters_added: 757 for b in s.clusters[c].bindings: 758 dump_binding(s, c, b, True) 759 else: 760 for c in s.clusters: 761 for b in s.clusters[c].bindings: 762 dump_binding(s, c, b) 763 764 self._dump("</table>") 765 766 def visit_store_in_state(self, s, prev_s=None): 767 st = s.store 768 prev_st = prev_s.store if prev_s is not None else None 769 if st is None and prev_st is None: 770 return 771 772 self._dump('<hr /><tr><td align="left"><b>Store: </b>') 773 if st is None: 774 self._dump("<i> Nothing!</i>") 775 else: 776 if self._dark_mode: 777 self._dump(' <font color="gray30">(%s)</font>' % st.ptr) 778 else: 779 self._dump(' <font color="gray">(%s)</font>' % st.ptr) 780 if prev_st is not None: 781 if s.store.is_different(prev_st): 782 self._dump('</td></tr><tr><td align="left">') 783 self.visit_store(st, prev_st) 784 else: 785 self._dump("<i> No changes!</i>") 786 else: 787 self._dump('</td></tr><tr><td align="left">') 788 self.visit_store(st) 789 self._dump("</td></tr>") 790 791 def visit_generic_map(self, m, prev_m=None): 792 self._dump('<table border="0">') 793 794 def dump_pair(m, k, is_added=None): 795 self._dump( 796 "<tr><td>%s</td>" 797 '<td align="left">%s</td>' 798 '<td align="left">%s</td></tr>' 799 % (self._diff_plus_minus(is_added), k, m.generic_map[k]) 800 ) 801 802 if prev_m is not None: 803 removed, added = m.diff(prev_m) 804 for k in removed: 805 dump_pair(prev_m, k, False) 806 for k in added: 807 dump_pair(m, k, True) 808 else: 809 for k in m.generic_map: 810 dump_pair(m, k, None) 811 812 self._dump("</table>") 813 814 def visit_generic_map_in_state(self, selector, title, s, prev_s=None): 815 m = getattr(s, selector) 816 prev_m = getattr(prev_s, selector) if prev_s is not None else None 817 if m is None and prev_m is None: 818 return 819 820 self._dump("<hr />") 821 self._dump('<tr><td align="left">' "<b>%s: </b>" % title) 822 if m is None: 823 self._dump("<i> Nothing!</i>") 824 else: 825 if prev_m is not None: 826 if m.is_different(prev_m): 827 self._dump('</td></tr><tr><td align="left">') 828 self.visit_generic_map(m, prev_m) 829 else: 830 self._dump("<i> No changes!</i>") 831 else: 832 self._dump('</td></tr><tr><td align="left">') 833 self.visit_generic_map(m) 834 835 self._dump("</td></tr>") 836 837 def visit_checker_messages(self, m, prev_m=None): 838 self._dump('<table border="0">') 839 840 def dump_line(l, is_added=None): 841 self._dump( 842 "<tr><td>%s</td>" 843 '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l) 844 ) 845 846 def dump_chk(chk, is_added=None): 847 dump_line("<i>%s</i>:" % chk, is_added) 848 849 if prev_m is not None: 850 removed, added, updated = m.diff_messages(prev_m) 851 for chk in removed: 852 dump_chk(chk, False) 853 for l in prev_m.items[chk].lines: 854 dump_line(l, False) 855 for chk in updated: 856 dump_chk(chk) 857 for l in m.items[chk].diff_lines(prev_m.items[chk]): 858 dump_line(l[1:], l.startswith("+")) 859 for chk in added: 860 dump_chk(chk, True) 861 for l in m.items[chk].lines: 862 dump_line(l, True) 863 else: 864 for chk in m.items: 865 dump_chk(chk) 866 for l in m.items[chk].lines: 867 dump_line(l) 868 869 self._dump("</table>") 870 871 def visit_checker_messages_in_state(self, s, prev_s=None): 872 m = s.checker_messages 873 prev_m = prev_s.checker_messages if prev_s is not None else None 874 if m is None and prev_m is None: 875 return 876 877 self._dump("<hr />") 878 self._dump('<tr><td align="left">' "<b>Checker State: </b>") 879 if m is None: 880 self._dump("<i> Nothing!</i>") 881 else: 882 if prev_m is not None: 883 if m.is_different(prev_m): 884 self._dump('</td></tr><tr><td align="left">') 885 self.visit_checker_messages(m, prev_m) 886 else: 887 self._dump("<i> No changes!</i>") 888 else: 889 self._dump('</td></tr><tr><td align="left">') 890 self.visit_checker_messages(m) 891 892 self._dump("</td></tr>") 893 894 def visit_state(self, s, prev_s): 895 self.visit_store_in_state(s, prev_s) 896 self.visit_environment_in_state("environment", "Expressions", s, prev_s) 897 self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s) 898 self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s) 899 self.visit_environment_in_state( 900 "constructing_objects", "Objects Under Construction", s, prev_s 901 ) 902 self.visit_environment_in_state( 903 "index_of_element", "Indices Of Elements Under Construction", s, prev_s 904 ) 905 self.visit_environment_in_state( 906 "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s 907 ) 908 self.visit_environment_in_state( 909 "pending_destructors", "Indices of Elements Under Destruction", s, prev_s 910 ) 911 self.visit_checker_messages_in_state(s, prev_s) 912 913 def visit_node(self, node): 914 self._dump("%s [shape=record," % (node.node_name())) 915 if self._dark_mode: 916 self._dump('color="white",fontcolor="gray80",') 917 self._dump('label=<<table border="0">') 918 919 self._dump( 920 '<tr><td bgcolor="%s"><b>State %s</b></td></tr>' 921 % ( 922 "gray20" if self._dark_mode else "gray70", 923 node.state.state_id if node.state is not None else "Unspecified", 924 ) 925 ) 926 if not self._topo_mode: 927 self._dump('<tr><td align="left" width="0">') 928 if len(node.points) > 1: 929 self._dump("<b>Program points:</b></td></tr>") 930 else: 931 self._dump("<b>Program point:</b></td></tr>") 932 self._dump( 933 '<tr><td align="left" width="0">' 934 '<table border="0" align="left" width="0">' 935 ) 936 for p in node.points: 937 self.visit_program_point(p) 938 self._dump("</table></td></tr>") 939 940 if node.state is not None and not self._topo_mode: 941 prev_s = None 942 # Do diffs only when we have a unique predecessor. 943 # Don't do diffs on the leaf nodes because they're 944 # the important ones. 945 if ( 946 self._do_diffs 947 and len(node.predecessors) == 1 948 and len(node.successors) > 0 949 ): 950 prev_s = self._graph.nodes[node.predecessors[0]].state 951 self.visit_state(node.state, prev_s) 952 self._dump_raw("</table>>];\n") 953 954 def visit_edge(self, pred, succ): 955 self._dump_raw( 956 "%s -> %s%s;\n" 957 % ( 958 pred.node_name(), 959 succ.node_name(), 960 ' [color="white"]' if self._dark_mode else "", 961 ) 962 ) 963 964 def visit_end_of_graph(self): 965 self._dump_raw("}\n") 966 967 if not self._dump_dot_only: 968 import sys 969 import tempfile 970 971 def write_temp_file(suffix, prefix, data): 972 fd, filename = tempfile.mkstemp(suffix, prefix, ".", True) 973 print('Writing "%s"...' % filename) 974 with os.fdopen(fd, "w") as fp: 975 fp.write(data) 976 print("Done! Please remember to remove the file.") 977 return filename 978 979 try: 980 import graphviz 981 except ImportError: 982 # The fallback behavior if graphviz is not installed! 983 print("Python graphviz not found. Please invoke") 984 print(" $ pip install graphviz") 985 print("in order to enable automatic conversion to HTML.") 986 print() 987 print("You may also convert DOT to SVG manually via") 988 print(" $ dot -Tsvg input.dot -o output.svg") 989 print() 990 write_temp_file(".dot", "egraph-", self.output()) 991 return 992 993 svg = graphviz.pipe("dot", "svg", self.output().encode()).decode() 994 995 filename = write_temp_file( 996 ".html", 997 "egraph-", 998 '<html><body bgcolor="%s">%s</body></html>' 999 % ("#1a1a1a" if self._dark_mode else "white", svg), 1000 ) 1001 if sys.platform == "win32": 1002 os.startfile(filename) 1003 elif sys.platform == "darwin": 1004 os.system('open "%s"' % filename) 1005 else: 1006 os.system('xdg-open "%s"' % filename) 1007 1008 1009# ===-----------------------------------------------------------------------===# 1010# Explorers know how to traverse the ExplodedGraph in a certain order. 1011# They would invoke a Visitor on every node or edge they encounter. 1012# ===-----------------------------------------------------------------------===# 1013 1014 1015# BasicExplorer explores the whole graph in no particular order. 1016class BasicExplorer: 1017 def explore(self, graph, visitor): 1018 visitor.visit_begin_graph(graph) 1019 for node in sorted(graph.nodes): 1020 logging.debug("Visiting " + node) 1021 visitor.visit_node(graph.nodes[node]) 1022 for succ in sorted(graph.nodes[node].successors): 1023 logging.debug("Visiting edge: %s -> %s " % (node, succ)) 1024 visitor.visit_edge(graph.nodes[node], graph.nodes[succ]) 1025 visitor.visit_end_of_graph() 1026 1027 1028# ===-----------------------------------------------------------------------===# 1029# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts. 1030# Trimmers can be combined together by applying them sequentially. 1031# ===-----------------------------------------------------------------------===# 1032 1033 1034# SinglePathTrimmer keeps only a single path - the leftmost path from the root. 1035# Useful when the trimmed graph is still too large. 1036class SinglePathTrimmer: 1037 def trim(self, graph): 1038 visited_nodes = set() 1039 node_id = graph.root_id 1040 while True: 1041 visited_nodes.add(node_id) 1042 node = graph.nodes[node_id] 1043 if len(node.successors) > 0: 1044 succ_id = node.successors[0] 1045 succ = graph.nodes[succ_id] 1046 node.successors = [succ_id] 1047 succ.predecessors = [node_id] 1048 if succ_id in visited_nodes: 1049 break 1050 node_id = succ_id 1051 else: 1052 break 1053 graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} 1054 1055 1056# TargetedTrimmer keeps paths that lead to specific nodes and discards all 1057# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging 1058# a crash). 1059class TargetedTrimmer: 1060 def __init__(self, target_nodes): 1061 self._target_nodes = target_nodes 1062 1063 @staticmethod 1064 def parse_target_node(node, graph): 1065 if node.startswith("0x"): 1066 ret = "Node" + node 1067 assert ret in graph.nodes 1068 return ret 1069 else: 1070 for other_id in graph.nodes: 1071 other = graph.nodes[other_id] 1072 if other.node_id == int(node): 1073 return other_id 1074 1075 @staticmethod 1076 def parse_target_nodes(target_nodes, graph): 1077 return [ 1078 TargetedTrimmer.parse_target_node(node, graph) 1079 for node in target_nodes.split(",") 1080 ] 1081 1082 def trim(self, graph): 1083 queue = self._target_nodes 1084 visited_nodes = set() 1085 1086 while len(queue) > 0: 1087 node_id = queue.pop() 1088 visited_nodes.add(node_id) 1089 node = graph.nodes[node_id] 1090 for pred_id in node.predecessors: 1091 if pred_id not in visited_nodes: 1092 queue.append(pred_id) 1093 graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} 1094 for node_id in graph.nodes: 1095 node = graph.nodes[node_id] 1096 node.successors = [ 1097 succ_id for succ_id in node.successors if succ_id in visited_nodes 1098 ] 1099 node.predecessors = [ 1100 succ_id for succ_id in node.predecessors if succ_id in visited_nodes 1101 ] 1102 1103 1104# ===-----------------------------------------------------------------------===# 1105# The entry point to the script. 1106# ===-----------------------------------------------------------------------===# 1107 1108 1109def main(): 1110 parser = argparse.ArgumentParser( 1111 description="Display and manipulate Exploded Graph dumps." 1112 ) 1113 parser.add_argument( 1114 "filename", type=str, help="the .dot file produced by the Static Analyzer" 1115 ) 1116 parser.add_argument( 1117 "-v", 1118 "--verbose", 1119 action="store_const", 1120 dest="loglevel", 1121 const=logging.DEBUG, 1122 default=logging.WARNING, 1123 help="enable info prints", 1124 ) 1125 parser.add_argument( 1126 "-d", 1127 "--diff", 1128 action="store_const", 1129 dest="diff", 1130 const=True, 1131 default=False, 1132 help="display differences between states", 1133 ) 1134 parser.add_argument( 1135 "-t", 1136 "--topology", 1137 action="store_const", 1138 dest="topology", 1139 const=True, 1140 default=False, 1141 help="only display program points, omit states", 1142 ) 1143 parser.add_argument( 1144 "-s", 1145 "--single-path", 1146 action="store_const", 1147 dest="single_path", 1148 const=True, 1149 default=False, 1150 help="only display the leftmost path in the graph " 1151 "(useful for trimmed graphs that still " 1152 "branch too much)", 1153 ) 1154 parser.add_argument( 1155 "--to", 1156 type=str, 1157 default=None, 1158 help="only display execution paths from the root " 1159 "to the given comma-separated list of nodes " 1160 "identified by a pointer or a stable ID; " 1161 "compatible with --single-path", 1162 ) 1163 parser.add_argument( 1164 "--dark", 1165 action="store_const", 1166 dest="dark", 1167 const=True, 1168 default=False, 1169 help="dark mode", 1170 ) 1171 parser.add_argument( 1172 "--gray", 1173 action="store_const", 1174 dest="gray", 1175 const=True, 1176 default=False, 1177 help="black-and-white mode", 1178 ) 1179 parser.add_argument( 1180 "--dump-dot-only", 1181 action="store_const", 1182 dest="dump_dot_only", 1183 const=True, 1184 default=False, 1185 help="instead of writing an HTML file and immediately " 1186 "displaying it, dump the rewritten dot file " 1187 "to stdout", 1188 ) 1189 args = parser.parse_args() 1190 logging.basicConfig(level=args.loglevel) 1191 1192 graph = ExplodedGraph() 1193 with open(args.filename) as fd: 1194 for raw_line in fd: 1195 raw_line = raw_line.strip() 1196 graph.add_raw_line(raw_line) 1197 1198 trimmers = [] 1199 if args.to is not None: 1200 trimmers.append( 1201 TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph)) 1202 ) 1203 if args.single_path: 1204 trimmers.append(SinglePathTrimmer()) 1205 1206 explorer = BasicExplorer() 1207 1208 visitor = DotDumpVisitor( 1209 args.diff, args.dark, args.gray, args.topology, args.dump_dot_only 1210 ) 1211 1212 for trimmer in trimmers: 1213 trimmer.trim(graph) 1214 1215 explorer.explore(graph, visitor) 1216 1217 1218if __name__ == "__main__": 1219 main() 1220