#!/usr/bin/env python
#
#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===-----------------------------------------------------------------------===#


from __future__ import print_function

import argparse
import collections
import json
import logging
import re


# Compare two dictionaries key by key. Returns a (removed, added) pair of key
# lists: 'removed' holds the keys of 'prev' that are missing from 'curr' or
# map to a different value there; 'added' holds the keys of 'curr' that are
# missing from 'prev' or map to a different value there. A key whose value
# changed therefore shows up in both lists.
def diff_dicts(curr, prev):
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return (removed, added)


# Any program state trait that boils down to an ordered key-value mapping.
class GenericMap(object):
    def __init__(self, items):
        # 'items' is anything OrderedDict accepts, e.g. a list of pairs.
        self.generic_map = collections.OrderedDict(items)

    # Returns the (removed, added) key lists relative to 'prev'.
    def diff(self, prev):
        return diff_dicts(self.generic_map, prev.generic_map)

    # True if at least one key was removed, added or changed since 'prev'.
    def is_different(self, prev):
        removed, added = self.diff(prev)
        return bool(removed) or bool(added)


# A source location read from its JSON form.
class SourceLocation(object):
    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        self.line = json_loc['line']
        self.col = json_loc['column']
        # The 'filename' key is optional in the input.
        self.filename = json_loc.get('filename', '(main file)')


# A deserialized program point.
class ProgramPoint(object):
    # Reads the fields that are common to every program point ('kind' and
    # 'tag') and then the extra fields of the kinds that are understood
    # here: 'Edge', 'Statement' and 'BlockEntrance'.
    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        kind = json_pp['kind']
        self.kind = kind
        self.tag = json_pp['tag']

        if kind == 'Statement':
            self.stmt_kind = json_pp['stmt_kind']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            # The location may be null in the input.
            json_loc = json_pp['location']
            if json_loc is None:
                self.loc = None
            else:
                self.loc = SourceLocation(json_loc)
        elif kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
        elif kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']


# One expression used as a key in a deserialized Environment. Two keys are
# equal (and hash alike) when their statement ids match.
class EnvironmentBindingKey(object):
    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        # CXXCtorInitializer is not a Stmt, so such keys carry 'init_id'
        # instead of 'stmt_id'.
        if 'stmt_id' in json_ek:
            self.stmt_id = json_ek['stmt_id']
        else:
            self.stmt_id = json_ek['init_id']
        self.pretty = json_ek['pretty']
        # 'kind' is optional.
        self.kind = json_ek.get('kind')

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


# The description of a location context, identified by its 'lctx_id'.
class LocationContext(object):
    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.line = json_frame['call_line']

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


# A group of deserialized Environment bindings that correspond to a specific
# location context.
class EnvironmentFrame(object):
    # Builds the frame's location context and an ordered map from binding
    # keys to their values. A null 'items' entry yields an empty map.
    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)

        json_items = json_frame['items']
        self.bindings = collections.OrderedDict()
        if json_items is not None:
            for json_binding in json_items:
                key = EnvironmentBindingKey(json_binding)
                self.bindings[key] = json_binding['value']

    # Returns the (removed, added) binding keys relative to 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # True if any binding was removed, added or changed since 'prev'.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return bool(removed) or bool(added)


# A deserialized Environment. The same class is reused for other entities
# shaped like an Environment, such as Objects Under Construction.
class GenericEnvironment(object):
    def __init__(self, json_e):
        super(GenericEnvironment, self).__init__()
        self.frames = [EnvironmentFrame(json_frame) for json_frame in json_e]

    # Returns the indices of the frames whose bindings changed since 'prev',
    # or None when the two frame lists cannot be compared position by
    # position.
    def diff_frames(self, prev):
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for index, (frame, prev_frame) in enumerate(zip(self.frames,
                                                        prev.frames)):
            if not frame.location_context == prev_frame.location_context:
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    # True unless diff_frames() reports an empty list of updated frames.
    def is_different(self, prev):
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0


# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    # A key is identified by the pair (kind, offset).
    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


# One cluster of the deserialized RegionStore: a base region together with
# an ordered map from binding keys to values.
class StoreCluster(object):
    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        self.bindings = collections.OrderedDict(
            (StoreBindingKey(json_binding), json_binding['value'])
            for json_binding in json_sc['items'])

    # Returns the (removed, added) binding keys relative to 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # True if any binding was removed, added or changed since 'prev'.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return bool(removed) or bool(added)


# A deserialized RegionStore: an ordered map from cluster pointers to
# clusters.
class Store(object):
    def __init__(self, json_s):
        super(Store, self).__init__()
        self.ptr = json_s['pointer']
        self.clusters = collections.OrderedDict(
            (json_cluster['pointer'], StoreCluster(json_cluster))
            for json_cluster in json_s['items'])

    # Returns a (removed, added, updated) triple of cluster pointers.
    # 'removed' and 'updated' follow the order of 'prev'; 'added' follows
    # the order of this store.
    def diff_clusters(self, prev):
        removed = []
        updated = []
        for ptr, prev_cluster in prev.clusters.items():
            if ptr not in self.clusters:
                removed.append(ptr)
            elif prev_cluster.is_different(self.clusters[ptr]):
                updated.append(ptr)

        added = [ptr for ptr in self.clusters if ptr not in prev.clusters]
        return (removed, added, updated)

    # True if any cluster was removed, added or updated since 'prev'.
    def is_different(self, prev):
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed) or bool(added) or bool(updated)


# A deserialized program state.
class ProgramState(object):
    # Deserializes every known trait of the state. A trait that is null in
    # the input is stored as None.
    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id

        json_store = json_ps['store']
        if json_store is not None:
            self.store = Store(json_store)
        else:
            self.store = None

        json_env = json_ps['environment']
        if json_env is not None:
            self.environment = GenericEnvironment(json_env['items'])
        else:
            self.environment = None

        json_constraints = json_ps['constraints']
        if json_constraints is not None:
            self.constraints = GenericMap(
                [(c['symbol'], c['range']) for c in json_constraints])
        else:
            self.constraints = None

        json_dyn_types = json_ps['dynamic_types']
        if json_dyn_types is not None:
            self.dynamic_types = GenericMap(
                [(t['region'],
                  '%s%s' % (t['dyn_type'],
                            ' (or a sub-class)' if t['sub_classable'] else ''))
                 for t in json_dyn_types])
        else:
            self.dynamic_types = None

        json_ctors = json_ps['constructing_objects']
        if json_ctors is not None:
            self.constructing_objects = GenericEnvironment(json_ctors)
        else:
            self.constructing_objects = None

        # TODO: Checker messages.


# A deserialized exploded graph node. It can be created empty because an
# edge may mention a node before the node's own contents have been read;
# the predecessor and successor lists must already exist at that point.
class ExplodedNode(object):
    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    # Fills in the node from its JSON description. 'node_id' is the name
    # under which the node is registered and must equal node_name().
    def construct(self, node_id, json_node):
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.points = [ProgramPoint(json_point)
                       for json_point in json_node['program_points']]

        json_state = json_node['program_state']
        if json_state is not None:
            self.state = ProgramState(json_node['state_id'], json_state)
        else:
            self.state = None

        assert self.node_name() == node_id

    # The node's name, built from its pointer.
    def node_name(self):
        return 'Node' + self.ptr


# A deserialized ExplodedGraph.
# Constructed by consuming a .dot file
# line-by-line.
class ExplodedGraph(object):
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        # Nodes are created on first access, so an edge line may refer to
        # a node whose own line has not been read yet.
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        # Text of a statement that has not been terminated by ';' yet.
        self.incomplete_line = ''

    # Consumes one line of the .dot file. Edge lines link two nodes, node
    # lines have their label decoded as JSON and handed to the node, and
    # anything else is skipped.
    def add_raw_line(self, raw_line):
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if len(raw_line) > 0 and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)
        result = self.edge_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as edge line.')
            pred = result.group(1)
            succ = result.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return
        result = self.node_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as node line.')
            node_id = result.group(1)
            if len(self.nodes) == 0:
                self.root_id = node_id
            # Note: when writing tests you don't need to escape everything,
            # even though in a valid dot file everything is escaped.
            #
            # Only the '&nbsp;' entity is dropped, never plain spaces:
            # removing every space would also glue together the words
            # inside JSON string values. '\<' and '\>' get a second
            # backslash so that the marker is a valid JSON escape and
            # comes out of json.loads() as '\<' / '\>' again.
            node_label = result.group(2).replace('\\l', '') \
                                        .replace('&nbsp;', '') \
                                        .replace('\\"', '"') \
                                        .replace('\\{', '{') \
                                        .replace('\\}', '}') \
                                        .replace('\\\\', '\\') \
                                        .replace('\\|', '|') \
                                        .replace('\\<', '\\\\<') \
                                        .replace('\\>', '\\\\>') \
                                        .rstrip() \
                                        .rstrip(',')
            logging.debug(node_label)
            json_node = json.loads(node_label)
            self.nodes[node_id].construct(node_id, json_node)
            return
        logging.debug('Skipping.')


# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting. Everything is printed to standard output.
class DotDumpVisitor(object):
    # 'do_diffs' selects whether a node's state is shown as a diff against
    # its predecessor's state (see visit_node).
    def __init__(self, do_diffs):
        super(DotDumpVisitor, self).__init__()
        self._do_diffs = do_diffs

    # Prints 's' verbatim, without a trailing newline.
    @staticmethod
    def _dump_raw(s):
        print(s, end='')

    # Prints 's' after escaping it for use inside the record label:
    # '&' becomes an entity, the characters '{', '}' and '|' get a
    # backslash, the markers '\<' and '\>' become the entities for literal
    # angle brackets, and '\l' becomes a line break tag. Bare '<' and '>'
    # are left alone because callers use them for markup; text that needs
    # a literal bracket spells it '\<' or '\>' (see the Edge row in
    # visit_program_point).
    @staticmethod
    def _dump(s):
        # '&' has to be handled first, otherwise the entities produced by
        # the later replacements would be escaped a second time.
        print(s.replace('&', '&amp;')
               .replace('{', '\\{')
               .replace('}', '\\}')
               .replace('\\<', '&lt;')
               .replace('\\>', '&gt;')
               .replace('\\l', '<br />')
               .replace('|', '\\|'), end='')

    # Returns the table cell content marking a diff line: a green '+' for
    # an added item, a red '-' for a removed one, nothing when 'is_added'
    # is None (no diff is being shown).
    @staticmethod
    def _diff_plus_minus(is_added):
        if is_added is None:
            return ''
        if is_added:
            return '<font color="forestgreen">+</font>'
        return '<font color="red">-</font>'

    # Remembers the graph (visit_node needs it to look up predecessors)
    # and opens the digraph.
    def visit_begin_graph(self, graph):
        self._graph = graph
        self._dump_raw('digraph "ExplodedGraph" {\n')
        self._dump_raw('label="";\n')

    # Emits the table row(s) for one program point, colored by its kind,
    # followed by a row for its tag if it has one.
    def visit_program_point(self, p):
        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
            color = 'gold3'
        elif p.kind in ['PreStmtPurgeDeadSymbols',
                        'PostStmtPurgeDeadSymbols']:
            color = 'red'
        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
            color = 'blue'
        elif p.kind in ['Statement']:
            color = 'cyan3'
        else:
            color = 'forestgreen'

        if p.kind == 'Statement':
            if p.loc is not None:
                self._dump('<tr><td align="left" width="0">'
                           '%s:<b>%s</b>:<b>%s</b>:</td>'
                           '<td align="left" width="0"><font color="%s">'
                           '%s</font></td><td>%s</td></tr>'
                           % (p.loc.filename, p.loc.line,
                              p.loc.col, color, p.stmt_kind, p.pretty))
            else:
                self._dump('<tr><td align="left" width="0">'
                           '<i>Invalid Source Location</i>:</td>'
                           '<td align="left" width="0">'
                           '<font color="%s">%s</font></td><td>%s</td></tr>'
                           % (color, p.stmt_kind, p.pretty))
        elif p.kind == 'Edge':
            self._dump('<tr><td width="0"></td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td align="left">'
                       '[B%d] -\\> [B%d]</td></tr>'
                       % (color, p.kind, p.src_id, p.dst_id))
        else:
            # TODO: Print more stuff for other kinds of points.
            self._dump('<tr><td width="0"></td>'
                       '<td align="left" width="0" colspan="2">'
                       '<font color="%s">%s</font></td></tr>'
                       % (color, p.kind))

        if p.tag is not None:
            self._dump('<tr><td width="0"></td>'
                       '<td colspan="2" align="left">'
                       '<b>Tag: </b> <font color="crimson">'
                       '%s</font></td></tr>' % p.tag)

    # Emits a table for environment 'e'. When 'prev_e' is given and the two
    # can be compared frame by frame, only the changed frames are shown,
    # with removed and added bindings marked; otherwise every frame and
    # binding of 'e' is shown.
    def visit_environment(self, e, prev_e=None):
        self._dump('<table border="0">')

        def dump_location_context(lc, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><b>%s</b></td>'
                       '<td align="left" colspan="2">'
                       '<font color="grey60">%s </font>'
                       '%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          lc.caption, lc.decl,
                          ('(line %s)' % lc.line) if lc.line is not None
                          else ''))

        def dump_binding(f, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><i>S%s</i></td>'
                       '%s'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          b.stmt_id,
                          '<td align="left"><font color="darkgreen"><i>'
                          '(%s)</i></font></td>' % b.kind
                          if b.kind is not None else '',
                          b.pretty, f.bindings[b]))

        # None or an empty list both mean "no per-frame diff available".
        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
        if frames_updated:
            for i in frames_updated:
                f = e.frames[i]
                prev_f = prev_e.frames[i]
                dump_location_context(f.location_context)
                bindings_removed, bindings_added = f.diff_bindings(prev_f)
                for b in bindings_removed:
                    dump_binding(prev_f, b, False)
                for b in bindings_added:
                    dump_binding(f, b, True)
        else:
            for f in e.frames:
                dump_location_context(f.location_context)
                for b in f.bindings:
                    dump_binding(f, b)

        self._dump('</table>')

    # Emits the titled section for the environment-like trait named
    # 'selector' of state 's'. Nothing is emitted if the trait is absent
    # from both 's' and 'prev_s'.
    def visit_environment_in_state(self, selector, title, s, prev_s=None):
        e = getattr(s, selector)
        prev_e = getattr(prev_s, selector) if prev_s is not None else None
        if e is None and prev_e is None:
            return

        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
        if e is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_e is not None:
                if e.is_different(prev_e):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_environment(e, prev_e)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_environment(e)

        self._dump('</td></tr>')

    # Emits a table for store 's'. With 'prev_s' only the differences are
    # shown: bindings of removed clusters, changed bindings of updated
    # clusters, then bindings of added clusters. Without it every binding
    # is shown.
    def visit_store(self, s, prev_s=None):
        self._dump('<table border="0">')

        def dump_binding(s, c, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          s.clusters[c].base_region, b.offset,
                          '(<i>Default</i>)' if b.kind == 'Default'
                          else '',
                          s.clusters[c].bindings[b]))

        if prev_s is not None:
            clusters_removed, clusters_added, clusters_updated = \
                s.diff_clusters(prev_s)
            for c in clusters_removed:
                for b in prev_s.clusters[c].bindings:
                    dump_binding(prev_s, c, b, False)
            for c in clusters_updated:
                bindings_removed, bindings_added = \
                    s.clusters[c].diff_bindings(prev_s.clusters[c])
                for b in bindings_removed:
                    dump_binding(prev_s, c, b, False)
                for b in bindings_added:
                    dump_binding(s, c, b, True)
            for c in clusters_added:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b, True)
        else:
            for c in s.clusters:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b)

        self._dump('</table>')

    # Emits the "Store" section of state 's'. Nothing is emitted if the
    # store is absent from both 's' and 'prev_s'.
    def visit_store_in_state(self, s, prev_s=None):
        st = s.store
        prev_st = prev_s.store if prev_s is not None else None
        if st is None and prev_st is None:
            return

        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
        if st is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_st is not None:
                if st.is_different(prev_st):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_store(st, prev_st)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_store(st)
        self._dump('</td></tr>')

    # Emits a table for generic map 'm'. With 'prev_m' only the removed
    # and added pairs are shown; without it every pair is shown.
    def visit_generic_map(self, m, prev_m=None):
        self._dump('<table border="0">')

        def dump_pair(m, k, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          k, m.generic_map[k]))

        if prev_m is not None:
            removed, added = m.diff(prev_m)
            for k in removed:
                dump_pair(prev_m, k, False)
            for k in added:
                dump_pair(m, k, True)
        else:
            for k in m.generic_map:
                dump_pair(m, k, None)

        self._dump('</table>')

    # Emits the titled section for the map-like trait named 'selector' of
    # state 's'. Nothing is emitted if the trait is absent from both 's'
    # and 'prev_s'.
    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
        m = getattr(s, selector)
        prev_m = getattr(prev_s, selector) if prev_s is not None else None
        if m is None and prev_m is None:
            return

        self._dump('<hr />')
        self._dump('<tr><td align="left">'
                   '<b>%s: </b>' % title)
        if m is None:
            self._dump('<i> Nothing!</i>')
        else:
            # 'prev_m' can only be set when 'prev_s' is, so testing it
            # alone is enough to choose between a diff and a full dump.
            if prev_m is not None:
                if m.is_different(prev_m):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_generic_map(m, prev_m)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_generic_map(m)
        self._dump('</td></tr>')

    # Emits every section of program state 's', diffed against 'prev_s'
    # when that is not None.
    def visit_state(self, s, prev_s):
        self.visit_store_in_state(s, prev_s)
        self.visit_environment_in_state('environment', 'Environment',
                                        s, prev_s)
        self.visit_generic_map_in_state('constraints', 'Ranges',
                                        s, prev_s)
        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
                                        s, prev_s)
        self.visit_environment_in_state('constructing_objects',
                                        'Objects Under Construction',
                                        s, prev_s)

    # Emits the complete DOT statement for one node: a header row, its
    # program points and, if present, its program state.
    def visit_node(self, node):
        self._dump('%s [shape=record,label=<<table border="0">'
                   % (node.node_name()))

        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
                   'State %s</b></td></tr>'
                   % (node.node_id, node.ptr, node.state.state_id
                      if node.state is not None else 'Unspecified'))
        self._dump('<tr><td align="left" width="0">')
        if len(node.points) > 1:
            self._dump('<b>Program points:</b></td></tr>')
        else:
            self._dump('<b>Program point:</b></td></tr>')
        self._dump('<tr><td align="left" width="0">'
                   '<table border="0" align="left" width="0">')
        for p in node.points:
            self.visit_program_point(p)
        self._dump('</table></td></tr>')

        if node.state is not None:
            prev_s = None
            # Do diffs only when we have a unique predecessor.
            # Don't do diffs on the leaf nodes because they're
            # the important ones.
            if self._do_diffs and len(node.predecessors) == 1 \
               and len(node.successors) > 0:
                prev_s = self._graph.nodes[node.predecessors[0]].state
            self.visit_state(node.state, prev_s)
        self._dump_raw('</table>>];\n')

    # Emits the DOT statement for the edge from 'pred' to 'succ'.
    def visit_edge(self, pred, succ):
        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))

    # Closes the digraph.
    def visit_end_of_graph(self):
        self._dump_raw('}\n')


# A class that encapsulates traversal of the ExplodedGraph.
# Different explorer
# kinds could potentially traverse specific sub-graphs.
class Explorer(object):
    def __init__(self):
        super(Explorer, self).__init__()

    # Walks the whole graph in sorted node-name order. Each node is handed
    # to the visitor, followed by its outgoing edges in sorted order of
    # the successor names.
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            node = graph.nodes[node_id]
            visitor.visit_node(node)
            for succ_id in sorted(node.successors):
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(node, graph.nodes[succ_id])
        visitor.visit_end_of_graph()


# Command-line entry point: reads the .dot file named on the command line
# and prints the rewritten graph to standard output.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    parser.add_argument('-v', '--verbose', action='store_const',
                        dest='loglevel', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    parser.add_argument('-d', '--diff', action='store_true', dest='diff',
                        help='display differences between states')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    # Feed the file to the graph one whitespace-stripped line at a time.
    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            graph.add_raw_line(raw_line.strip())

    Explorer().explore(graph, DotDumpVisitor(args.diff))


if __name__ == '__main__':
    main()