1#!/usr/bin/env python 2# 3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--# 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9#===-----------------------------------------------------------------------===# 10 11 12from __future__ import print_function 13 14import argparse 15import collections 16import json 17import logging 18import re 19 20 21# A helper function for finding the difference between two dictionaries. 22def diff_dicts(curr, prev): 23 removed = [k for k in prev if k not in curr or curr[k] != prev[k]] 24 added = [k for k in curr if k not in prev or curr[k] != prev[k]] 25 return (removed, added) 26 27 28# Represents any program state trait that is a dictionary of key-value pairs. 29class GenericMap(object): 30 def __init__(self, items): 31 self.generic_map = collections.OrderedDict(items) 32 33 def diff(self, prev): 34 return diff_dicts(self.generic_map, prev.generic_map) 35 36 def is_different(self, prev): 37 removed, added = self.diff(prev) 38 return len(removed) != 0 or len(added) != 0 39 40 41# A deserialized source location. 42class SourceLocation(object): 43 def __init__(self, json_loc): 44 super(SourceLocation, self).__init__() 45 self.line = json_loc['line'] 46 self.col = json_loc['column'] 47 self.filename = json_loc['filename'] \ 48 if 'filename' in json_loc else '(main file)' 49 50 51# A deserialized program point. 52class ProgramPoint(object): 53 def __init__(self, json_pp): 54 super(ProgramPoint, self).__init__() 55 self.kind = json_pp['kind'] 56 self.tag = json_pp['tag'] 57 if self.kind == 'Edge': 58 self.src_id = json_pp['src_id'] 59 self.dst_id = json_pp['dst_id'] 60 elif self.kind == 'Statement': 61 self.stmt_kind = json_pp['stmt_kind'] 62 self.pointer = json_pp['pointer'] 63 self.pretty = json_pp['pretty'] 64 self.loc = SourceLocation(json_pp['location']) \ 65 if json_pp['location'] is not None else None 66 elif self.kind == 'BlockEntrance': 67 self.block_id = json_pp['block_id'] 68 69 70# A single expression acting as a key in a deserialized Environment. 71class EnvironmentBindingKey(object): 72 def __init__(self, json_ek): 73 super(EnvironmentBindingKey, self).__init__() 74 self.stmt_id = json_ek['stmt_id'] 75 self.pretty = json_ek['pretty'] 76 77 def _key(self): 78 return self.stmt_id 79 80 def __eq__(self, other): 81 return self._key() == other._key() 82 83 def __hash__(self): 84 return hash(self._key()) 85 86 87# Deserialized description of a location context. 88class LocationContext(object): 89 def __init__(self, json_frame): 90 super(LocationContext, self).__init__() 91 self.lctx_id = json_frame['lctx_id'] 92 self.caption = json_frame['location_context'] 93 self.decl = json_frame['calling'] 94 self.line = json_frame['call_line'] 95 96 def _key(self): 97 return self.lctx_id 98 99 def __eq__(self, other): 100 return self._key() == other._key() 101 102 def __hash__(self): 103 return hash(self._key()) 104 105 106# A group of deserialized Environment bindings that correspond to a specific 107# location context. 108class EnvironmentFrame(object): 109 def __init__(self, json_frame): 110 super(EnvironmentFrame, self).__init__() 111 self.location_context = LocationContext(json_frame) 112 self.bindings = collections.OrderedDict( 113 [(EnvironmentBindingKey(b), 114 b['value']) for b in json_frame['items']] 115 if json_frame['items'] is not None else []) 116 117 def diff_bindings(self, prev): 118 return diff_dicts(self.bindings, prev.bindings) 119 120 def is_different(self, prev): 121 removed, added = self.diff_bindings(prev) 122 return len(removed) != 0 or len(added) != 0 123 124 125# A deserialized Environment. 126class Environment(object): 127 def __init__(self, json_e): 128 super(Environment, self).__init__() 129 self.ptr = json_e['pointer'] 130 self.frames = [EnvironmentFrame(f) for f in json_e['items']] 131 132 def diff_frames(self, prev): 133 # TODO: It's difficult to display a good diff when frame numbers shift. 134 if len(self.frames) != len(prev.frames): 135 return None 136 137 updated = [] 138 for i in range(len(self.frames)): 139 f = self.frames[i] 140 prev_f = prev.frames[i] 141 if f.location_context == prev_f.location_context: 142 if f.is_different(prev_f): 143 updated.append(i) 144 else: 145 # We have the whole frame replaced with another frame. 146 # TODO: Produce a nice diff. 147 return None 148 149 # TODO: Add support for added/removed. 150 return updated 151 152 def is_different(self, prev): 153 updated = self.diff_frames(prev) 154 return updated is None or len(updated) > 0 155 156 157# A single binding key in a deserialized RegionStore cluster. 158class StoreBindingKey(object): 159 def __init__(self, json_sk): 160 super(StoreBindingKey, self).__init__() 161 self.kind = json_sk['kind'] 162 self.offset = json_sk['offset'] 163 164 def _key(self): 165 return (self.kind, self.offset) 166 167 def __eq__(self, other): 168 return self._key() == other._key() 169 170 def __hash__(self): 171 return hash(self._key()) 172 173 174# A single cluster of the deserialized RegionStore. 175class StoreCluster(object): 176 def __init__(self, json_sc): 177 super(StoreCluster, self).__init__() 178 self.base_region = json_sc['cluster'] 179 self.bindings = collections.OrderedDict( 180 [(StoreBindingKey(b), b['value']) for b in json_sc['items']]) 181 182 def diff_bindings(self, prev): 183 return diff_dicts(self.bindings, prev.bindings) 184 185 def is_different(self, prev): 186 removed, added = self.diff_bindings(prev) 187 return len(removed) != 0 or len(added) != 0 188 189 190# A deserialized RegionStore. 191class Store(object): 192 def __init__(self, json_s): 193 super(Store, self).__init__() 194 self.ptr = json_s['pointer'] 195 self.clusters = collections.OrderedDict( 196 [(c['pointer'], StoreCluster(c)) for c in json_s['items']]) 197 198 def diff_clusters(self, prev): 199 removed = [k for k in prev.clusters if k not in self.clusters] 200 added = [k for k in self.clusters if k not in prev.clusters] 201 updated = [k for k in prev.clusters if k in self.clusters 202 and prev.clusters[k].is_different(self.clusters[k])] 203 return (removed, added, updated) 204 205 def is_different(self, prev): 206 removed, added, updated = self.diff_clusters(prev) 207 return len(removed) != 0 or len(added) != 0 or len(updated) != 0 208 209 210# A deserialized program state. 211class ProgramState(object): 212 def __init__(self, state_id, json_ps): 213 super(ProgramState, self).__init__() 214 logging.debug('Adding ProgramState ' + str(state_id)) 215 216 self.state_id = state_id 217 self.store = Store(json_ps['store']) \ 218 if json_ps['store'] is not None else None 219 self.environment = Environment(json_ps['environment']) \ 220 if json_ps['environment'] is not None else None 221 self.constraints = GenericMap([ 222 (c['symbol'], c['range']) for c in json_ps['constraints'] 223 ]) if json_ps['constraints'] is not None else None 224 self.dynamic_types = GenericMap([ 225 (t['region'], '%s%s' % (t['dyn_type'], 226 ' (or a sub-class)' 227 if t['sub_classable'] else '')) 228 for t in json_ps['dynamic_types']]) \ 229 if json_ps['dynamic_types'] is not None else None 230 231 # TODO: Objects under construction. 232 # TODO: Checker messages. 233 234 235# A deserialized exploded graph node. Has a default constructor because it 236# may be referenced as part of an edge before its contents are deserialized, 237# and in this moment we already need a room for predecessors and successors. 238class ExplodedNode(object): 239 def __init__(self): 240 super(ExplodedNode, self).__init__() 241 self.predecessors = [] 242 self.successors = [] 243 244 def construct(self, node_id, json_node): 245 logging.debug('Adding ' + node_id) 246 self.node_id = json_node['node_id'] 247 self.ptr = json_node['pointer'] 248 self.points = [ProgramPoint(p) for p in json_node['program_points']] 249 self.state = ProgramState(json_node['state_id'], 250 json_node['program_state']) \ 251 if json_node['program_state'] is not None else None 252 253 assert self.node_name() == node_id 254 255 def node_name(self): 256 return 'Node' + self.ptr 257 258 259# A deserialized ExplodedGraph. Constructed by consuming a .dot file 260# line-by-line. 261class ExplodedGraph(object): 262 # Parse .dot files with regular expressions. 263 node_re = re.compile( 264 '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$') 265 edge_re = re.compile( 266 '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$') 267 268 def __init__(self): 269 super(ExplodedGraph, self).__init__() 270 self.nodes = collections.defaultdict(ExplodedNode) 271 self.root_id = None 272 self.incomplete_line = '' 273 274 def add_raw_line(self, raw_line): 275 if raw_line.startswith('//'): 276 return 277 278 # Allow line breaks by waiting for ';'. This is not valid in 279 # a .dot file, but it is useful for writing tests. 280 if len(raw_line) > 0 and raw_line[-1] != ';': 281 self.incomplete_line += raw_line 282 return 283 raw_line = self.incomplete_line + raw_line 284 self.incomplete_line = '' 285 286 # Apply regexps one by one to see if it's a node or an edge 287 # and extract contents if necessary. 288 logging.debug('Line: ' + raw_line) 289 result = self.edge_re.match(raw_line) 290 if result is not None: 291 logging.debug('Classified as edge line.') 292 pred = result.group(1) 293 succ = result.group(2) 294 self.nodes[pred].successors.append(succ) 295 self.nodes[succ].predecessors.append(pred) 296 return 297 result = self.node_re.match(raw_line) 298 if result is not None: 299 logging.debug('Classified as node line.') 300 node_id = result.group(1) 301 if len(self.nodes) == 0: 302 self.root_id = node_id 303 # Note: when writing tests you don't need to escape everything, 304 # even though in a valid dot file everything is escaped. 305 node_label = result.group(2).replace('\\l', '') \ 306 .replace(' ', '') \ 307 .replace('\\"', '"') \ 308 .replace('\\{', '{') \ 309 .replace('\\}', '}') \ 310 .replace('\\\\', '\\') \ 311 .replace('\\|', '|') \ 312 .replace('\\<', '\\\\<') \ 313 .replace('\\>', '\\\\>') \ 314 .rstrip(',') 315 logging.debug(node_label) 316 json_node = json.loads(node_label) 317 self.nodes[node_id].construct(node_id, json_node) 318 return 319 logging.debug('Skipping.') 320 321 322# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based 323# syntax highlighing. 324class DotDumpVisitor(object): 325 def __init__(self, do_diffs): 326 super(DotDumpVisitor, self).__init__() 327 self._do_diffs = do_diffs 328 329 @staticmethod 330 def _dump_raw(s): 331 print(s, end='') 332 333 @staticmethod 334 def _dump(s): 335 print(s.replace('&', '&') 336 .replace('{', '\\{') 337 .replace('}', '\\}') 338 .replace('\\<', '<') 339 .replace('\\>', '>') 340 .replace('\\l', '<br />') 341 .replace('|', '\\|'), end='') 342 343 @staticmethod 344 def _diff_plus_minus(is_added): 345 if is_added is None: 346 return '' 347 if is_added: 348 return '<font color="forestgreen">+</font>' 349 return '<font color="red">-</font>' 350 351 def visit_begin_graph(self, graph): 352 self._graph = graph 353 self._dump_raw('digraph "ExplodedGraph" {\n') 354 self._dump_raw('label="";\n') 355 356 def visit_program_point(self, p): 357 if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']: 358 color = 'gold3' 359 elif p.kind in ['PreStmtPurgeDeadSymbols', 360 'PostStmtPurgeDeadSymbols']: 361 color = 'red' 362 elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']: 363 color = 'blue' 364 elif p.kind in ['Statement']: 365 color = 'cyan3' 366 else: 367 color = 'forestgreen' 368 369 if p.kind == 'Statement': 370 if p.loc is not None: 371 self._dump('<tr><td align="left" width="0">' 372 '%s:<b>%s</b>:<b>%s</b>:</td>' 373 '<td align="left" width="0"><font color="%s">' 374 '%s</font></td><td>%s</td></tr>' 375 % (p.loc.filename, p.loc.line, 376 p.loc.col, color, p.stmt_kind, p.pretty)) 377 else: 378 self._dump('<tr><td align="left" width="0">' 379 '<i>Invalid Source Location</i>:</td>' 380 '<td align="left" width="0">' 381 '<font color="%s">%s</font></td><td>%s</td></tr>' 382 % (color, p.stmt_kind, p.pretty)) 383 elif p.kind == 'Edge': 384 self._dump('<tr><td width="0"></td>' 385 '<td align="left" width="0">' 386 '<font color="%s">%s</font></td><td align="left">' 387 '[B%d] -\\> [B%d]</td></tr>' 388 % (color, p.kind, p.src_id, p.dst_id)) 389 else: 390 # TODO: Print more stuff for other kinds of points. 391 self._dump('<tr><td width="0"></td>' 392 '<td align="left" width="0" colspan="2">' 393 '<font color="%s">%s</font></td></tr>' 394 % (color, p.kind)) 395 396 if p.tag is not None: 397 self._dump('<tr><td width="0"></td>' 398 '<td colspan="2" align="left">' 399 '<b>Tag: </b> <font color="crimson">' 400 '%s</font></td></tr>' % p.tag) 401 402 def visit_environment(self, e, prev_e=None): 403 self._dump('<table border="0">') 404 405 def dump_location_context(lc, is_added=None): 406 self._dump('<tr><td>%s</td>' 407 '<td align="left"><b>%s</b></td>' 408 '<td align="left" colspan="2">' 409 '<font color="grey60">%s </font>' 410 '%s</td></tr>' 411 % (self._diff_plus_minus(is_added), 412 lc.caption, lc.decl, 413 ('(line %s)' % lc.line) if lc.line is not None 414 else '')) 415 416 def dump_binding(f, b, is_added=None): 417 self._dump('<tr><td>%s</td>' 418 '<td align="left"><i>S%s</i></td>' 419 '<td align="left">%s</td>' 420 '<td align="left">%s</td></tr>' 421 % (self._diff_plus_minus(is_added), 422 b.stmt_id, b.pretty, f.bindings[b])) 423 424 frames_updated = e.diff_frames(prev_e) if prev_e is not None else None 425 if frames_updated: 426 for i in frames_updated: 427 f = e.frames[i] 428 prev_f = prev_e.frames[i] 429 dump_location_context(f.location_context) 430 bindings_removed, bindings_added = f.diff_bindings(prev_f) 431 for b in bindings_removed: 432 dump_binding(prev_f, b, False) 433 for b in bindings_added: 434 dump_binding(f, b, True) 435 else: 436 for f in e.frames: 437 dump_location_context(f.location_context) 438 for b in f.bindings: 439 dump_binding(f, b) 440 441 self._dump('</table>') 442 443 def visit_environment_in_state(self, s, prev_s=None): 444 self._dump('<hr /><tr><td align="left"><b>Environment: </b>') 445 if s.environment is None: 446 self._dump('<i> Nothing!</i>') 447 else: 448 if prev_s is not None and prev_s.environment is not None: 449 if s.environment.is_different(prev_s.environment): 450 self._dump('</td></tr><tr><td align="left">') 451 self.visit_environment(s.environment, prev_s.environment) 452 else: 453 self._dump('<i> No changes!</i>') 454 else: 455 self._dump('</td></tr><tr><td align="left">') 456 self.visit_environment(s.environment) 457 458 self._dump('</td></tr>') 459 460 def visit_store(self, s, prev_s=None): 461 self._dump('<table border="0">') 462 463 def dump_binding(s, c, b, is_added=None): 464 self._dump('<tr><td>%s</td>' 465 '<td align="left">%s</td>' 466 '<td align="left">%s</td>' 467 '<td align="left">%s</td>' 468 '<td align="left">%s</td></tr>' 469 % (self._diff_plus_minus(is_added), 470 s.clusters[c].base_region, b.offset, 471 '(<i>Default</i>)' if b.kind == 'Default' 472 else '', 473 s.clusters[c].bindings[b])) 474 475 if prev_s is not None: 476 clusters_removed, clusters_added, clusters_updated = \ 477 s.diff_clusters(prev_s) 478 for c in clusters_removed: 479 for b in prev_s.clusters[c].bindings: 480 dump_binding(prev_s, c, b, False) 481 for c in clusters_updated: 482 bindings_removed, bindings_added = \ 483 s.clusters[c].diff_bindings(prev_s.clusters[c]) 484 for b in bindings_removed: 485 dump_binding(prev_s, c, b, False) 486 for b in bindings_added: 487 dump_binding(s, c, b, True) 488 for c in clusters_added: 489 for b in s.clusters[c].bindings: 490 dump_binding(s, c, b, True) 491 else: 492 for c in s.clusters: 493 for b in s.clusters[c].bindings: 494 dump_binding(s, c, b) 495 496 self._dump('</table>') 497 498 def visit_store_in_state(self, s, prev_s=None): 499 self._dump('<hr /><tr><td align="left"><b>Store: </b>') 500 if s.store is None: 501 self._dump('<i> Nothing!</i>') 502 else: 503 if prev_s is not None and prev_s.store is not None: 504 if s.store.is_different(prev_s.store): 505 self._dump('</td></tr><tr><td align="left">') 506 self.visit_store(s.store, prev_s.store) 507 else: 508 self._dump('<i> No changes!</i>') 509 else: 510 self._dump('</td></tr><tr><td align="left">') 511 self.visit_store(s.store) 512 self._dump('</td></tr>') 513 514 def visit_generic_map(self, m, prev_m=None): 515 self._dump('<table border="0">') 516 517 def dump_pair(m, k, is_added=None): 518 self._dump('<tr><td>%s</td>' 519 '<td align="left">%s</td>' 520 '<td align="left">%s</td></tr>' 521 % (self._diff_plus_minus(is_added), 522 k, m.generic_map[k])) 523 524 if prev_m is not None: 525 removed, added = m.diff(prev_m) 526 for k in removed: 527 dump_pair(prev_m, k, False) 528 for k in added: 529 dump_pair(m, k, True) 530 else: 531 for k in m.generic_map: 532 dump_pair(m, k, None) 533 534 self._dump('</table>') 535 536 def visit_generic_map_in_state(self, selector, title, s, prev_s=None): 537 m = getattr(s, selector) 538 prev_m = getattr(prev_s, selector) if prev_s is not None else None 539 if m is None and prev_m is None: 540 return 541 542 self._dump('<hr />') 543 self._dump('<tr><td align="left">' 544 '<b>%s: </b>' % title) 545 if m is None: 546 self._dump('<i> Nothing!</i>') 547 else: 548 if prev_s is not None: 549 if prev_m is not None: 550 if m.is_different(prev_m): 551 self._dump('</td></tr><tr><td align="left">') 552 self.visit_generic_map(m, prev_m) 553 else: 554 self._dump('<i> No changes!</i>') 555 if prev_m is None: 556 self._dump('</td></tr><tr><td align="left">') 557 self.visit_generic_map(m) 558 self._dump('</td></tr>') 559 560 def visit_state(self, s, prev_s): 561 self.visit_store_in_state(s, prev_s) 562 self.visit_environment_in_state(s, prev_s) 563 self.visit_generic_map_in_state('constraints', 'Ranges', 564 s, prev_s) 565 self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types', 566 s, prev_s) 567 568 def visit_node(self, node): 569 self._dump('%s [shape=record,label=<<table border="0">' 570 % (node.node_name())) 571 572 self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - ' 573 'State %s</b></td></tr>' 574 % (node.node_id, node.ptr, node.state.state_id 575 if node.state is not None else 'Unspecified')) 576 self._dump('<tr><td align="left" width="0">') 577 if len(node.points) > 1: 578 self._dump('<b>Program points:</b></td></tr>') 579 else: 580 self._dump('<b>Program point:</b></td></tr>') 581 self._dump('<tr><td align="left" width="0">' 582 '<table border="0" align="left" width="0">') 583 for p in node.points: 584 self.visit_program_point(p) 585 self._dump('</table></td></tr>') 586 587 if node.state is not None: 588 prev_s = None 589 # Do diffs only when we have a unique predecessor. 590 # Don't do diffs on the leaf nodes because they're 591 # the important ones. 592 if self._do_diffs and len(node.predecessors) == 1 \ 593 and len(node.successors) > 0: 594 prev_s = self._graph.nodes[node.predecessors[0]].state 595 self.visit_state(node.state, prev_s) 596 self._dump_raw('</table>>];\n') 597 598 def visit_edge(self, pred, succ): 599 self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name())) 600 601 def visit_end_of_graph(self): 602 self._dump_raw('}\n') 603 604 605# A class that encapsulates traversal of the ExplodedGraph. Different explorer 606# kinds could potentially traverse specific sub-graphs. 607class Explorer(object): 608 def __init__(self): 609 super(Explorer, self).__init__() 610 611 def explore(self, graph, visitor): 612 visitor.visit_begin_graph(graph) 613 for node in sorted(graph.nodes): 614 logging.debug('Visiting ' + node) 615 visitor.visit_node(graph.nodes[node]) 616 for succ in sorted(graph.nodes[node].successors): 617 logging.debug('Visiting edge: %s -> %s ' % (node, succ)) 618 visitor.visit_edge(graph.nodes[node], graph.nodes[succ]) 619 visitor.visit_end_of_graph() 620 621 622def main(): 623 parser = argparse.ArgumentParser() 624 parser.add_argument('filename', type=str) 625 parser.add_argument('-v', '--verbose', action='store_const', 626 dest='loglevel', const=logging.DEBUG, 627 default=logging.WARNING, 628 help='enable info prints') 629 parser.add_argument('-d', '--diff', action='store_const', dest='diff', 630 const=True, default=False, 631 help='display differences between states') 632 args = parser.parse_args() 633 logging.basicConfig(level=args.loglevel) 634 635 graph = ExplodedGraph() 636 with open(args.filename) as fd: 637 for raw_line in fd: 638 raw_line = raw_line.strip() 639 graph.add_raw_line(raw_line) 640 641 explorer = Explorer() 642 visitor = DotDumpVisitor(args.diff) 643 explorer.explore(graph, visitor) 644 645 646if __name__ == '__main__': 647 main() 648