#!/usr/bin/env python
#
#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===-----------------------------------------------------------------------===#


from __future__ import print_function

import argparse
import collections
import json
import logging
import re


# A helper function for finding the difference between two dictionaries.
# Returns a pair of key lists (removed, added):
#   - 'removed' has the keys of 'prev' that are either absent from 'curr'
#     or mapped to a different value there;
#   - 'added' has the keys of 'curr' that are either absent from 'prev'
#     or mapped to a different value there.
# A key whose value changed therefore shows up in both lists.
def diff_dicts(curr, prev):
    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
    return (removed, added)


# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap(object):
    def __init__(self, generic_map):
        # The wrapped dictionary; it is stored as-is, not copied.
        self.generic_map = generic_map

    # Returns the (removed, added) key lists relative to the map 'prev'.
    def diff(self, prev):
        return diff_dicts(self.generic_map, prev.generic_map)

    # Returns True if at least one key was added, removed or changed.
    def is_different(self, prev):
        removed, added = self.diff(prev)
        return bool(removed or added)


# A deserialized source location.
class SourceLocation(object):
    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        self.line = json_loc['line']
        self.col = json_loc['column']
        # The 'filename' key is optional; fall back to a placeholder.
        self.filename = json_loc.get('filename', '(main file)')


# A deserialized program point.
class ProgramPoint(object):
    # Reads 'kind' and 'tag' for every point; the remaining attributes
    # depend on the kind and only exist for 'Edge', 'Statement' and
    # 'BlockEntrance' points.
    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']
        if self.kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
        elif self.kind == 'Statement':
            self.stmt_kind = json_pp['stmt_kind']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            # A statement may come without a source location.
            self.loc = SourceLocation(json_pp['location']) \
                if json_pp['location'] is not None else None
        elif self.kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']


# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey(object):
    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        self.stmt_id = json_ek['stmt_id']
        self.pretty = json_ek['pretty']

    # Identity of the key: only the statement id takes part in equality
    # and hashing, the pretty-printed text does not.
    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Let Python handle comparisons with unrelated objects instead of
        # failing on a missing '_key' attribute.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())


# Deserialized description of a location context.
class LocationContext(object):
    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.line = json_frame['call_line']

    # Identity of the context: only the location context id takes part
    # in equality and hashing.
    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Let Python handle comparisons with unrelated objects instead of
        # failing on a missing '_key' attribute.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())


# A group of deserialized Environment bindings that correspond to a specific
# location context.
class EnvironmentFrame(object):
    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)
        # A frame may come with 'items' set to None; treat it as empty.
        items = json_frame['items'] \
            if json_frame['items'] is not None else []
        self.bindings = collections.OrderedDict(
            (EnvironmentBindingKey(b), b['value']) for b in items)

    # Returns the (removed, added) lists of binding keys relative to the
    # frame 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # Returns True if at least one binding was added, removed or changed.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return bool(removed or added)


# A deserialized Environment.
class Environment(object):
    def __init__(self, json_e):
        super(Environment, self).__init__()
        self.ptr = json_e['pointer']
        self.frames = [EnvironmentFrame(f) for f in json_e['items']]

    # Returns the list of indices of frames whose bindings differ from the
    # frame at the same index in 'prev', or None when no per-frame diff can
    # be produced (different number of frames, or a frame at some index
    # belongs to a different location context).
    def diff_frames(self, prev):
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for i, (f, prev_f) in enumerate(zip(self.frames, prev.frames)):
            if f.location_context == prev_f.location_context:
                if f.is_different(prev_f):
                    updated.append(i)
            else:
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None

        # TODO: Add support for added/removed.
        return updated

    # Returns True if a diff cannot be produced or if any frame changed.
    def is_different(self, prev):
        updated = self.diff_frames(prev)
        return updated is None or len(updated) > 0


# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    # Identity of the key: the (kind, offset) pair.
    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Let Python handle comparisons with unrelated objects instead of
        # failing on a missing '_key' attribute.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '__eq__'.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())


# A single cluster of the deserialized RegionStore.
class StoreCluster(object):
    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        # Bindings keep the order in which they appear in the dump.
        self.bindings = collections.OrderedDict(
            (StoreBindingKey(b), b['value']) for b in json_sc['items'])

    # Returns the (removed, added) lists of binding keys relative to the
    # cluster 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # Returns True if at least one binding was added, removed or changed.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return bool(removed or added)


# A deserialized RegionStore.
class Store(object):
    def __init__(self, json_s):
        super(Store, self).__init__()
        self.ptr = json_s['pointer']
        # Clusters are keyed by their 'pointer' field, in dump order.
        self.clusters = collections.OrderedDict(
            (c['pointer'], StoreCluster(c)) for c in json_s['items'])

    # Returns a triple of cluster key lists (removed, added, updated)
    # relative to the store 'prev'; 'updated' lists clusters present in
    # both stores whose bindings differ.
    def diff_clusters(self, prev):
        removed = [k for k in prev.clusters if k not in self.clusters]
        added = [k for k in self.clusters if k not in prev.clusters]
        updated = [k for k in prev.clusters if k in self.clusters
                   and prev.clusters[k].is_different(self.clusters[k])]
        return (removed, added, updated)

    # Returns True if any cluster was added, removed or updated.
    def is_different(self, prev):
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed or added or updated)


# A deserialized program state.
class ProgramState(object):
    # Builds the store, the environment and the constraint map from the
    # respective JSON entries; each of them stays None when the entry is
    # None in the dump.
    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id
        self.store = Store(json_ps['store']) \
            if json_ps['store'] is not None else None
        self.environment = Environment(json_ps['environment']) \
            if json_ps['environment'] is not None else None
        self.constraints = GenericMap(collections.OrderedDict([
            (c['symbol'], c['range']) for c in json_ps['constraints']
        ])) if json_ps['constraints'] is not None else None
        # TODO: Objects under construction.
        # TODO: Dynamic types of objects.
        # TODO: Checker messages.


# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for predecessors and successors.
class ExplodedNode(object):
    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    # Fills in the node from its JSON description. 'node_id' is the name
    # the node has in the .dot file; it must agree with node_name().
    def construct(self, node_id, json_node):
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.points = [ProgramPoint(p) for p in json_node['program_points']]
        self.state = ProgramState(json_node['state_id'],
                                  json_node['program_state']) \
            if json_node['program_state'] is not None else None

        assert self.node_name() == node_id

    # The name under which the node appears in the .dot file.
    def node_name(self):
        return 'Node' + self.ptr


# A deserialized ExplodedGraph. Constructed by consuming a .dot file
# line-by-line.
class ExplodedGraph(object):
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        # Nodes are created on first access, so that an edge may mention
        # a node before the node's own line is seen.
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        # Accumulates pieces of a statement that spans several lines.
        self.incomplete_line = ''

    # Consumes one line of the .dot file (the caller is expected to have
    # stripped surrounding whitespace) and updates the graph accordingly.
    # Lines that are neither node nor edge lines are skipped.
    def add_raw_line(self, raw_line):
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(';'):
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)
        result = self.edge_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as edge line.')
            pred = result.group(1)
            succ = result.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return
        result = self.node_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as node line.')
            node_id = result.group(1)
            if len(self.nodes) == 0:
                self.root_id = node_id
            # Note: when writing tests you don't need to escape everything,
            # even though in a valid dot file everything is escaped.
            # Only the '&nbsp;' indentation entity is dropped; ordinary
            # spaces must survive because they occur inside JSON strings.
            node_label = result.group(2).replace('\\l', '') \
                                        .replace('&nbsp;', '') \
                                        .replace('\\"', '"') \
                                        .replace('\\{', '{') \
                                        .replace('\\}', '}') \
                                        .replace('\\\\', '\\') \
                                        .replace('\\|', '|') \
                                        .replace('\\<', '\\\\<') \
                                        .replace('\\>', '\\\\>') \
                                        .rstrip(',')
            logging.debug(node_label)
            json_node = json.loads(node_label)
            self.nodes[node_id].construct(node_id, json_node)
            return
        logging.debug('Skipping.')


# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
class DotDumpVisitor(object):
    def __init__(self, do_diffs):
        super(DotDumpVisitor, self).__init__()
        self._do_diffs = do_diffs

    # Prints 's' verbatim, without a trailing newline.
    @staticmethod
    def _dump_raw(s):
        print(s, end='')

    # Prints 's' as a piece of an HTML-like label inside a record node:
    # '&' and backslash-escaped angle brackets become HTML entities, record
    # metacharacters ('{', '}', '|') get escaped and '\l' becomes a line
    # break. Unescaped '<' and '>' are left alone so that callers can pass
    # HTML tags. '&' is handled first so that the entities produced by the
    # later replacements are not escaped again.
    @staticmethod
    def _dump(s):
        print(s.replace('&', '&amp;')
               .replace('{', '\\{')
               .replace('}', '\\}')
               .replace('\\<', '&lt;')
               .replace('\\>', '&gt;')
               .replace('\\l', '<br />')
               .replace('|', '\\|'), end='')

    # Returns the marker shown in the first column of a diff row:
    # nothing when not diffing, a green '+' for added items and a red '-'
    # for removed ones.
    @staticmethod
    def _diff_plus_minus(is_added):
        if is_added is None:
            return ''
        if is_added:
            return '<font color="forestgreen">+</font>'
        return '<font color="red">-</font>'

    def visit_begin_graph(self, graph):
        self._graph = graph
        self._dump_raw('digraph "ExplodedGraph" {\n')
        self._dump_raw('label="";\n')

    # Dumps one table row describing the program point 'p'.
    def visit_program_point(self, p):
        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
            color = 'gold3'
        elif p.kind in ['PreStmtPurgeDeadSymbols',
                        'PostStmtPurgeDeadSymbols']:
            color = 'red'
        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
            color = 'blue'
        elif p.kind in ['Statement']:
            color = 'cyan3'
        else:
            color = 'forestgreen'

        if p.kind == 'Statement':
            if p.loc is not None:
                self._dump('<tr><td align="left" width="0">'
                           '%s:<b>%s</b>:<b>%s</b>:</td>'
                           '<td align="left" width="0"><font color="%s">'
                           '%s</font></td><td>%s</td></tr>'
                           % (p.loc.filename, p.loc.line,
                              p.loc.col, color, p.stmt_kind, p.pretty))
            else:
                self._dump('<tr><td align="left" width="0">'
                           '<i>Invalid Source Location</i>:</td>'
                           '<td align="left" width="0">'
                           '<font color="%s">%s</font></td><td>%s</td></tr>'
                           % (color, p.stmt_kind, p.pretty))
        elif p.kind == 'Edge':
            self._dump('<tr><td width="0"></td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td align="left">'
                       '[B%d] -\\> [B%d]</td></tr>'
                       % (color, p.kind, p.src_id, p.dst_id))
        else:
            # TODO: Print more stuff for other kinds of points.
            self._dump('<tr><td width="0"></td>'
                       '<td align="left" width="0" colspan="2">'
                       '<font color="%s">%s</font></td></tr>'
                       % (color, p.kind))

    # Dumps the environment 'e' as a table. When 'prev_e' is given and
    # a per-frame diff is available and non-empty, only the changed frames
    # are shown, with '+'/'-' markers; otherwise everything is shown.
    def visit_environment(self, e, prev_e=None):
        self._dump('<table border="0">')

        def dump_location_context(lc, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><b>%s</b></td>'
                       '<td align="left"><font color="grey60">%s </font>'
                       '%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          lc.caption, lc.decl,
                          ('(line %s)' % lc.line) if lc.line is not None
                          else ''))

        def dump_binding(f, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><i>S%s</i></td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          b.stmt_id, b.pretty, f.bindings[b]))

        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
        if frames_updated:
            for i in frames_updated:
                f = e.frames[i]
                prev_f = prev_e.frames[i]
                dump_location_context(f.location_context)
                bindings_removed, bindings_added = f.diff_bindings(prev_f)
                for b in bindings_removed:
                    dump_binding(prev_f, b, False)
                for b in bindings_added:
                    dump_binding(f, b, True)
        else:
            for f in e.frames:
                dump_location_context(f.location_context)
                for b in f.bindings:
                    dump_binding(f, b)

        self._dump('</table>')

    # Dumps the 'Environment' section of the state 's', diffed against
    # 'prev_s' when the latter is given and has an environment.
    def visit_environment_in_state(self, s, prev_s=None):
        self._dump('<tr><td align="left">'
                   '<b>Environment: </b>')
        if s.environment is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_s is not None and prev_s.environment is not None:
                if s.environment.is_different(prev_s.environment):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_environment(s.environment, prev_s.environment)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_environment(s.environment)

        self._dump('</td></tr>')

    # Dumps the store 's' as a table. When 'prev_s' is given, only the
    # bindings that differ from it are shown, with '+'/'-' markers.
    def visit_store(self, s, prev_s=None):
        self._dump('<table border="0">')

        def dump_binding(s, c, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          s.clusters[c].base_region, b.offset,
                          '(<i>Default</i>)' if b.kind == 'Default'
                          else '',
                          s.clusters[c].bindings[b]))

        if prev_s is not None:
            clusters_removed, clusters_added, clusters_updated = \
                s.diff_clusters(prev_s)
            for c in clusters_removed:
                for b in prev_s.clusters[c].bindings:
                    dump_binding(prev_s, c, b, False)
            for c in clusters_updated:
                bindings_removed, bindings_added = \
                    s.clusters[c].diff_bindings(prev_s.clusters[c])
                for b in bindings_removed:
                    dump_binding(prev_s, c, b, False)
                for b in bindings_added:
                    dump_binding(s, c, b, True)
            for c in clusters_added:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b, True)
        else:
            for c in s.clusters:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b)

        self._dump('</table>')

    # Dumps the 'Store' section of the state 's', diffed against 'prev_s'
    # when the latter is given and has a store.
    def visit_store_in_state(self, s, prev_s=None):
        self._dump('<tr><td align="left"><b>Store: </b>')
        if s.store is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_s is not None and prev_s.store is not None:
                if s.store.is_different(prev_s.store):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_store(s.store, prev_s.store)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_store(s.store)
        self._dump('</td></tr>')

    # Dumps the generic map 'm' as a table. When 'prev_m' is given, only
    # the entries that differ from it are shown, with '+'/'-' markers.
    def visit_generic_map(self, m, prev_m=None):
        self._dump('<table border="0">')

        def dump_pair(m, k, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          k, m.generic_map[k]))

        if prev_m is not None:
            removed, added = m.diff(prev_m)
            for k in removed:
                dump_pair(prev_m, k, False)
            for k in added:
                dump_pair(m, k, True)
        else:
            for k in m.generic_map:
                dump_pair(m, k, None)

        self._dump('</table>')

    # Dumps the section for the generic map stored in the attribute named
    # 'selector' of the state 's', diffed against the same attribute of
    # 'prev_s' when available. 'title' is the section heading; it defaults
    # to 'Ranges', the heading used for the constraint map.
    def visit_generic_map_in_state(self, selector, s, prev_s=None,
                                   title='Ranges'):
        self._dump('<tr><td align="left">'
                   '<b>%s: </b>' % title)
        m = getattr(s, selector)
        if m is None:
            self._dump('<i> Nothing!</i>')
        else:
            prev_m = None
            if prev_s is not None:
                prev_m = getattr(prev_s, selector)
                if prev_m is not None:
                    if m.is_different(prev_m):
                        self._dump('</td></tr><tr><td align="left">')
                        self.visit_generic_map(m, prev_m)
                    else:
                        self._dump('<i> No changes!</i>')
            if prev_m is None:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_generic_map(m)
        self._dump('</td></tr>')

    # Dumps all supported sections of the state 's'.
    def visit_state(self, s, prev_s):
        self.visit_store_in_state(s, prev_s)
        self._dump('<hr />')
        self.visit_environment_in_state(s, prev_s)
        self._dump('<hr />')
        self.visit_generic_map_in_state('constraints', s, prev_s)

    # Dumps the node as a record-shaped DOT node with an HTML-like label.
    def visit_node(self, node):
        self._dump('%s [shape=record,label=<<table border="0">'
                   % (node.node_name()))

        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
                   'State %s</b></td></tr>'
                   % (node.node_id, node.ptr, node.state.state_id
                      if node.state is not None else 'Unspecified'))
        self._dump('<tr><td align="left" width="0">')
        if len(node.points) > 1:
            self._dump('<b>Program points:</b></td></tr>')
        else:
            self._dump('<b>Program point:</b></td></tr>')
        self._dump('<tr><td align="left" width="0">'
                   '<table border="0" align="left" width="0">')
        for p in node.points:
            self.visit_program_point(p)
        self._dump('</table></td></tr>')

        if node.state is not None:
            self._dump('<hr />')
            prev_s = None
            # Do diffs only when we have a unique predecessor.
            # Don't do diffs on the leaf nodes because they're
            # the important ones.
            if self._do_diffs and len(node.predecessors) == 1 \
               and len(node.successors) > 0:
                prev_s = self._graph.nodes[node.predecessors[0]].state
            self.visit_state(node.state, prev_s)
        self._dump_raw('</table>>];\n')

    def visit_edge(self, pred, succ):
        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))

    def visit_end_of_graph(self):
        self._dump_raw('}\n')


# A class that encapsulates traversal of the ExplodedGraph. Different explorer
# kinds could potentially traverse specific sub-graphs.
class Explorer(object):
    def __init__(self):
        super(Explorer, self).__init__()

    # Walks over every node of 'graph' in sorted order of node names and
    # reports it to 'visitor', followed by the node's outgoing edges in
    # sorted order of successor names. The walk is bracketed by the
    # visitor's visit_begin_graph() and visit_end_of_graph() callbacks.
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for name in sorted(graph.nodes):
            logging.debug('Visiting %s', name)
            node = graph.nodes[name]
            visitor.visit_node(node)
            for succ_name in sorted(node.successors):
                logging.debug('Visiting edge: %s -> %s ', name, succ_name)
                visitor.visit_edge(node, graph.nodes[succ_name])
        visitor.visit_end_of_graph()


# Entry point: parses the command line, reads the .dot file line by line
# into an ExplodedGraph and prints the rewritten graph to standard output.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    parser.add_argument('-v', '--verbose', action='store_const',
                        dest='loglevel', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    parser.add_argument('-d', '--diff', action='store_true', dest='diff',
                        help='display differences between states')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            # add_raw_line() expects lines without surrounding whitespace.
            graph.add_raw_line(raw_line.strip())

    explorer = Explorer()
    visitor = DotDumpVisitor(args.diff)
    explorer.explore(graph, visitor)


if __name__ == '__main__':
    main()