#!/usr/bin/env python
#
#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===-----------------------------------------------------------------------===#


from __future__ import print_function

import argparse
import collections
import json
import logging
import re


# Compute the difference between two dictionaries. Returns a pair of key
# lists (removed, added): 'removed' holds the keys of 'prev' that are absent
# from 'curr' or mapped to a different value, 'added' holds the keys of
# 'curr' that are absent from 'prev' or mapped to a different value.
# A key whose value changed therefore shows up in both lists.
def diff_dicts(curr, prev):
    def differs(key):
        return key not in curr or key not in prev or curr[key] != prev[key]

    removed = [key for key in prev if differs(key)]
    added = [key for key in curr if differs(key)]
    return (removed, added)


# Source location read from its JSON form: line, column and file name.
# When the JSON carries no 'filename' the location is attributed to
# '(main file)'.
class SourceLocation(object):
    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        self.line = json_loc['line']
        self.col = json_loc['column']
        if 'filename' in json_loc:
            self.filename = json_loc['filename']
        else:
            self.filename = '(main file)'


# Program point read from its JSON form. Every point has a kind and a tag;
# the remaining attributes depend on the kind ('Edge', 'Statement' or
# 'BlockEntrance'). Other kinds carry no extra attributes.
class ProgramPoint(object):
    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']

        kind = self.kind
        if kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
        elif kind == 'Statement':
            self.stmt_kind = json_pp['stmt_kind']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            # A null location is kept as None instead of a SourceLocation.
            json_loc = json_pp['location']
            if json_loc is None:
                self.loc = None
            else:
                self.loc = SourceLocation(json_loc)
        elif kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']


# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey(object):
    def __init__(self, json_ek):
        super(EnvironmentBindingKey, self).__init__()
        self.stmt_id = json_ek['stmt_id']
        self.pretty = json_ek['pretty']

    # Identity of the key: only the statement id takes part in comparison
    # and hashing; the pretty-printed text does not.
    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on 'other._key'.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive '!=' from '__eq__', so define it explicitly.
    def __ne__(self, other):
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())


# Deserialized description of a location context.
class LocationContext(object):
    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.line = json_frame['call_line']

    # Identity of the context: only the location context id is compared.
    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive '!=' from '__eq__', so define it explicitly.
    def __ne__(self, other):
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())


# A group of deserialized Environment bindings that correspond to a specific
# location context.
class EnvironmentFrame(object):
    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)
        # A null 'items' entry is treated as a frame without bindings.
        items = json_frame['items']
        self.bindings = collections.OrderedDict(
            (EnvironmentBindingKey(b), b['value'])
            for b in (items if items is not None else []))

    # Returns (removed, added) lists of binding keys relative to 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # True when at least one binding was removed, added or changed.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return len(removed) != 0 or len(added) != 0


# A deserialized Environment.
class Environment(object):
    def __init__(self, json_e):
        super(Environment, self).__init__()
        self.frames = [EnvironmentFrame(f) for f in json_e]

    # Returns the indices of frames whose bindings differ from the frame at
    # the same index in 'prev', or None when no per-frame diff is available
    # (different number of frames, or a frame belongs to another location
    # context).
    def diff_frames(self, prev):
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for i, (f, prev_f) in enumerate(zip(self.frames, prev.frames)):
            if f.location_context == prev_f.location_context:
                if f.is_different(prev_f):
                    updated.append(i)
            else:
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None

        # TODO: Add support for added/removed.
        return updated

    # True when the frames cannot be diffed or at least one frame changed.
    def is_different(self, prev):
        updated = self.diff_frames(prev)
        return updated is None or len(updated) > 0


# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey(object):
    def __init__(self, json_sk):
        super(StoreBindingKey, self).__init__()
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    # Identity of the key: the (kind, offset) pair.
    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    # Python 2 does not derive '!=' from '__eq__', so define it explicitly.
    def __ne__(self, other):
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())


# A single cluster of the deserialized RegionStore.
class StoreCluster(object):
    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        self.bindings = collections.OrderedDict(
            (StoreBindingKey(b), b['value']) for b in json_sc['items'])

    # Returns (removed, added) lists of binding keys relative to 'prev'.
    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    # True when at least one binding was removed, added or changed.
    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return len(removed) != 0 or len(added) != 0


# A deserialized RegionStore.
class Store(object):
    def __init__(self, json_s):
        super(Store, self).__init__()
        # Clusters are indexed by their 'pointer' field, in input order.
        pairs = []
        for json_cluster in json_s:
            pairs.append((json_cluster['pointer'],
                          StoreCluster(json_cluster)))
        self.clusters = collections.OrderedDict(pairs)

    # Compare against an earlier store. Returns a triple of key lists
    # (removed, added, updated): clusters found only in 'prev', clusters
    # found only in this store, and clusters found in both whose contents
    # differ.
    def diff_clusters(self, prev):
        mine = self.clusters
        theirs = prev.clusters

        removed = []
        updated = []
        for key in theirs:
            if key not in mine:
                removed.append(key)
            elif theirs[key].is_different(mine[key]):
                updated.append(key)
        added = [key for key in mine if key not in theirs]
        return (removed, added, updated)

    # True when any cluster was removed, added or updated.
    def is_different(self, prev):
        return any(len(keys) != 0 for keys in self.diff_clusters(prev))


# Program state read from its JSON form. The store and the environment are
# each left as None when the JSON holds null for them.
class ProgramState(object):
    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id

        self.store = None
        if json_ps['store'] is not None:
            self.store = Store(json_ps['store'])

        self.environment = None
        if json_ps['environment'] is not None:
            self.environment = Environment(json_ps['environment'])
        # TODO: Objects under construction.
        # TODO: Constraint ranges.
        # TODO: Dynamic types of objects.
        # TODO: Checker messages.


# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and in this moment we already need a room for predecessors and successors.
class ExplodedNode(object):
    def __init__(self):
        super(ExplodedNode, self).__init__()
        # Names of the neighbouring nodes ('Node0x...'), filled in from
        # edge lines.
        self.predecessors = []
        self.successors = []

    # Fill in the node contents from the parsed JSON label. 'node_id' is the
    # node name taken from the .dot line; it must agree with the 'pointer'
    # field of the JSON.
    def construct(self, node_id, json_node):
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.points = [ProgramPoint(p) for p in json_node['program_points']]
        self.state = ProgramState(json_node['state_id'],
                                  json_node['program_state']) \
            if json_node['program_state'] is not None else None

        assert self.node_name() == node_id

    # The name under which the node appears in the .dot file.
    def node_name(self):
        return 'Node' + self.ptr


# A deserialized ExplodedGraph. Constructed by consuming a .dot file
# line-by-line.
class ExplodedGraph(object):
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        # Nodes are created on first reference, so an edge may mention a
        # node before its own line has been seen.
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        # Text of a statement that has not been terminated by ';' yet.
        self.incomplete_line = ''

    # Consume one line of the .dot file. Lines starting with '//' are
    # ignored. Edge lines update predecessor/successor lists, node lines
    # have their JSON label parsed into the corresponding ExplodedNode, and
    # anything else is skipped.
    def add_raw_line(self, raw_line):
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if len(raw_line) > 0 and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)
        result = self.edge_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as edge line.')
            pred = result.group(1)
            succ = result.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return
        result = self.node_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as node line.')
            node_id = result.group(1)
            if len(self.nodes) == 0:
                self.root_id = node_id
            # Note: when writing tests you don't need to escape everything,
            # even though in a valid dot file everything is escaped.
            # Only the '&nbsp;' indentation entity is dropped; ordinary
            # spaces must survive because they may be part of JSON string
            # values (e.g. pretty-printed statements).
            node_label = result.group(2).replace('\\l', '') \
                                        .replace('&nbsp;', '') \
                                        .replace('\\"', '"') \
                                        .replace('\\{', '{') \
                                        .replace('\\}', '}') \
                                        .replace('\\\\', '\\') \
                                        .replace('\\<', '\\\\<') \
                                        .replace('\\>', '\\\\>') \
                                        .rstrip(',')
            logging.debug(node_label)
            json_node = json.loads(node_label)
            self.nodes[node_id].construct(node_id, json_node)
            return
        logging.debug('Skipping.')


# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
class DotDumpVisitor(object):
    # 'do_diffs' enables printing a state as a diff against the state of
    # the node's unique predecessor (see visit_node).
    def __init__(self, do_diffs):
        super(DotDumpVisitor, self).__init__()
        self._do_diffs = do_diffs

    # Print text verbatim, without a trailing newline.
    @staticmethod
    def _dump_raw(s):
        print(s, end='')

    # Print a piece of an HTML-like label, without a trailing newline.
    # Ampersands and the escaped angle brackets ('\<', '\>') coming from
    # the data become HTML entities so that they cannot be mistaken for
    # markup; unescaped '<' and '>' are left alone because they belong to
    # the tags produced by this visitor. Braces are escaped, '\l' turns
    # into a line break and '|' is dropped.
    @staticmethod
    def _dump(s):
        print(s.replace('&', '&amp;')
               .replace('{', '\\{')
               .replace('}', '\\}')
               .replace('\\<', '&lt;')
               .replace('\\>', '&gt;')
               .replace('\\l', '<br />')
               .replace('|', ''), end='')

    # Marker for a diff line: '+' for added, '-' for removed, nothing when
    # no diff is being displayed (is_added is None).
    @staticmethod
    def _diff_plus_minus(is_added):
        if is_added is None:
            return ''
        if is_added:
            return '<font color="forestgreen">+</font>'
        return '<font color="red">-</font>'

    # Remember the graph (needed to look up predecessors) and open the
    # digraph.
    def visit_begin_graph(self, graph):
        self._graph = graph
        self._dump_raw('digraph "ExplodedGraph" {\n')
        self._dump_raw('label="";\n')

    # Print one table row describing a program point, colored by its kind.
    def visit_program_point(self, p):
        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
            color = 'gold3'
        elif p.kind in ['PreStmtPurgeDeadSymbols',
                        'PostStmtPurgeDeadSymbols']:
            color = 'red'
        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
            color = 'blue'
        elif p.kind in ['Statement']:
            color = 'cyan3'
        else:
            color = 'forestgreen'

        if p.kind == 'Statement':
            if p.loc is not None:
                self._dump('<tr><td align="left" width="0">'
                           '%s:<b>%s</b>:<b>%s</b>:</td>'
                           '<td align="left" width="0"><font color="%s">'
                           '%s</font></td><td>%s</td></tr>'
                           % (p.loc.filename, p.loc.line,
                              p.loc.col, color, p.stmt_kind, p.pretty))
            else:
                self._dump('<tr><td align="left" width="0">'
                           '<i>Invalid Source Location</i>:</td>'
                           '<td align="left" width="0">'
                           '<font color="%s">%s</font></td><td>%s</td></tr>'
                           % (color, p.stmt_kind, p.pretty))
        elif p.kind == 'Edge':
            # The '\>' below is turned into an HTML entity by _dump.
            self._dump('<tr><td width="0"></td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td align="left">'
                       '[B%d] -\\> [B%d]</td></tr>'
                       % (color, p.kind, p.src_id, p.dst_id))
        else:
            # TODO: Print more stuff for other kinds of points.
            self._dump('<tr><td width="0"></td>'
                       '<td align="left" width="0" colspan="2">'
                       '<font color="%s">%s</font></td></tr>'
                       % (color, p.kind))

    # Print the environment as a table. When 'prev_e' is given and a
    # per-frame diff is available, only the changed frames are printed,
    # with removed/added bindings marked; otherwise everything is printed.
    def visit_environment(self, e, prev_e=None):
        self._dump('<table border="0">')

        def dump_location_context(lc, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><b>%s</b></td>'
                       '<td align="left"><font color="grey60">%s </font>'
                       '%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          lc.caption, lc.decl,
                          ('(line %s)' % lc.line) if lc.line is not None
                          else ''))

        def dump_binding(f, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><i>S%s</i></td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          b.stmt_id, b.pretty, f.bindings[b]))

        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
        if frames_updated:
            for i in frames_updated:
                f = e.frames[i]
                prev_f = prev_e.frames[i]
                dump_location_context(f.location_context)
                bindings_removed, bindings_added = f.diff_bindings(prev_f)
                for b in bindings_removed:
                    dump_binding(prev_f, b, False)
                for b in bindings_added:
                    dump_binding(f, b, True)
        else:
            for f in e.frames:
                dump_location_context(f.location_context)
                for b in f.bindings:
                    dump_binding(f, b)

        self._dump('</table>')

    # Print the "Environment" section of a state, as a diff against
    # 'prev_s' when the previous state has an environment.
    def visit_environment_in_state(self, s, prev_s=None):
        self._dump('<tr><td align="left">'
                   '<b>Environment: </b>')
        if s.environment is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_s is not None and prev_s.environment is not None:
                if s.environment.is_different(prev_s.environment):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_environment(s.environment, prev_s.environment)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_environment(s.environment)

        self._dump('</td></tr>')

    # Print the store as a table, one row per binding. When 'prev_s' is
    # given only removed/added bindings are printed, marked accordingly.
    def visit_store(self, s, prev_s=None):
        self._dump('<table border="0">')

        def dump_binding(s, c, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          s.clusters[c].base_region, b.offset,
                          '(<i>Default</i>)' if b.kind == 'Default'
                          else '',
                          s.clusters[c].bindings[b]))

        if prev_s is not None:
            clusters_removed, clusters_added, clusters_updated = \
                s.diff_clusters(prev_s)
            for c in clusters_removed:
                for b in prev_s.clusters[c].bindings:
                    dump_binding(prev_s, c, b, False)
            for c in clusters_updated:
                bindings_removed, bindings_added = \
                    s.clusters[c].diff_bindings(prev_s.clusters[c])
                for b in bindings_removed:
                    dump_binding(prev_s, c, b, False)
                for b in bindings_added:
                    dump_binding(s, c, b, True)
            for c in clusters_added:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b, True)
        else:
            for c in s.clusters:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b)

        self._dump('</table>')

    # Print the "Store" section of a state, as a diff against 'prev_s'
    # when the previous state has a store.
    def visit_store_in_state(self, s, prev_s=None):
        self._dump('<tr><td align="left"><b>Store: </b>')
        if s.store is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_s is not None and prev_s.store is not None:
                if s.store.is_different(prev_s.store):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_store(s.store, prev_s.store)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_store(s.store)
        self._dump('</td></tr><hr />')

    # Print all supported sections of a program state.
    def visit_state(self, s, prev_s):
        self.visit_store_in_state(s, prev_s)
        self.visit_environment_in_state(s, prev_s)

    # Print a whole node: header, program points and, when present, the
    # program state.
    def visit_node(self, node):
        self._dump('%s [shape=record,label=<<table border="0">'
                   % (node.node_name()))

        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
                   'State %s</b></td></tr>'
                   % (node.node_id, node.ptr, node.state.state_id
                      if node.state is not None else 'Unspecified'))
        self._dump('<tr><td align="left" width="0">')
        if len(node.points) > 1:
            self._dump('<b>Program points:</b></td></tr>')
        else:
            self._dump('<b>Program point:</b></td></tr>')
        self._dump('<tr><td align="left" width="0">'
                   '<table border="0" align="left" width="0">')
        for p in node.points:
            self.visit_program_point(p)
        self._dump('</table></td></tr>')

        if node.state is not None:
            self._dump('<hr />')
            prev_s = None
            # Do diffs only when we have a unique predecessor.
            # Don't do diffs on the leaf nodes because they're
            # the important ones.
            if self._do_diffs and len(node.predecessors) == 1 \
               and len(node.successors) > 0:
                prev_s = self._graph.nodes[node.predecessors[0]].state
            self.visit_state(node.state, prev_s)
        self._dump_raw('</table>>];\n')

    # Print an edge between two nodes.
    def visit_edge(self, pred, succ):
        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))

    # Close the digraph.
    def visit_end_of_graph(self):
        self._dump_raw('}\n')


# A class that encapsulates traversal of the ExplodedGraph. Different explorer
# kinds could potentially traverse specific sub-graphs.
class Explorer(object):
    def __init__(self):
        super(Explorer, self).__init__()

    # Walk the whole graph in sorted order of node names. Each node is
    # reported to the visitor, followed by its outgoing edges in sorted
    # order of successor names; the walk is bracketed by the visitor's
    # begin/end callbacks.
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node_name in sorted(graph.nodes):
            logging.debug('Visiting ' + node_name)
            current = graph.nodes[node_name]
            visitor.visit_node(current)
            for succ_name in sorted(current.successors):
                logging.debug('Visiting edge: %s -> %s '
                              % (node_name, succ_name))
                visitor.visit_edge(current, graph.nodes[succ_name])
        visitor.visit_end_of_graph()


# Command-line entry point: read the .dot file named on the command line
# into an ExplodedGraph and dump it through a DotDumpVisitor.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    parser.add_argument('-v', '--verbose', action='store_const',
                        dest='loglevel', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
                        const=True, default=False,
                        help='display differences between states')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            # Surrounding whitespace (including the newline) is dropped
            # before the line is handed to the parser.
            graph.add_raw_line(raw_line.strip())

    Explorer().explore(graph, DotDumpVisitor(args.diff))


if __name__ == '__main__':
    main()