xref: /llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py (revision b50d1673581a4f8dd33c36eb5a9964ad5fe4b2c4)
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import json
17import logging
18import re
19
20
21# A deserialized source location.
class SourceLocation(object):
    """A source position read from a JSON location record.

    Attributes:
        line: value stored under the record's 'line' key.
        col: value stored under the record's 'column' key.
        filename: value stored under 'filename', or the placeholder
            '(main file)' when the record has no such key.
    """

    def __init__(self, json_loc):
        super(SourceLocation, self).__init__()
        self.line = json_loc['line']
        self.col = json_loc['column']
        # Records without a file name fall back to a fixed placeholder.
        self.filename = json_loc.get('filename', '(main file)')
29
30
31# A deserialized program point.
class ProgramPoint(object):
    """A program point read from its JSON record.

    'kind' and 'tag' are always copied. Further attributes exist only for
    some kinds:
        'Edge':          src_id, dst_id
        'Statement':     stmt_kind, pointer, pretty, loc
        'BlockEntrance': block_id
    Any other kind carries no extra attributes.
    """

    def __init__(self, json_pp):
        super(ProgramPoint, self).__init__()
        kind = json_pp['kind']
        self.kind = kind
        self.tag = json_pp['tag']

        if kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
            return

        if kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
            return

        if kind != 'Statement':
            return

        self.stmt_kind = json_pp['stmt_kind']
        self.pointer = json_pp['pointer']
        self.pretty = json_pp['pretty']
        # A null location is kept as None instead of being wrapped.
        if json_pp['location'] is None:
            self.loc = None
        else:
            self.loc = SourceLocation(json_pp['location'])
48
49
50# A value of a single expression in a deserialized Environment.
class EnvironmentBinding(object):
    """One Environment entry: copies 'stmt_id', 'pretty' and 'value'."""

    def __init__(self, json_eb):
        super(EnvironmentBinding, self).__init__()
        self.stmt_id, self.pretty, self.value = (
            json_eb['stmt_id'],
            json_eb['pretty'],
            json_eb['value'],
        )
57
58
59# Deserialized description of a location context.
class LocationContext(object):
    """Location context fields taken from a JSON frame record.

    Attributes:
        lctx_id: value of the 'lctx_id' key.
        caption: value of the 'location_context' key.
        decl: value of the 'calling' key.
        line: value of the 'call_line' key.
    """

    def __init__(self, json_frame):
        super(LocationContext, self).__init__()
        self.lctx_id, self.caption = (
            json_frame['lctx_id'],
            json_frame['location_context'],
        )
        self.decl, self.line = (
            json_frame['calling'],
            json_frame['call_line'],
        )
67
68
69# A group of deserialized Environment bindings that correspond to a specific
70# location context.
class EnvironmentFrame(object):
    """Environment bindings grouped under one location context.

    Attributes:
        location_context: LocationContext built from the frame record.
        bindings: list of EnvironmentBinding objects; empty when the
            record's 'items' value is null.
    """

    def __init__(self, json_frame):
        super(EnvironmentFrame, self).__init__()
        self.location_context = LocationContext(json_frame)

        if json_frame['items'] is None:
            self.bindings = []
        else:
            self.bindings = [EnvironmentBinding(json_binding)
                             for json_binding in json_frame['items']]
77
78
79# A deserialized Environment.
class Environment(object):
    """The Environment of a state: one EnvironmentFrame per JSON entry."""

    def __init__(self, json_e):
        super(Environment, self).__init__()
        self.frames = [
            EnvironmentFrame(json_frame) for json_frame in json_e
        ]
84
85
86# A single binding in a deserialized RegionStore cluster.
class StoreBinding(object):
    """One binding of a store cluster: copies 'kind', 'offset', 'value'."""

    def __init__(self, json_sb):
        super(StoreBinding, self).__init__()
        self.kind, self.offset, self.value = (
            json_sb['kind'],
            json_sb['offset'],
            json_sb['value'],
        )
93
94
95# A single cluster of the deserialized RegionStore.
class StoreCluster(object):
    """A store cluster: its base region plus the bindings under 'items'.

    Attributes:
        base_region: value of the record's 'cluster' key.
        bindings: one StoreBinding per entry of the record's 'items' list.
    """

    def __init__(self, json_sc):
        super(StoreCluster, self).__init__()
        self.base_region = json_sc['cluster']
        self.bindings = [
            StoreBinding(json_binding) for json_binding in json_sc['items']
        ]
101
102
103# A deserialized RegionStore.
class Store(object):
    """The store of a state: one StoreCluster per JSON entry."""

    def __init__(self, json_s):
        super(Store, self).__init__()
        self.clusters = [
            StoreCluster(json_cluster) for json_cluster in json_s
        ]
108
109
110# A deserialized program state.
class ProgramState(object):
    """A program state: an id plus optional Store and Environment.

    Attributes:
        state_id: the identifier passed in by the caller.
        store: Store built from the record's 'store' value, or None when
            that value is null.
        environment: Environment built from the record's 'environment'
            value, or None when that value is null.
    """

    def __init__(self, state_id, json_ps):
        super(ProgramState, self).__init__()
        logging.debug('Adding ProgramState ' + str(state_id))

        self.state_id = state_id

        if json_ps['store'] is None:
            self.store = None
        else:
            self.store = Store(json_ps['store'])

        if json_ps['environment'] is None:
            self.environment = None
        else:
            self.environment = Environment(json_ps['environment'])

        # TODO: Objects under construction.
        # TODO: Constraint ranges.
        # TODO: Dynamic types of objects.
        # TODO: Checker messages.
125
126
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that point we already need room for predecessors and successors.
class ExplodedNode(object):
    """A graph node whose contents are filled in later by construct().

    A fresh instance only has empty 'predecessors' and 'successors' lists;
    'node_id', 'ptr', 'points' and 'state' appear once construct() has run.
    """

    def __init__(self):
        super(ExplodedNode, self).__init__()
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Populate the node from its JSON record.

        node_id is the node name used in the .dot file; it must equal
        node_name() computed from the record's 'pointer' value.
        """
        logging.debug('Adding ' + node_id)
        self.node_id = json_node['node_id']
        self.ptr = json_node['pointer']
        self.points = [ProgramPoint(json_point)
                       for json_point in json_node['program_points']]

        # 'state_id' is only read when a program state is present.
        if json_node['program_state'] is None:
            self.state = None
        else:
            self.state = ProgramState(json_node['state_id'],
                                      json_node['program_state'])

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node's .dot name: 'Node' followed by the pointer."""
        return 'Node' + self.ptr
149
150
151# A deserialized ExplodedGraph. Constructed by consuming a .dot file
152# line-by-line.
class ExplodedGraph(object):
    """A deserialized ExplodedGraph, built by feeding .dot lines one by one.

    Attributes:
        nodes: defaultdict mapping a node name ('Node0x...') to its
            ExplodedNode. Entries are created on first access, so an edge
            line may create a node before its own node line is seen.
        root_id: name of the first node line that was parsed, or None if
            no node line has been parsed yet.
        incomplete_line: text accumulated from lines that did not yet end
            with ';'.
    """

    # Parse .dot files with regular expressions.
    node_re = re.compile(
        r'^(Node0x[0-9a-f]*) \[shape=record,.*label="{(.*)\\l}"\];$')
    edge_re = re.compile(
        r'^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        super(ExplodedGraph, self).__init__()
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    @staticmethod
    def _unescape_label(label):
        """Turn the escaped label text of a node line into JSON text.

        Note: when writing tests you don't need to escape everything,
        even though in a valid dot file everything is escaped.
        NOTE: the second step removes *every* space, including spaces that
        sit inside JSON string values.
        """
        return label.replace('\\l', '') \
                    .replace(' ', '') \
                    .replace('\\"', '"') \
                    .replace('\\{', '{') \
                    .replace('\\}', '}') \
                    .replace('\\\\', '\\') \
                    .replace('\\<', '\\\\<') \
                    .replace('\\>', '\\\\>') \
                    .rstrip(',')

    def _add_edge(self, pred, succ):
        """Record the edge pred -> succ on both endpoint nodes."""
        self.nodes[pred].successors.append(succ)
        self.nodes[succ].predecessors.append(pred)

    def _add_node(self, node_id, raw_label):
        """Parse the label of a node line and fill in the matching node."""
        # Do not test len(self.nodes) here: edge lines also insert into the
        # defaultdict, so the mapping may already be non-empty when the
        # first node line arrives.
        if self.root_id is None:
            self.root_id = node_id
        node_label = self._unescape_label(raw_label)
        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)

    def add_raw_line(self, raw_line):
        """Consume one line of the .dot file.

        Lines starting with '//' are ignored. A non-empty line that does
        not end with ';' is buffered and prepended to the following lines
        until one ends with ';' (or is empty). The joined line is then
        classified as an edge line, a node line, or skipped.

        Raises whatever json.loads() raises when a node label is not valid
        JSON after unescaping.
        """
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(';'):
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: %s', raw_line)
        result = self.edge_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as edge line.')
            self._add_edge(result.group(1), result.group(2))
            return
        result = self.node_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as node line.')
            self._add_node(result.group(1), result.group(2))
            return
        logging.debug('Skipping.')
211
212
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
class DotDumpVisitor(object):
    """Prints the visited graph to standard output as a DOT document."""

    # Colors for the program point kinds that do not use the default color.
    _POINT_COLORS = {
        'Edge': 'gold3',
        'BlockEntrance': 'gold3',
        'BlockExit': 'gold3',
        'PreStmtPurgeDeadSymbols': 'red',
        'PostStmtPurgeDeadSymbols': 'red',
        'CallEnter': 'blue',
        'CallExitBegin': 'blue',
        'CallExitEnd': 'blue',
        'Statement': 'cyan3',
    }
    _DEFAULT_POINT_COLOR = 'forestgreen'

    # Substitutions applied by _dump(), in this exact order.
    _SUBSTITUTIONS = (
        ('&', '&amp;'),
        ('{', '\\{'),
        ('}', '\\}'),
        ('\\<', '&lt;'),
        ('\\>', '&gt;'),
        ('\\l', '<br />'),
        ('|', ''),
    )

    def __init__(self):
        super(DotDumpVisitor, self).__init__()

    @staticmethod
    def _dump_raw(s):
        """Print s verbatim, without a trailing newline."""
        print(s, end='')

    @staticmethod
    def _dump(s):
        """Print s after applying the substitution table, no newline."""
        text = s
        for old, new in DotDumpVisitor._SUBSTITUTIONS:
            text = text.replace(old, new)
        print(text, end='')

    def visit_begin_graph(self, graph):
        """Remember the graph and print the opening lines of the digraph."""
        self._graph = graph
        self._dump_raw('digraph "ExplodedGraph" {\n')
        self._dump_raw('label="";\n')

    def visit_program_point(self, p):
        """Print one table row describing program point p."""
        color = self._POINT_COLORS.get(p.kind, self._DEFAULT_POINT_COLOR)

        if p.kind == 'Edge':
            self._dump('<tr><td width="0">-</td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td align="left">'
                       '[B%d] -\\> [B%d]</td></tr>'
                       % (color, p.kind, p.src_id, p.dst_id))
            return

        if p.kind != 'Statement':
            # TODO: Print more stuff for other kinds of points.
            self._dump('<tr><td width="0">-</td>'
                       '<td align="left" width="0" colspan="2">'
                       '<font color="%s">%s</font></td></tr>'
                       % (color, p.kind))
            return

        if p.loc is None:
            self._dump('<tr><td align="left" width="0">'
                       '<i>Invalid Source Location</i>:</td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td>%s</td></tr>'
                       % (color, p.stmt_kind, p.pretty))
            return

        self._dump('<tr><td align="left" width="0">'
                   '%s:<b>%s</b>:<b>%s</b>:</td>'
                   '<td align="left" width="0"><font color="%s">'
                   '%s</font></td><td>%s</td></tr>'
                   % (p.loc.filename, p.loc.line,
                      p.loc.col, color, p.stmt_kind, p.pretty))

    def visit_environment(self, e):
        """Print a table with a caption row and binding rows per frame."""
        self._dump('<table border="0">')

        for frame in e.frames:
            lctx = frame.location_context
            if lctx.line is not None:
                line_note = '(line %s)' % lctx.line
            else:
                line_note = ''
            self._dump('<tr><td align="left"><b>%s</b></td>'
                       '<td align="left"><font color="grey60">%s </font>'
                       '%s</td></tr>'
                       % (lctx.caption, lctx.decl, line_note))
            for binding in frame.bindings:
                self._dump('<tr><td align="left"><i>S%s</i></td>'
                           '<td align="left">%s</td>'
                           '<td align="left">%s</td></tr>'
                           % (binding.stmt_id, binding.pretty, binding.value))

        self._dump('</table>')

    def visit_store(self, s):
        """Print a table with one row per binding of every cluster."""
        self._dump('<table border="0">')

        for cluster in s.clusters:
            for binding in cluster.bindings:
                if binding.kind == 'Default':
                    kind_note = '(<i>Default</i>)'
                else:
                    kind_note = ''
                self._dump('<tr><td align="left">%s</td>'
                           '<td align="left">%s</td>'
                           '<td align="left">%s</td>'
                           '<td align="left">%s</td></tr>'
                           % (cluster.base_region, binding.offset,
                              kind_note, binding.value))

        self._dump('</table>')

    def _dump_section(self, header, payload, visit):
        """Print a section header, then the payload or a 'Nothing!' note."""
        self._dump(header)
        if payload is None:
            self._dump('<i> Nothing!</i>')
            return
        self._dump('</td></tr>'
                   '<tr><td align="left">')
        visit(payload)

    def visit_state(self, s):
        """Print the Store and Environment sections of program state s."""
        self._dump_section('<tr><td align="left">'
                           '<b>Store: </b>',
                           s.store, self.visit_store)
        self._dump_section('</td></tr><hr />'
                           '<tr><td align="left">'
                           '<b>Environment: </b>',
                           s.environment, self.visit_environment)
        self._dump('</td></tr>')

    def visit_node(self, node):
        """Print the full DOT statement for one node."""
        self._dump('%s [shape=record,label=<<table border="0">'
                   % (node.node_name()))

        if node.state is None:
            state_label = 'Unspecified'
        else:
            state_label = node.state.state_id
        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
                   'State %s</b></td></tr>'
                   % (node.node_id, node.ptr, state_label))

        self._dump('<tr><td align="left" width="0">')
        if len(node.points) <= 1:
            self._dump('<b>Program point:</b></td></tr>')
        else:
            self._dump('<b>Program points:</b></td></tr>')
        self._dump('<tr><td align="left" width="0">'
                   '<table border="0" align="left" width="0">')
        for point in node.points:
            self.visit_program_point(point)
        self._dump('</table></td></tr>')

        if node.state is not None:
            self._dump('<hr />')
            self.visit_state(node.state)
        self._dump_raw('</table>>];\n')

    def visit_edge(self, pred, succ):
        """Print the DOT edge statement from pred to succ."""
        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))

    def visit_end_of_graph(self):
        """Print the closing brace of the digraph."""
        self._dump_raw('}\n')
364
365
366# A class that encapsulates traversal of the ExplodedGraph. Different explorer
367# kinds could potentially traverse specific sub-graphs.
class Explorer(object):
    """Walks every node of a graph in sorted name order for a visitor."""

    def __init__(self):
        super(Explorer, self).__init__()

    def explore(self, graph, visitor):
        """Feed graph to visitor.

        Calls visit_begin_graph once, then for each node name in sorted
        order visit_node followed by visit_edge for each of its successors
        (also in sorted order), and finally visit_end_of_graph.
        """
        visitor.visit_begin_graph(graph)
        for name in sorted(graph.nodes):
            logging.debug('Visiting ' + name)
            visitor.visit_node(graph.nodes[name])
            for succ_name in sorted(graph.nodes[name].successors):
                logging.debug('Visiting edge: %s -> %s ' % (name, succ_name))
                source = graph.nodes[name]
                target = graph.nodes[succ_name]
                visitor.visit_edge(source, target)
        visitor.visit_end_of_graph()
381
382
def main():
    """Read the .dot file named on the command line and print the rewrite.

    Options: -d/--debug selects DEBUG logging, -v/--verbose selects INFO
    logging; the default level is WARNING.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', type=str)
    # Both flags store into the same 'loglevel' destination.
    for short_flag, long_flag, level, help_text in (
            ('-d', '--debug', logging.DEBUG, 'enable debug prints'),
            ('-v', '--verbose', logging.INFO, 'enable info prints')):
        parser.add_argument(short_flag, long_flag, action='store_const',
                            dest='loglevel', const=level,
                            default=logging.WARNING, help=help_text)
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            # Lines are stripped of surrounding whitespace before parsing.
            graph.add_raw_line(raw_line.strip())

    Explorer().explore(graph, DotDumpVisitor())


# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
409