xref: /llvm-project/polly/lib/External/isl/imath/tools/mkdoc.py (revision f98ee40f4b5d7474fc67e82824bf6abbaedb7b1c)
1#!/usr/bin/env python
2##
3## Name:    mkdoc.py
4## Purpose: Extract documentation from header files.
5##
6## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
7##
8## Usage: mkdoc.py <template> <output>
9##
10from __future__ import print_function
11
12import collections, re, sys
13
# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
#
# A match is a "/** ... */" comment, a newline, and then the text that follows
# up to either a ";" at the end of a line or the first "{".  The comment body
# is captured in the "text" group and the declaration in the "decl" group.
doc = re.compile(
    r"""(?mx)/\*\* # open  /**
(?P<text>(?:[^*]|\*[^/])*)      # text      Does a thing
\*/\n                           # close */
(?P<decl>[^;{]*(?:;$|\{))"""
)  # the final group, "decl", matches text such as:  void f(x);

# A regular expression matching one to four spaces at the head of a line.
spc = re.compile(r"(?m)^ {1,4}")

# A regular expression matching an insertion point.  An insertion point has the
# form {{insert "header" name ...}}.  If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')

# A regular expression matching runs of non-identifier characters, for
# splitting.
nid = re.compile(r"\W+")

# A cache of already-parsed files, maps filename to declarations.
CACHE = {}
37
38
def last_word(s):
    """Return the final piece of s after splitting on non-identifier runs.

    Leading and trailing whitespace is removed before splitting.  When the
    trimmed text ends with a non-identifier character, the final piece is the
    empty string.
    """
    trimmed = s.strip()
    pieces = nid.split(trimmed)
    return pieces[-1]
42
43
def typeset(text):
    """Render comment text as a markdown list item.

    Runs of lines that begin with a space are treated as verbatim sections and
    wrapped in ``` fences.  The first output line is prefixed with the list
    bullet " -  "; every later non-empty line is indented by four spaces so it
    stays inside the list item.  Empty lines are left empty.
    """
    out = []
    in_code = False
    for raw in text.split("\n"):
        indented = raw.startswith(" ")
        if indented != in_code:
            # Crossing into or out of a verbatim run: emit a fence marker.
            out.append("```")
            in_code = indented
        out.append(raw)
    if in_code:
        # Close a verbatim run that extends to the end of the text.
        out.append("```")

    # str.split always yields at least one piece, so out is never empty.
    rendered = [" -  " + out[0]]
    for entry in out[1:]:
        rendered.append("    " + entry if entry else entry)
    return "\n".join(rendered)
61
62
class LIndex(object):
    """Represents a line offset index for text.

    The index records the offset at which each line ends, so that a character
    offset can be translated into a (line, column) pair by binary search.
    """

    def __init__(self, text):
        """Build the index for text.

        Params:
          text: the string whose line structure is indexed
        """
        # An array of ending offsets for each line.  The sentinel at position 0
        # stands for an imaginary newline just before the text, so that every
        # line (including the first) starts one past the previous entry.
        ends = [-1]

        # Scan forward for newlines, and push the offsets of the line breaks
        # onto the list so we can binary search them later.
        pos = 0
        while pos < len(text):
            nl = text.find("\n", pos)
            if nl < 0:
                break
            ends.append(nl)
            pos = nl + 1

        # The final line (possibly empty) ends at EOF.  Because the last
        # recorded offset is always below len(text) here, the list always has
        # at least one real entry after the sentinel.
        if ends[-1] < len(text):
            ends.append(len(text))
        self._len = len(text)
        self._index = ends

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.  A newline character
        belongs to the line it terminates, and pos == len(text) is accepted as
        the end-of-file position.

        Raises:
          IndexError: if pos is negative or greater than the text length
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # Binary search for the smallest line number whose end marker is at or
        # after pos.  The last marker equals the text length, so one exists.
        ends = self._index
        lo, hi = 1, len(ends) - 1
        while lo < hi:
            # Floor division keeps the midpoint an int on Python 3 as well.
            mid = (lo + hi) // 2
            if ends[mid] < pos:
                lo = mid + 1
            else:
                hi = mid

        # The line starts one past the previous line's end marker.
        return lo, pos - ends[lo - 1] - 1
109
110
class Decl(object):
    """Represents a single documented declaration.

    Attributes:
      name: the declared identifier, as extracted by last_word (may be empty)
      decl: the declaration with whitespace runs collapsed to single spaces
            and any trailing ";" or "{" removed
      comment: the comment text with trailing whitespace removed and up to
               four leading spaces stripped from each line
      line: the line number of the declaration, or None if unknown
    """

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com: the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        # The name is the last word before the first "(" if there is one,
        # otherwise the last word of the whole declaration.
        lp = decl.find("(")
        if lp < 0:
            self.name = last_word(decl.rstrip(";"))
        else:
            self.name = last_word(decl[:lp])
        self.decl = " ".join(decl.rstrip(";{").strip().split())
        self.comment = spc.sub("", com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def _name_offset(self):
        """Returns the offset in self.decl at which the name should be linked."""
        if not self.name:
            return 0

        # Look for the name where it was extracted from: at the end of the
        # text preceding the first "(".  Searching from the right avoids
        # matching an earlier token that merely contains the name, such as
        # the "x" inside "extern" for the declaration "extern int x".
        lp = self.decl.find("(")
        head = self.decl if lp < 0 else self.decl[:lp]
        pos = head.rfind(self.name)
        if pos < 0:
            # Attributes were changed after construction; fall back to a plain
            # search, which raises ValueError if the name is absent.
            pos = self.decl.index(self.name)
        return pos

    def markdown(self, path):
        """Returns a markdown rendering of this declaration.

        The declared name is wrapped in a link to path, with a "#L<line>"
        fragment when the line number is known.

        Params:
          path: the link target for the declaration's source file
        """
        pos = self._name_offset()
        if self.line is None:
            href = path
        else:
            href = "%s#L%d" % (path, self.line)
        decl = '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name) :],
        )
        return """------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
""".format(
            name=self.name, decl=decl, comment=typeset(self.comment)
        )
151
152
def parse_decls(text):
    """Collect the documented declarations found in text.

    Returns an ordered dictionary mapping each declaration's name to its Decl.
    When two declarations share a name, the later one replaces the earlier
    one's value.
    """
    line_index = LIndex(text)
    found = collections.OrderedDict()
    for match in doc.finditer(text):
        # The declaration is located by the line on which its text begins.
        start = match.start("decl")
        lineno = line_index.linecol(start)[0]
        entry = Decl(match.group("text"), match.group("decl"), lineno)
        found[entry.name] = entry
    return found
162
163
def load_file(path):
    """Load declarations from path, or use cached results.

    Params:
      path: the name of the header file to read

    Returns the declarations parsed from the file by parse_decls.  The result
    is stored in CACHE, so each path is read and parsed at most once.
    """
    if path not in CACHE:
        # Use open() rather than the Python 2-only file() builtin; the "U"
        # mode flag is dropped because it was removed in Python 3.11 and
        # Python 3 text mode already translates line endings to "\n".
        with open(path, "r") as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]
170
171
def main(args):
    """Expand the insertion points of a template into a markdown file.

    Params:
      args: the command-line arguments, [template path, output path]

    Text outside insertion points is copied through unchanged, after a
    generated-file notice.  Each insertion point is replaced by the rendered
    declarations it selects.  Exits with status 1 after printing a usage
    message if args does not have exactly two elements.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template = args[0]
    doc_markdown = args[1]

    # Use open() rather than the Python 2-only file() builtin; the "U" mode
    # flag is dropped because it was removed in Python 3.11 and Python 3 text
    # mode already translates line endings to "\n".
    with open(doc_template, "r") as template_file:
        template = template_file.read()

    with open(doc_markdown, "wt") as output:
        print(
            """<!--
  This file was generated from "{0}" by mkdoc.py
  DO NOT EDIT
-->
""".format(
                doc_template
            ),
            file=output,
        )

        pos = 0  # last position of input copied

        # Look for substitution markers in the template, and replace them with
        # their content.
        for ip in ins.finditer(template):
            output.write(template[pos : ip.start()])
            pos = ip.end()

            decls = load_file(ip.group("file"))
            if ip.group("names"):  # pick the selected names, in order
                decls = collections.OrderedDict(
                    (key, decls[key]) for key in ip.group("names").strip().split()
                )

            # Render the selected declarations.
            for decl in decls.values():
                print(decl.markdown(ip.group("file")), file=output)

        # Clean up any remaining template bits
        output.write(template[pos:])
215
216
# When run as a script, hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    main(sys.argv[1:])
219