xref: /llvm-project/polly/lib/External/isl/imath/tools/mkdoc.py (revision f98ee40f4b5d7474fc67e82824bf6abbaedb7b1c)
1658eb9e1SMichael Kruse#!/usr/bin/env python
2658eb9e1SMichael Kruse##
3658eb9e1SMichael Kruse## Name:    mkdoc.py
4658eb9e1SMichael Kruse## Purpose: Extract documentation from header files.
5658eb9e1SMichael Kruse##
6658eb9e1SMichael Kruse## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
7658eb9e1SMichael Kruse##
8658eb9e1SMichael Kruse## Usage: mkdoc.py <template> <output>
9658eb9e1SMichael Kruse##
10658eb9e1SMichael Krusefrom __future__ import print_function
11658eb9e1SMichael Kruse
12658eb9e1SMichael Kruseimport collections, re, sys
13658eb9e1SMichael Kruse
# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
# Group "text" captures the body of a /** ... */ comment.  Group "decl"
# captures the text that follows the comment, ending at a ";" that sits at the
# end of a line or at the first "{".
doc = re.compile(
    r"""(?mx)/\*\* # open  /**
(?P<text>(?:[^*]|\*[^/])*)      # text      Does a thing
\*/\n                           # close */
(?P<decl>[^;{]*(?:;$|\{))"""
)  # the last pattern line matches the declaration, e.g. "void f(x);"

# A regular expression matching 1 to 4 spaces at the head of a line.
spc = re.compile(r"(?m)^ {1,4}")

# A regular expression matching an insertion point.  An insertion point has the
# form {{insert "header" name ...}}.  If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')

# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r"\W+")

# A cache of already-parsed files, maps filename to declarations.
CACHE = {}
37658eb9e1SMichael Kruse
38658eb9e1SMichael Kruse
def last_word(s):
    """Returns the last identifier-shaped word in s.

    Non-identifier characters at either end of s (whitespace, or trailing
    punctuation such as "{" or ")") are ignored.  Returns the empty string if
    s contains no identifier characters at all.
    """
    # Splitting on runs of non-identifier characters yields an empty string
    # wherever s begins or ends with such a run; discard those so trailing
    # punctuation cannot mask the real last word.
    words = [w for w in nid.split(s) if w]
    return words[-1] if words else ""
42658eb9e1SMichael Kruse
43658eb9e1SMichael Kruse
def typeset(text):
    """Renders text with verbatim sections into markdown.

    Runs of lines that begin with a space are treated as verbatim and wrapped
    in ``` fences.  The result is laid out as a single markdown list item: the
    first output line is prefixed with " -  " and every later non-empty line
    is indented by four spaces.
    """
    rendered = []
    in_fence = False
    for raw in text.split("\n"):
        indented = raw.startswith(" ")
        # Entering or leaving an indented run toggles the fence.
        if indented != in_fence:
            rendered.append("```")
            in_fence = indented
        rendered.append(raw)
    if in_fence:
        # Close a verbatim section that runs to the end of the text.
        rendered.append("```")

    # str.split always yields at least one element, so rendered is non-empty.
    head, rest = rendered[0], rendered[1:]
    out = [" -  " + head]
    out.extend("    " + entry if entry else entry for entry in rest)
    return "\n".join(out)
61658eb9e1SMichael Kruse
62658eb9e1SMichael Kruse
class LIndex(object):
    """Represents a line offset index for text."""

    def __init__(self, text):
        """Builds the index of line-ending offsets for text."""
        # An array of ending offsets for each line, with a sentinel at position
        # 0 to make the index arithmetic easier.
        idx = [0]

        # Scan forward for newlines or EOF, and push the offsets of the line
        # breaks onto the list so we can binary search them later.
        pos = 0
        while pos < len(text):
            nl = text.find("\n", pos)
            if nl < 0:
                break
            idx.append(nl)
            pos = nl + 1
        # Whatever follows the last newline (possibly nothing) ends at EOF.
        if idx[-1] < len(text):
            idx.append(len(text))
        self._len = len(text)
        self._index = idx

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.  A newline character is
        counted as the last column of the line it terminates.

        Raises:
          IndexError: if pos is negative or greater than the text length.
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # Binary search for the line number whose end marker is at or after
        # pos and whose previous line's end is before pos.
        idx = self._index
        i, j = 1, len(idx)
        while i < j:
            # Floor division keeps m an integer on Python 3 as well as 2.
            m = (i + j) // 2
            if idx[m] < pos:
                i = m + 1
            elif idx[m - 1] < pos:
                # Line 1 starts at offset 0 (idx[0] is only a sentinel); every
                # later line starts one past the newline that ended the
                # previous line.
                start = 0 if m == 1 else idx[m - 1] + 1
                return m, pos - start
            else:
                j = m

        # The search only falls through for pos == 0, since no index entry is
        # strictly less than 0; that position is the start of line 1.
        return 1, pos
109658eb9e1SMichael Kruse
110658eb9e1SMichael Kruse
class Decl(object):
    """Represents a single documented declaration."""

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com: the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        lp = decl.find("(")
        if lp < 0:
            # Not a function: the name is the last word before the terminator.
            # Strip "{" as well as ";" so that a brace-terminated declaration
            # does not produce an empty name.
            self.name = last_word(decl.rstrip(";{"))
        else:
            # A function: the name is the last word before the parameter list.
            self.name = last_word(decl[:lp])
        # Drop the terminator and collapse all whitespace runs to one space.
        self.decl = " ".join(decl.rstrip(";{").strip().split())
        # Remove up to four leading spaces from each comment line.
        self.comment = spc.sub("", com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def markdown(self, path):
        """Renders this declaration and its comment as a markdown fragment.

        The declared name is wrapped in a link to path; when a line number is
        known the link carries a "#L<line>" fragment.

        Params:
          path: the link target for the file containing the declaration
        """
        # Match the name as a whole word so that a short name is not found
        # inside a longer identifier earlier in the declaration.  Fall back to
        # a plain substring search if no whole-word match exists.
        found = re.search(r"\b%s\b" % re.escape(self.name), self.decl)
        pos = found.start() if found else self.decl.index(self.name)

        # line defaults to None; omit the fragment rather than fail on "%d".
        if self.line is None:
            href = path
        else:
            href = "%s#L%d" % (path, self.line)

        decl = '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name) :],
        )
        return """------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
""".format(
            name=self.name, decl=decl, comment=typeset(self.comment)
        )
151658eb9e1SMichael Kruse
152658eb9e1SMichael Kruse
def parse_decls(text):
    """Parse the documented declarations found in text.

    Returns an ordered dictionary mapping each declaration's name to its Decl,
    in the order the declarations appear.  A later declaration with the same
    name replaces an earlier one.
    """
    line_index = LIndex(text)
    found = collections.OrderedDict()
    for match in doc.finditer(text):
        # Record the line on which the declaration itself begins.
        line_no = line_index.linecol(match.start("decl"))[0]
        entry = Decl(match.group("text"), match.group("decl"), line_no)
        found[entry.name] = entry
    return found
162658eb9e1SMichael Kruse
163658eb9e1SMichael Kruse
def load_file(path):
    """Load declarations from path, or use cached results.

    The file is read and parsed only the first time a given path is requested;
    the parsed declarations are stored in CACHE and returned on later calls.
    """
    if path not in CACHE:
        # Use open() rather than the Python 2-only file() builtin, and plain
        # "r" rather than "rU": Python 3 text mode already translates
        # newlines, and the "U" flag is rejected by Python 3.11 and later.
        with open(path, "r") as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]
170658eb9e1SMichael Kruse
171658eb9e1SMichael Kruse
def main(args):
    """Expand a documentation template into a markdown file.

    Params:
      args: a list of exactly two strings, the template path to read and the
            markdown path to write

    Each insertion marker in the template is replaced by the rendered
    declarations from the named header; all other template text is copied
    through unchanged.  Prints a usage message and exits with status 1 if the
    argument count is wrong.  A name listed in a marker that is not found in
    its header raises KeyError.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template = args[0]
    doc_markdown = args[1]

    # open() replaces the Python 2-only file() builtin; plain "r" replaces
    # "rU", which Python 3.11 and later reject.
    with open(doc_template, "r") as template_file:
        template = template_file.read()

    with open(doc_markdown, "wt") as output:
        print(
            """<!--
  This file was generated from "{0}" by mkdoc.py
  DO NOT EDIT
-->
""".format(
                doc_template
            ),
            file=output,
        )

        pos = 0  # last position of input copied

        # Look for substitution markers in the template, and replace them with
        # their content.
        for ip in ins.finditer(template):
            output.write(template[pos : ip.start()])
            pos = ip.end()

            decls = load_file(ip.group("file"))
            if ip.group("names"):  # pick the selected names, in order
                decls = collections.OrderedDict(
                    (key, decls[key]) for key in ip.group("names").strip().split()
                )

            # Render the selected declarations.
            for decl in decls.values():
                print(decl.markdown(ip.group("file")), file=output)

        # Clean up any remaining template bits
        output.write(template[pos:])
215658eb9e1SMichael Kruse
216658eb9e1SMichael Kruse
# Script entry point: pass the command-line arguments (without the program
# name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
219