#!/usr/bin/env python
##
## Name:     mkdoc.py
## Purpose:  Extract documentation from header files.
##
## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
##
## Usage: mkdoc.py <template> <output>
##
from __future__ import print_function

import bisect
import collections
import re
import sys

# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
doc = re.compile(
    r"""(?mx)/\*\*             # open  /**
(?P<text>(?:[^*]|\*[^/])*)     # text      Does a thing
\*/\n                          # close */
(?P<decl>[^;{]*(?:;$|\{))"""
)  # decl  void f(x);

# A regular expression matching up to 4 spaces at the head of a line.
spc = re.compile(r"(?m)^ {1,4}")

# A regular expression matching an insertion point.  An insertion point has the
# form {{insert "header" name ...}}.  If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')

# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r"\W+")

# A cache of already-parsed files, maps filename to declarations.
CACHE = {}

# Prefix for the first line of a typeset comment, and the indentation applied
# to every following non-blank line.  The indentation is derived from the
# bullet so continuation lines (and code fences) line up with the bullet's
# content column and therefore remain part of the same markdown list item.
# NOTE(review): the copy of this file under review had runs of whitespace
# collapsed, so the exact widths of these prefixes (and of the indentation
# inside _BANNER) should be confirmed against the upstream file.
_BULLET = " - "
_INDENT = " " * len(_BULLET)

# Banner written at the top of every generated file; {0} is the template path.
_BANNER = """<!--
  This file was generated from "{0}" by mkdoc.py
  DO NOT EDIT
-->
"""


def last_word(s):
    """Returns the last identifier-shaped word in s.

    Trailing punctuation (as in "struct foo {") and words that start with a
    digit (as the "3" in "int a[3]") are skipped, since neither can be an
    identifier.  Returns "" if s contains no identifier-shaped word.
    """
    for word in reversed(nid.split(s.strip())):
        if word and not word[0].isdigit():
            return word
    return ""


def typeset(text):
    """Renders text with verbatim sections into markdown.

    Each run of lines that begin with a space is treated as verbatim and is
    wrapped in a code fence.  The result is formatted as one list item: the
    first line carries the bullet and every other non-blank line is indented
    to match.  Blank lines are left empty.
    """
    lines = []
    fence = False
    for line in text.split("\n"):
        verbatim = line.startswith(" ")
        if verbatim != fence:
            # Entering or leaving a verbatim run: open or close the fence.
            lines.append("```")
            fence = verbatim
        lines.append(line)
    if fence:
        lines.append("```")

    # str.split always yields at least one element, so lines[0] exists.
    out = [_BULLET + lines[0]]
    out.extend(_INDENT + line if line else line for line in lines[1:])
    return "\n".join(out)


class LIndex(object):
    """Represents a line offset index for text."""

    def __init__(self, text):
        # Offsets at which each line starts: offset 0, plus the offset just
        # past every newline.  The list is sorted, so it can be bisected.
        starts = [0]
        starts.extend(i + 1 for i, ch in enumerate(text) if ch == "\n")
        self._len = len(text)
        self._starts = starts

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.  A newline character
        belongs to the line it terminates.  pos may equal the length of the
        text (the end-of-file position).

        Raises:
          IndexError: if pos is negative or past the end of the text.
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # The number of line starts at or before pos is the 1-based line.
        line = bisect.bisect_right(self._starts, pos)
        return line, pos - self._starts[line - 1]


class Decl(object):
    """Represents a single documented declaration."""

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com:  the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        lp = decl.find("(")
        if lp < 0:
            # Not a function: the name is the last word of the declaration.
            self.name = last_word(decl.rstrip(";{"))
        else:
            # A function: the name is the last word before the parameters.
            self.name = last_word(decl[:lp])
        # Drop the terminator and squeeze all whitespace runs to one space.
        self.decl = " ".join(decl.rstrip(";{").strip().split())
        # Remove up to 4 spaces of comment indentation from each line.
        self.comment = spc.sub("", com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def markdown(self, path):
        """Returns a markdown rendering of this declaration.

        The declared name is turned into a link to path; when a line number
        is known the link targets that line ("path#L<line>").
        """
        # Search only the part before the parameter list, and take the last
        # occurrence, so that a return type which merely contains the name
        # (e.g. "mp_int_t mp_int(void)") is not linked by mistake.
        head = self.decl.split("(", 1)[0]
        pos = head.rfind(self.name)
        href = path if self.line is None else "%s#L%d" % (path, self.line)
        decl = '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name):],
        )
        return """------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
""".format(
            name=self.name, decl=decl, comment=typeset(self.comment)
        )


def parse_decls(text):
    """Parse a dictionary of declarations from text.

    Returns an ordered mapping from declared name to Decl, in the order the
    declarations appear in text.  A later declaration replaces an earlier one
    with the same name.
    """
    decls = collections.OrderedDict()
    idx = LIndex(text)
    for m in doc.finditer(text):
        raw = m.group("decl")
        # The match may begin with blank lines or indentation; report the
        # line on which the declaration text itself starts.
        start = m.start("decl") + len(raw) - len(raw.lstrip())
        line, _ = idx.linecol(start)
        d = Decl(m.group("text"), raw, line)
        decls[d.name] = d
    return decls


def load_file(path):
    """Load declarations from path, or use cached results."""
    if path not in CACHE:
        with open(path, "r") as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]


def _render(template, template_name):
    """Returns the generated document for template as a single string.

    Every insertion point in template is replaced by the markdown for the
    declarations it selects; all other template text is copied unchanged.

    Raises:
      KeyError: if an insertion point names a declaration that the header
        does not document.
    """
    # print() in the original appended a newline after the banner.
    parts = [_BANNER.format(template_name), "\n"]
    pos = 0  # last position of input copied

    # Look for substitution markers in the template, and replace them with
    # their content.
    for ip in ins.finditer(template):
        parts.append(template[pos:ip.start()])
        pos = ip.end()

        path = ip.group("file")
        decls = load_file(path)
        names = ip.group("names")
        if names:  # pick the selected names, in order
            selected = collections.OrderedDict()
            for key in names.split():
                if key not in decls:
                    raise KeyError(
                        'no documented declaration "%s" in "%s"' % (key, path)
                    )
                selected[key] = decls[key]
            decls = selected

        # Render the selected declarations, one per line.
        for decl in decls.values():
            parts.append(decl.markdown(path))
            parts.append("\n")

    # Clean up any remaining template bits
    parts.append(template[pos:])
    return "".join(parts)


def main(args):
    """Expands the template args[0] and writes the result to args[1].

    The document is rendered completely before the output file is opened, so
    a failure while reading or parsing does not leave a truncated output.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template, doc_markdown = args

    with open(doc_template, "r") as template_file:
        template = template_file.read()

    rendered = _render(template, doc_template)

    with open(doc_markdown, "w") as output_file:
        output_file.write(rendered)


if __name__ == "__main__":
    main(sys.argv[1:])