#!/usr/bin/env python
##
## Name:     mkdoc.py
## Purpose:  Extract documentation from header files.
##
## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
##
## Usage: mkdoc.py <template> <output>
##
from __future__ import print_function

import bisect
import collections
import re
import sys

# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
doc = re.compile(
    r"""(?mx)/\*\*             # open  /**
(?P<text>(?:[^*]|\*[^/])*) # text      Does a thing
\*/\n                      # close */
(?P<decl>[^;{]*(?:;$|\{))"""
)  # decl  void f(x);

# A regular expression matching up to 4 spaces at the head of a line.
spc = re.compile(r"(?m)^ {1,4}")

# A regular expression matching an insertion point.  An insertion point has the
# form {{insert "header" name ...}}.  If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')

# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r"\W+")

# A cache of already-parsed files, maps filename to declarations.
CACHE = {}

# Mode used to read text inputs.  Python 2 needs "U" to get universal newline
# translation; Python 3 translates newlines by default and rejects "U"
# outright as of 3.11.
_READ_MODE = "rU" if sys.version_info[0] < 3 else "r"


def last_word(s):
    """Returns the last identifier-shaped word in s.

    Words are maximal runs of word characters (letters, digits, underscore).
    Trailing punctuation is ignored, so "struct foo {" yields "foo".  Returns
    the empty string if s contains no word characters at all.
    """
    # re.split yields empty strings at the edges when s begins or ends with a
    # separator; discard them so the last element is a real word.
    words = [w for w in nid.split(s) if w]
    return words[-1] if words else ""


def typeset(text):
    """Renders text with verbatim sections into markdown.

    Runs of lines that begin with a space are treated as verbatim and wrapped
    in ``` fences.  The result is a single markdown list item: the first line
    carries the bullet and every other non-empty line is indented to the
    bullet's content column, so that fences stay inside the item.
    """
    bullet = " - "
    indent = " " * len(bullet)

    lines = []
    fence = False
    for line in text.split("\n"):
        # Toggle the fence whenever we cross between prose and verbatim text.
        if fence != line.startswith(" "):
            lines.append("```")
            fence = not fence
        lines.append(line)
    if fence:
        lines.append("```")

    for i, line in enumerate(lines):
        if i == 0:
            lines[i] = bullet + line
        elif line:  # leave blank lines empty rather than padding them
            lines[i] = indent + line
    return "\n".join(lines)


class LIndex(object):
    """Represents a line offset index for text."""

    def __init__(self, text):
        """Builds the index by recording the offset at which each line starts.

        Params:
          text: the string to be indexed
        """
        # Line 1 starts at offset 0; every later line starts just past a
        # newline.  The list is sorted by construction, so it can be searched
        # with bisect.
        starts = [0]
        pos = text.find("\n")
        while pos >= 0:
            starts.append(pos + 1)
            pos = text.find("\n", pos + 1)
        self._len = len(text)
        self._starts = starts

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.  A newline character
        belongs to the line it terminates.  The end-of-text offset (equal to
        the length of the text) is accepted.

        Raises:
          IndexError: if pos is negative or beyond the end of the text.
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)

        # The number of line starts at or before pos is the 1-based line.
        line = bisect.bisect_right(self._starts, pos)
        return line, pos - self._starts[line - 1]


class Decl(object):
    """Represents a single documented declaration."""

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com:  the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        # For something with a parameter list the name is the word before the
        # first "("; otherwise it is the last word of the declaration.
        lp = decl.find("(")
        head = decl if lp < 0 else decl[:lp]
        self.name = last_word(head)

        # Drop the terminator and collapse all whitespace runs to one space.
        self.decl = " ".join(decl.rstrip(";{").split())
        self.comment = spc.sub("", com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def markdown(self, path):
        """Returns a markdown rendering of this declaration.

        The declared name becomes a link to path, anchored at the line of the
        declaration when the line is known.

        Params:
          path: the path of the file containing the declaration
        """
        # Locate the name as a whole word so that a short name is not found
        # inside a longer identifier (e.g. "in" inside "int").
        found = None
        if self.name:
            found = re.search(r"\b%s\b" % re.escape(self.name), self.decl)
        pos = found.start() if found else self.decl.index(self.name)

        href = path if self.line is None else "%s#L%d" % (path, self.line)
        decl = '%s<a href="%s">%s</a>%s' % (
            self.decl[:pos],
            href,
            self.name,
            self.decl[pos + len(self.name) :],
        )
        return """------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
""".format(
            name=self.name, decl=decl, comment=typeset(self.comment)
        )


def parse_decls(text):
    """Parse a dictionary of declarations from text.

    Returns an ordered dictionary mapping each declared name to its Decl, in
    order of appearance.  If a name is documented more than once, the last
    occurrence wins.
    """
    decls = collections.OrderedDict()
    idx = LIndex(text)
    for m in doc.finditer(text):
        raw = m.group("decl")
        # The match may begin with blank lines between the comment and the
        # declaration; skip them so the line refers to the declaration itself.
        start = m.start("decl") + len(raw) - len(raw.lstrip())
        line, _ = idx.linecol(start)
        d = Decl(m.group("text"), raw, line)
        decls[d.name] = d
    return decls


def load_file(path):
    """Load declarations from path, or use cached results."""
    if path not in CACHE:
        with open(path, _READ_MODE) as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]


def _select(decls, names, path):
    """Returns the declarations named by names, in the order given.

    Raises:
      KeyError: if a requested name has no documented declaration.
    """
    picked = collections.OrderedDict()
    for name in names:
        if name not in decls:
            raise KeyError("no documented declaration %r in %s" % (name, path))
        picked[name] = decls[name]
    return picked


def _expand_template(template, output):
    """Copies template to output, replacing each insertion point."""
    pos = 0  # last position of input copied

    # Look for substitution markers in the template, and replace them with
    # their content.
    for ip in ins.finditer(template):
        output.write(template[pos : ip.start()])
        pos = ip.end()

        path = ip.group("file")
        decls = load_file(path)
        if ip.group("names"):  # pick the selected names, in order
            decls = _select(decls, ip.group("names").split(), path)

        # Render the selected declarations.
        for decl in decls.values():
            print(decl.markdown(path), file=output)

    # Clean up any remaining template bits
    output.write(template[pos:])


def main(args):
    """Expands the template named by args[0] into the file named by args[1].

    Prints a usage message and exits with status 1 unless exactly two
    arguments are given.
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)

    doc_template = args[0]
    doc_markdown = args[1]

    with open(doc_template, _READ_MODE) as source:
        template = source.read()

    with open(doc_markdown, "wt") as output:
        print(
            """<!--
 This file was generated from "{0}" by mkdoc.py
 DO NOT EDIT
-->
""".format(
                doc_template
            ),
            file=output,
        )
        _expand_template(template, output)


if __name__ == "__main__":
    main(sys.argv[1:])