#!/usr/bin/env python3
# A tool to parse the FormatStyle struct from Format.h and update the
# documentation in ../ClangFormatStyleOptions.rst automatically.
# Run from the directory in which this file is located to update the docs.

import inspect
import os
import re
import sys
from io import TextIOWrapper
from typing import Set

CLANG_DIR = os.path.join(os.path.dirname(__file__), '../..')
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Format/Format.h')
INCLUDE_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Tooling/Inclusions/IncludeStyle.h')
DOC_FILE = os.path.join(CLANG_DIR, 'docs/ClangFormatStyleOptions.rst')

PLURALS_FILE = os.path.join(os.path.dirname(__file__), 'plurals.txt')

# Plurals already recorded in PLURALS_FILE by earlier runs. Mode 'a+' creates
# the file when it does not exist yet; seek(0) is required because append mode
# positions the stream at the end of the file.
plurals: Set[str] = set()
with open(PLURALS_FILE, 'a+', encoding='utf-8') as f:
    f.seek(0)
    plurals = set(f.read().splitlines())

# Matches a `<type> <name>;` member declaration; group 1 is the type and
# group 2 is the member name.
_FIELD_DECL_RE = re.compile(r'([<>:\w(,\s)]+)\s+(\w+);')


def substitute(text, tag, contents):
    """Replace the region between the START_<tag> and END_<tag> markers.

    Args:
        text: The full document text.
        tag: Marker suffix; the region is delimited by the lines
            ``.. START_<tag>`` and ``.. END_<tag>``.
        contents: New text to place between the two markers.

    Returns:
        ``text`` with the marked region replaced.

    Raises:
        ValueError: If the marker pair does not occur in ``text``.
    """
    replacement = '\n.. START_%s\n\n%s\n\n.. END_%s\n' % (tag, contents, tag)
    pattern = r'\n\.\. START_%s\n.*\n\.\. END_%s\n' % (tag, tag)
    # A callable replacement is inserted verbatim: backslashes in the generated
    # text are not treated as escapes, and '%' characters elsewhere in the
    # document cannot interfere with the substitution.
    result, count = re.subn(pattern, lambda _match: replacement, text, flags=re.S)
    if count == 0:
        raise ValueError('START_%s/END_%s markers not found' % (tag, tag))
    return result


def register_plural(singular: str, plural: str):
    """Record a generated plural and ask for a review if it is new.

    A plural that is not yet in ``plurals`` is added to the set, appended to
    PLURALS_FILE and reported on stderr together with the line number of the
    calling frame.

    Args:
        singular: The word that was pluralized (used in the message only).
        plural: The generated plural form.

    Returns:
        ``plural``, unchanged.
    """
    if plural not in plurals:
        # Print the usage hint only once per run; the marker is stored as an
        # attribute on the function object itself.
        if not hasattr(register_plural, "generated_new_plural"):
            print('Plural generation: you can use '
                  f'`git checkout -- {os.path.relpath(PLURALS_FILE)}` '
                  'to reemit warnings or `git add` to include new plurals\n')
        register_plural.generated_new_plural = True

        plurals.add(plural)
        with open(PLURALS_FILE, 'a', encoding='utf-8') as f:
            f.write(plural + '\n')
        cf = inspect.currentframe()
        lineno = ''
        if cf and cf.f_back:
            lineno = ':' + str(cf.f_back.f_lineno)
        print(f'{__file__}{lineno} check if plural of {singular} is {plural}', file=sys.stderr)
    return plural


def pluralize(word: str):
    """Return a heuristic English plural of ``word`` and register it.

    The suffix rules are checked case-insensitively; the casing of the stem is
    kept as given.
    """
    lword = word.lower()
    if len(lword) >= 2 and lword[-1] == 'y' and lword[-2] not in 'aeiou':
        # Consonant + 'y': Category -> Categories.
        return register_plural(word, word[:-1] + 'ies')
    elif lword.endswith(('s', 'sh', 'ch', 'x', 'z')):
        # Sibilant endings keep the whole word: Box -> Boxes.
        return register_plural(word, word + 'es')
    elif lword.endswith('fe'):
        return register_plural(word, word[:-2] + 'ves')
    elif lword.endswith('f') and not lword.endswith('ff'):
        return register_plural(word, word[:-1] + 'ves')
    else:
        return register_plural(word, word + 's')


def to_yaml_type(typestr: str):
    """Map a C++ type name to the type name shown in the documentation.

    ``std::vector<T>`` becomes ``List of <plural of T's name>``; any type that
    is not recognized is returned unchanged.
    """
    if typestr == 'bool':
        return 'Boolean'
    elif typestr == 'int':
        return 'Integer'
    elif typestr == 'unsigned':
        return 'Unsigned'
    elif typestr == 'std::string':
        return 'String'

    subtype, napplied = re.subn(r'^std::vector<(.*)>$', r'\1', typestr)
    if napplied == 1:
        return 'List of ' + pluralize(to_yaml_type(subtype))

    return typestr


def doxygen2rst(text):
    r"""Convert the doxygen markup used in the comments to reStructuredText.

    ``<tt>x</tt>`` and ``\c x`` become inline literals; any other
    ``\command`` followed by a space is removed.
    """
    text = re.sub(r'<tt>\s*(.*?)\s*<\/tt>', r'``\1``', text)
    text = re.sub(r'\\c ([^ ,;\.]+)', r'``\1``', text)
    text = re.sub(r'\\\w+ ', '', text)
    return text


def indent(text, columns, indent_first_line=True):
    """Indent the non-empty lines of ``text`` by ``columns`` spaces.

    Args:
        text: Text to indent.
        columns: Number of spaces to insert.
        indent_first_line: When false, the first line is left untouched.

    Returns:
        The indented text. Empty lines stay empty, and a leading newline is
        never preceded by indentation.
    """
    indent_str = ' ' * columns
    s = re.sub(r'\n([^\n])', '\n' + indent_str + '\\1', text, flags=re.S)
    if not indent_first_line or s.startswith('\n'):
        return s
    return indent_str + s


class Option:
    """A documented member of the parsed style struct."""

    def __init__(self, name, opt_type, comment, version):
        self.name = name
        self.type = opt_type
        self.comment = comment.strip()
        # Filled in by OptionsReader.read_options() once all types are known.
        self.enum = None
        self.nested_struct = None
        self.version = version

    def __str__(self):
        s = ".. _%s:\n\n**%s** (``%s``) " % (self.name, self.name, to_yaml_type(self.type))
        if self.version:
            s += ':versionbadge:`clang-format %s` ' % self.version
        s += ':ref:`¶ <%s>`\n%s' % (self.name, doxygen2rst(indent(self.comment, 2)))
        if self.enum and self.enum.values:
            s += indent('\n\nPossible values:\n\n%s\n' % self.enum, 2)
        if self.nested_struct:
            s += indent('\n\nNested configuration flags:\n\n%s\n' % self.nested_struct,
                        2)
        return s


class NestedStruct:
    """A struct declared inside the style struct, with its documented members."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()
        self.values = []

    def __str__(self):
        return self.comment + '\n' + '\n'.join(map(str, self.values))


class NestedField:
    """A non-enum member of a nested struct."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()

    def __str__(self):
        return '\n* ``%s`` %s' % (
            self.name,
            doxygen2rst(indent(self.comment, 2, indent_first_line=False)))


class Enum:
    """An enum declared inside the style struct, with its documented values."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()
        self.values = []

    def __str__(self):
        return '\n'.join(map(str, self.values))


class NestedEnum:
    """A member of a nested struct whose type is a previously parsed enum."""

    def __init__(self, name, enumtype, comment, values):
        self.name = name
        self.comment = comment
        self.values = values
        self.type = enumtype

    def __str__(self):
        s = '\n* ``%s %s``\n%s' % (to_yaml_type(self.type), self.name,
                                   doxygen2rst(indent(self.comment, 2)))
        s += indent('\nPossible values:\n\n', 2)
        s += indent('\n'.join(map(str, self.values)), 2)
        return s


class EnumValue:
    """A documented enumerator and the name it has in configuration files."""

    def __init__(self, name, comment, config):
        self.name = name
        self.comment = comment
        self.config = config

    def __str__(self):
        return '* ``%s`` (in configuration: ``%s``)\n%s' % (
            self.name,
            # Everything up to and including the last underscore is dropped.
            re.sub('.*_', '', self.config),
            doxygen2rst(indent(self.comment, 2)))


def _parse_field_declaration(line):
    """Split a `<type> <name>;` declaration into a (type, name) tuple.

    Raises:
        Exception: If ``line`` is not a member declaration.
    """
    match = _FIELD_DECL_RE.match(line)
    if not match:
        raise Exception('Invalid format, expected field declaration\n' + line)
    return match.groups()


class OptionsReader:
    """Parses the documented options out of a style struct header."""

    def __init__(self, header: TextIOWrapper):
        self.header = header
        self.in_code_block = False
        self.code_indent = 0
        self.lineno = 0
        self.last_err_lineno = -1

    def __file_path(self):
        return os.path.relpath(self.header.name)

    def __print_line(self, line: str):
        print(f'{self.lineno:>6} | {line}', file=sys.stderr)

    def __warning(self, msg: str, line: str):
        print(f'{self.__file_path()}:{self.lineno}: warning: {msg}:', file=sys.stderr)
        self.__print_line(line)

    def __clean_comment_line(self, line: str):
        r"""Convert one stripped `///` comment line into documentation text.

        `\code`/`\endcode` and `\warning`/`\endwarning` are mapped to the
        corresponding reStructuredText directives; every other line loses its
        first four characters and gets a trailing newline. Warnings about
        malformed code blocks are printed to stderr.
        """
        match = re.match(r'^/// (?P<indent> +)?\\code(\{.(?P<lang>\w+)\})?$', line)
        if match:
            if self.in_code_block:
                self.__warning('`\\code` in another `\\code`', line)
            self.in_code_block = True
            indent_str = match.group('indent')
            if not indent_str:
                indent_str = ''
            self.code_indent = len(indent_str)
            lang = match.group('lang')
            if not lang:
                lang = 'c++'
            return f'\n{indent_str}.. code-block:: {lang}\n\n'

        endcode_match = re.match(r'^/// +\\endcode$', line)
        if endcode_match:
            if not self.in_code_block:
                self.__warning('no correct `\\code` found before this `\\endcode`', line)
            self.in_code_block = False
            return ''

        # check code block indentation
        # NOTE(review): the number of spaces in the '/// ' prefix below could
        # not be verified (whitespace may have been collapsed) - confirm it.
        if (self.in_code_block and not line == '///' and not
                line.startswith('/// ' + ' ' * self.code_indent)):
            if self.last_err_lineno == self.lineno - 1:
                # Continuation of the previous warning: only show the line.
                self.__print_line(line)
            else:
                self.__warning('code block should be indented', line)
            # Updated on both paths so that a run of consecutive offending
            # lines is reported under a single warning.
            # NOTE(review): nesting level reconstructed - confirm.
            self.last_err_lineno = self.lineno

        match = re.match(r'^/// \\warning$', line)
        if match:
            return '\n.. warning:: \n\n'

        endwarning_match = re.match(r'^/// +\\endwarning$', line)
        if endwarning_match:
            return ''
        return line[4:] + '\n'

    def read_options(self):
        """Parse the header and return the list of documented options.

        Returns:
            A list of Option objects in declaration order, with ``enum`` or
            ``nested_struct`` set for options of such a type.

        Raises:
            Exception: If the header has an unexpected layout, ends before the
                struct is closed, or an option has an unknown type.
        """
        class State:
            BeforeStruct, Finished, InStruct, InNestedStruct, InNestedFieldComment, \
                InFieldComment, InEnum, InEnumMemberComment = range(8)
        state = State.BeforeStruct

        options = []
        enums = {}
        nested_structs = {}
        comment = ''
        enum = None
        nested_struct = None
        version = None

        for line in self.header:
            self.lineno += 1
            line = line.strip()
            if state == State.BeforeStruct:
                if line in ('struct FormatStyle {', 'struct IncludeStyle {'):
                    state = State.InStruct
            elif state == State.InStruct:
                if line.startswith('///'):
                    state = State.InFieldComment
                    comment = self.__clean_comment_line(line)
                elif line == '};':
                    state = State.Finished
                    break
            elif state == State.InFieldComment:
                if line.startswith(r'/// \version'):
                    match = re.match(r'/// \\version\s*(?P<version>[0-9.]+)*', line)
                    if match:
                        version = match.group('version')
                elif line.startswith('///'):
                    comment += self.__clean_comment_line(line)
                elif line.startswith('enum'):
                    state = State.InEnum
                    name = re.sub(r'enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{', '\\1', line)
                    enum = Enum(name, comment)
                elif line.startswith('struct'):
                    state = State.InNestedStruct
                    name = re.sub(r'struct\s+(\w+)\s*\{', '\\1', line)
                    nested_struct = NestedStruct(name, comment)
                elif line.endswith(';'):
                    # A declaration that is commented out with '// ' is still
                    # treated as a field.
                    prefix = '// '
                    if line.startswith(prefix):
                        line = line[len(prefix):]
                    state = State.InStruct
                    field_type, field_name = _parse_field_declaration(line)

                    if not version:
                        self.__warning(f'missing version for {field_name}', line)
                    option = Option(str(field_name), str(field_type), comment, version)
                    options.append(option)
                    version = None
                else:
                    raise Exception('Invalid format, expected comment, field or enum\n' + line)
            elif state == State.InNestedStruct:
                if line.startswith('///'):
                    state = State.InNestedFieldComment
                    comment = self.__clean_comment_line(line)
                elif line == '};':
                    state = State.InStruct
                    nested_structs[nested_struct.name] = nested_struct
            elif state == State.InNestedFieldComment:
                if line.startswith('///'):
                    comment += self.__clean_comment_line(line)
                else:
                    state = State.InNestedStruct
                    field_type, field_name = _parse_field_declaration(line)
                    if field_type in enums:
                        nested_struct.values.append(NestedEnum(field_name,
                                                               field_type,
                                                               comment,
                                                               enums[field_type].values))
                    else:
                        nested_struct.values.append(NestedField(field_type + " " + field_name, comment))

            elif state == State.InEnum:
                if line.startswith('///'):
                    state = State.InEnumMemberComment
                    comment = self.__clean_comment_line(line)
                elif line == '};':
                    state = State.InStruct
                    enums[enum.name] = enum
                else:
                    # Enum member without documentation. Must be documented where the enum
                    # is used.
                    pass
            elif state == State.InEnumMemberComment:
                if line.startswith('///'):
                    comment += self.__clean_comment_line(line)
                else:
                    state = State.InEnum
                    val = line.replace(',', '')
                    # A trailing ' // <name>' comment supplies the
                    # configuration name of the enumerator.
                    pos = val.find(" // ")
                    if pos != -1:
                        config = val[pos + 4:]
                        val = val[:pos]
                    else:
                        config = val
                    enum.values.append(EnumValue(val, comment, config))
        if state != State.Finished:
            raise Exception('Not finished by the end of file')

        for option in options:
            if option.type not in ['bool', 'unsigned', 'int', 'std::string',
                                   'std::vector<std::string>',
                                   'std::vector<IncludeCategory>',
                                   'std::vector<RawStringFormat>']:
                if option.type in enums:
                    option.enum = enums[option.type]
                elif option.type in nested_structs:
                    option.nested_struct = nested_structs[option.type]
                else:
                    raise Exception('Unknown type: %s' % option.type)
        return options


def main():
    """Regenerate the options section of DOC_FILE from the two headers."""
    # The generated text contains non-ASCII characters and is written back as
    # UTF-8, so every input is read as UTF-8 regardless of the locale.
    with open(FORMAT_STYLE_FILE, encoding='utf-8') as f:
        opts = OptionsReader(f).read_options()
    with open(INCLUDE_STYLE_FILE, encoding='utf-8') as f:
        opts += OptionsReader(f).read_options()

    opts = sorted(opts, key=lambda x: x.name)
    options_text = '\n\n'.join(map(str, opts))

    with open(DOC_FILE, encoding='utf-8') as f:
        contents = f.read()

    contents = substitute(contents, 'FORMAT_STYLE_OPTIONS', options_text)

    with open(DOC_FILE, 'wb') as output:
        output.write(contents.encode())


if __name__ == '__main__':
    main()