xref: /openbsd-src/gnu/llvm/clang/docs/tools/dump_format_style.py (revision 12c855180aad702bbcca06e0398d774beeafb155)
1*12c85518Srobert#!/usr/bin/env python3
2e5dd7070Spatrick# A tool to parse the FormatStyle struct from Format.h and update the
3e5dd7070Spatrick# documentation in ../ClangFormatStyleOptions.rst automatically.
4e5dd7070Spatrick# Run from the directory in which this file is located to update the docs.
5e5dd7070Spatrick
6*12c85518Srobertimport inspect
7e5dd7070Spatrickimport os
8e5dd7070Spatrickimport re
9*12c85518Srobertimport sys
10*12c85518Srobertfrom io import TextIOWrapper
11*12c85518Srobertfrom typing import Set
12e5dd7070Spatrick
# All input and output locations are resolved relative to this script.
CLANG_DIR = os.path.join(os.path.dirname(__file__), '../..')
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Format/Format.h')
INCLUDE_STYLE_FILE = os.path.join(
    CLANG_DIR, 'include/clang/Tooling/Inclusions/IncludeStyle.h')
DOC_FILE = os.path.join(CLANG_DIR, 'docs/ClangFormatStyleOptions.rst')

PLURALS_FILE = os.path.join(os.path.dirname(__file__), 'plurals.txt')

# Plural forms that have already been recorded, one per line of PLURALS_FILE.
# Mode 'a+' creates the file when it does not exist; the stream is rewound
# before reading so the existing contents are picked up.
plurals: Set[str] = set()
with open(PLURALS_FILE, 'a+') as f:
  f.seek(0)
  plurals.update(f.read().splitlines())
24e5dd7070Spatrick
def substitute(text, tag, contents):
  """Replace the section of `text` delimited by START_<tag>/END_<tag> markers.

  The span from the ``.. START_<tag>`` line through the ``.. END_<tag>`` line
  is replaced by the same markers wrapped around `contents`.

  Args:
    text: the full document.
    tag: marker name, e.g. ``FORMAT_STYLE_OPTIONS``.
    contents: new text to place between the markers.

  Returns:
    The document with the marked section replaced.

  Raises:
    ValueError: if the markers for `tag` are not present in `text`.
  """
  replacement = '\n.. START_%s\n\n%s\n\n.. END_%s\n' % (tag, contents, tag)
  marker = re.escape(tag)
  pattern = r'\n\.\. START_%s\n.*\n\.\. END_%s\n' % (marker, marker)
  # A function replacement is inserted verbatim: backslashes in `contents`
  # are not treated as escapes, and '%' characters elsewhere in `text` are
  # left untouched.
  result, count = re.subn(pattern, lambda _match: replacement, text,
                          flags=re.S)
  if count == 0:
    raise ValueError('START_%s/END_%s markers not found' % (tag, tag))
  return result
29e5dd7070Spatrick
def register_plural(singular: str, plural: str):
  """Record a generated plural and ask the user to review new ones.

  A plural already present in `plurals` is returned without side effects.
  Otherwise it is added to `plurals`, appended to PLURALS_FILE, and a review
  request naming the calling line is written to stderr.  The first new plural
  of a run also prints a usage hint on stdout.

  Returns:
    `plural`, unchanged.
  """
  if plural in plurals:
    return plural

  # The function attribute doubles as a "hint already shown" flag.
  if not hasattr(register_plural, "generated_new_plural"):
    print('Plural generation: you can use '
    f'`git checkout -- {os.path.relpath(PLURALS_FILE)}` '
    'to reemit warnings or `git add` to include new plurals\n')
  register_plural.generated_new_plural = True

  plurals.add(plural)
  with open(PLURALS_FILE, 'a') as f:
    f.write(f'{plural}\n')

  # Point the message at the line in the caller that produced this plural.
  frame = inspect.currentframe()
  caller = frame.f_back if frame else None
  lineno = ':' + str(caller.f_lineno) if caller else ''
  print(f'{__file__}{lineno} check if plural of {singular} is {plural}', file=sys.stderr)
  return plural
47*12c85518Srobert
def pluralize(word: str):
  """Return a heuristic English plural of `word`, registering it for review.

  The rules are matched case-insensitively but applied to `word` as given:
    * consonant + 'y'             -> drop 'y', add 'ies'
    * 's', 'sh', 'ch', 'x', 'z'   -> add 'es'
    * 'fe'                        -> drop 'fe', add 'ves'
    * 'f' (but not 'ff')          -> drop 'f', add 'ves'
    * anything else               -> add 's'

  Each branch calls register_plural itself so that the line number reported
  by register_plural identifies the rule that produced the plural.
  """
  lword = word.lower()
  if len(lword) >= 2 and lword[-1] == 'y' and lword[-2] not in 'aeiou':
    return register_plural(word, word[:-1] + 'ies')
  elif lword.endswith(('s', 'sh', 'ch', 'x', 'z')):
    # Sibilant endings keep the whole word: 'Box' -> 'Boxes', not 'Boes'.
    return register_plural(word, word + 'es')
  elif lword.endswith('fe'):
    return register_plural(word, word[:-2] + 'ves')
  elif lword.endswith('f') and not lword.endswith('ff'):
    return register_plural(word, word[:-1] + 'ves')
  else:
    return register_plural(word, word + 's')
60*12c85518Srobert
61*12c85518Srobert
def to_yaml_type(typestr: str):
  """Translate a C++ type name into the name shown in the documentation.

  The four scalar types map to fixed names, ``std::vector<T>`` becomes
  'List of ' followed by the pluralized name of ``T``, and any other type
  name is returned unchanged.
  """
  scalar_names = {
      'bool': 'Boolean',
      'int': 'Integer',
      'unsigned': 'Unsigned',
      'std::string': 'String',
  }
  if typestr in scalar_names:
    return scalar_names[typestr]

  # Unwrap one level of std::vector<...>; `hits` tells whether it applied.
  element, hits = re.subn(r'^std::vector<(.*)>$', r'\1', typestr)
  if hits != 1:
    return typestr
  return 'List of ' + pluralize(to_yaml_type(element))
77*12c85518Srobert
def doxygen2rst(text):
  """Convert the Doxygen markup used in the headers to reStructuredText.

  ``<tt>x</tt>`` and ``\\c x`` become inline literals; any remaining
  backslash command followed by a space is removed.
  """
  # Order matters: `\c` must be rewritten before the generic command strip.
  rewrites = (
      (r'<tt>\s*(.*?)\s*<\/tt>', r'``\1``'),
      (r'\\c ([^ ,;\.]+)', r'``\1``'),
      (r'\\\w+ ', ''),
  )
  for pattern, repl in rewrites:
    text = re.sub(pattern, repl, text)
  return text
83e5dd7070Spatrick
def indent(text, columns, indent_first_line=True):
  """Indent the non-empty lines of `text` by `columns` spaces.

  Empty lines are left empty.  The first line is indented only when
  `indent_first_line` is true and `text` does not start with a newline.
  """
  pad = ' ' * columns
  # Insert the padding after every newline that is followed by a character
  # other than another newline.
  padded = re.sub(r'\n(?=[^\n])', '\n' + pad, text)
  if indent_first_line and not padded.startswith('\n'):
    return pad + padded
  return padded
90e5dd7070Spatrick
class Option:
  """A documented style option: name, C++ type, comment and version."""

  def __init__(self, name, opt_type, comment, version):
    self.name, self.type = name, opt_type
    self.comment = comment.strip()
    self.version = version
    # Start unset; __str__ renders them only when they have been assigned.
    self.enum = None
    self.nested_struct = None

  def __str__(self):
    """Render the option as a reStructuredText entry."""
    parts = [
        f'.. _{self.name}:\n\n**{self.name}** (``{to_yaml_type(self.type)}``) '
    ]
    if self.version:
      parts.append(f':versionbadge:`clang-format {self.version}` ')
    description = doxygen2rst(indent(self.comment, 2))
    parts.append(f':ref:`¶ <{self.name}>`\n{description}')
    if self.enum and self.enum.values:
      parts.append(indent(f'\n\nPossible values:\n\n{self.enum}\n', 2))
    if self.nested_struct:
      parts.append(
          indent(f'\n\nNested configuration flags:\n\n{self.nested_struct}\n', 2))
    return ''.join(parts)
111e5dd7070Spatrick
class NestedStruct:
  """A struct declared inside the style struct, with its documented fields."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()
    self.values = []

  def __str__(self):
    """Return the struct comment followed by one rendered entry per value."""
    rendered = '\n'.join(str(value) for value in self.values)
    return f'{self.comment}\n{rendered}'
120e5dd7070Spatrick
class NestedField:
  """A documented field of a nested struct."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()

  def __str__(self):
    """Render the field as a bullet; continuation lines are indented by 2."""
    description = doxygen2rst(
        indent(self.comment, 2, indent_first_line=False))
    return f'\n* ``{self.name}`` {description}'
130e5dd7070Spatrick
class Enum:
  """An enum declared in the style struct, with its documented values."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()
    self.values = []

  def __str__(self):
    """Return the rendered values, one per line."""
    return '\n'.join(str(value) for value in self.values)
139e5dd7070Spatrick
class NestedEnum:
  """An enum-typed field of a nested struct, with the enum's values."""

  def __init__(self, name, enumtype, comment, values):
    self.name = name
    self.type = enumtype
    self.comment = comment
    self.values = values

  def __str__(self):
    """Render the field as a bullet followed by its possible values."""
    description = doxygen2rst(indent(self.comment, 2))
    header = f'\n* ``{to_yaml_type(self.type)} {self.name}``\n{description}'
    value_list = '\n'.join(str(value) for value in self.values)
    return ''.join((
        header,
        indent('\nPossible values:\n\n', 2),
        indent(value_list, 2),
    ))
153e5dd7070Spatrick
class EnumValue:
  """One enumerator: its name, comment and configuration spelling."""

  def __init__(self, name, comment, config):
    self.name = name
    self.comment = comment
    self.config = config

  def __str__(self):
    """Render the value as a bullet showing its configuration name."""
    # Everything up to and including the last underscore is dropped.
    config_name = re.sub('.*_', '', self.config)
    description = doxygen2rst(indent(self.comment, 2))
    return f'* ``{self.name}`` (in configuration: ``{config_name}``)\n{description}'
165e5dd7070Spatrick
166*12c85518Srobert
class OptionsReader:
  """Extract documented options from a FormatStyle/IncludeStyle header.

  The header is read line by line.  The `///` comment preceding each field,
  enum and nested struct is converted to reStructuredText and attached to
  the parsed entity.  Diagnostics are written to stderr.
  """

  def __init__(self, header: TextIOWrapper):
    self.header = header
    # True between a `\code` line and its `\endcode`.
    self.in_code_block = False
    # Extra indentation (in spaces) of the current `\code` marker.
    self.code_indent = 0
    # 1-based number of the line currently being processed.
    self.lineno = 0
    # Line of the last indentation warning, used to group adjacent ones.
    self.last_err_lineno = -1

  def __file_path(self):
    """Return the header path relative to the current directory."""
    return os.path.relpath(self.header.name)

  def __print_line(self, line: str):
    """Echo `line` to stderr, prefixed with the current line number."""
    print(f'{self.lineno:>6} | {line}', file=sys.stderr)

  def __warning(self, msg: str, line: str):
    """Print a `file:line: warning:` message followed by the line itself."""
    print(f'{self.__file_path()}:{self.lineno}: warning: {msg}:', file=sys.stderr)
    self.__print_line(line)

  def __parse_field(self, line: str):
    """Split a `Type Name;` declaration into (type, name).

    Raises:
      Exception: if `line` is not a field declaration of that form.
    """
    match = re.match(r'([<>:\w(,\s)]+)\s+(\w+);', line)
    if not match:
      raise Exception(f'{self.__file_path()}:{self.lineno}: '
                      'Invalid format, expected field declaration\n' + line)
    return match.groups()

  def __clean_comment_line(self, line: str):
    """Convert one stripped `///` comment line to reStructuredText.

    `\\code`/`\\endcode` and `\\warning`/`\\endwarning` markers are turned
    into directives (or dropped); any other line is returned without its
    four-character `/// ` prefix and with a trailing newline.
    """
    # The language tag uses Doxygen's `{.ext}` form; the dot is literal.
    match = re.match(r'^/// (?P<indent> +)?\\code(\{\.(?P<lang>\w+)\})?$', line)
    if match:
      if self.in_code_block:
        self.__warning('`\\code` in another `\\code`', line)
      self.in_code_block = True
      indent_str = match.group('indent')
      if not indent_str:
        indent_str = ''
      self.code_indent = len(indent_str)
      lang = match.group('lang')
      if not lang:
        lang = 'c++'
      return f'\n{indent_str}.. code-block:: {lang}\n\n'

    endcode_match = re.match(r'^/// +\\endcode$', line)
    if endcode_match:
      if not self.in_code_block:
        self.__warning('no correct `\\code` found before this `\\endcode`', line)
      self.in_code_block = False
      return ''

    # check code block indentation
    if (self.in_code_block and not line == '///' and not
        line.startswith('///  ' + ' ' * self.code_indent)):
      # Consecutive offending lines are listed under a single warning.
      if self.last_err_lineno == self.lineno - 1:
        self.__print_line(line)
      else:
        self.__warning('code block should be indented', line)
      self.last_err_lineno = self.lineno

    match = re.match(r'^/// \\warning$', line)
    if match:
      return '\n.. warning:: \n\n'

    endwarning_match = re.match(r'^/// +\\endwarning$', line)
    if endwarning_match:
      return ''
    return line[4:] + '\n'

  def read_options(self):
    """Parse the header and return its options as a list of Option objects.

    Parsing starts at `struct FormatStyle {` or `struct IncludeStyle {` and
    stops at the struct's closing `};`.  Options whose type is an enum or a
    nested struct declared in the header get that definition attached.

    Raises:
      Exception: on an unexpected line after a field comment, a malformed
        field declaration, a missing end of struct, or an option whose type
        is neither built in nor declared in the header.
    """
    class State:
      BeforeStruct, Finished, InStruct, InNestedStruct, InNestedFieldComment, \
        InFieldComment, InEnum, InEnumMemberComment = range(8)
    state = State.BeforeStruct

    options = []
    enums = {}
    nested_structs = {}
    comment = ''
    enum = None
    nested_struct = None
    version = None

    for line in self.header:
      self.lineno += 1
      line = line.strip()
      if state == State.BeforeStruct:
        if line in ('struct FormatStyle {', 'struct IncludeStyle {'):
          state = State.InStruct
      elif state == State.InStruct:
        if line.startswith('///'):
          state = State.InFieldComment
          comment = self.__clean_comment_line(line)
          # A new comment starts a new entity: drop any version left over
          # from a preceding enum or nested-struct comment so it cannot be
          # attributed to the next option.
          version = None
        elif line == '};':
          state = State.Finished
          break
      elif state == State.InFieldComment:
        if line.startswith(r'/// \version'):
          match = re.match(r'/// \\version\s*(?P<version>[0-9.]+)*', line)
          if match:
            version = match.group('version')
        elif line.startswith('///'):
          comment += self.__clean_comment_line(line)
        elif line.startswith('enum'):
          state = State.InEnum
          name = re.sub(r'enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{', '\\1', line)
          enum = Enum(name, comment)
        elif line.startswith('struct'):
          state = State.InNestedStruct
          name = re.sub(r'struct\s+(\w+)\s*\{', '\\1', line)
          nested_struct = NestedStruct(name, comment)
        elif line.endswith(';'):
          # A commented-out field (`// Type Name;`) is still documented.
          prefix = '// '
          if line.startswith(prefix):
            line = line[len(prefix):]
          state = State.InStruct
          field_type, field_name = self.__parse_field(line)

          if not version:
            self.__warning(f'missing version for {field_name}', line)
          option = Option(str(field_name), str(field_type), comment, version)
          options.append(option)
          version = None
        else:
          raise Exception('Invalid format, expected comment, field or enum\n' + line)
      elif state == State.InNestedStruct:
        if line.startswith('///'):
          state = State.InNestedFieldComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.InStruct
          nested_structs[nested_struct.name] = nested_struct
      elif state == State.InNestedFieldComment:
        if line.startswith('///'):
          comment += self.__clean_comment_line(line)
        else:
          state = State.InNestedStruct
          field_type, field_name = self.__parse_field(line)
          if field_type in enums:
            nested_struct.values.append(NestedEnum(field_name,
                                                   field_type,
                                                   comment,
                                                   enums[field_type].values))
          else:
            nested_struct.values.append(NestedField(field_type + " " + field_name, comment))

      elif state == State.InEnum:
        if line.startswith('///'):
          state = State.InEnumMemberComment
          comment = self.__clean_comment_line(line)
        elif line == '};':
          state = State.InStruct
          enums[enum.name] = enum
        else:
          # Enum member without documentation. Must be documented where the enum
          # is used.
          pass
      elif state == State.InEnumMemberComment:
        if line.startswith('///'):
          comment += self.__clean_comment_line(line)
        else:
          state = State.InEnum
          val = line.replace(',', '')
          # An optional trailing ` // Name` gives the configuration spelling.
          pos = val.find(" // ")
          if pos != -1:
            config = val[pos + 4:]
            val = val[:pos]
          else:
            config = val
          enum.values.append(EnumValue(val, comment, config))
    if state != State.Finished:
      raise Exception('Not finished by the end of file')

    # Attach enum / nested-struct definitions to the options that use them.
    for option in options:
      if option.type not in ['bool', 'unsigned', 'int', 'std::string',
                             'std::vector<std::string>',
                             'std::vector<IncludeCategory>',
                             'std::vector<RawStringFormat>']:
        if option.type in enums:
          option.enum = enums[option.type]
        elif option.type in nested_structs:
          option.nested_struct = nested_structs[option.type]
        else:
          raise Exception('Unknown type: %s' % option.type)
    return options
342e5dd7070Spatrick
343e5dd7070Spatrick
# Parse both headers and merge their options into one list sorted by name.
with open(FORMAT_STYLE_FILE, encoding='utf-8') as f:
  opts = OptionsReader(f).read_options()
with open(INCLUDE_STYLE_FILE, encoding='utf-8') as f:
  opts += OptionsReader(f).read_options()

opts = sorted(opts, key=lambda x: x.name)
options_text = '\n\n'.join(map(str, opts))

# Read the documentation as UTF-8 so it round-trips with the UTF-8 bytes
# written below, independent of the locale's default encoding.
with open(DOC_FILE, encoding='utf-8') as f:
  contents = f.read()

contents = substitute(contents, 'FORMAT_STYLE_OPTIONS', options_text)

# Written as bytes so the generated text is stored exactly as produced.
with open(DOC_FILE, 'wb') as output:
  output.write(contents.encode())
359