xref: /llvm-project/clang/docs/tools/dump_format_style.py (revision 23a5090c6ac7a207f49a11bf868a7c3fae2aeea2)
1#!/usr/bin/env python3
2# A tool to parse the FormatStyle struct from Format.h and update the
3# documentation in ../ClangFormatStyleOptions.rst automatically.
4# Run from the directory in which this file is located to update the docs.
5
6import inspect
7import os
8import re
9from typing import Set
10
# Input and output locations, all derived from the directory holding this
# script.
_SCRIPT_DIR = os.path.dirname(__file__)
CLANG_DIR = os.path.join(_SCRIPT_DIR, '../..')
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, 'include/clang/Format/Format.h')
INCLUDE_STYLE_FILE = os.path.join(
    CLANG_DIR, 'include/clang/Tooling/Inclusions/IncludeStyle.h')
DOC_FILE = os.path.join(CLANG_DIR, 'docs/ClangFormatStyleOptions.rst')

# One plural form per line; register_plural() appends to this file.
PLURALS_FILE = os.path.join(_SCRIPT_DIR, 'plurals.txt')

# 'a+' creates the file when it does not exist yet. Append mode positions the
# stream at the end, so rewind before reading the plurals recorded so far.
plurals: Set[str] = set()
with open(PLURALS_FILE, 'a+') as f:
  f.seek(0)
  plurals.update(f.read().splitlines())
22
def substitute(text, tag, contents):
  """Replace the section of `text` delimited by START_<tag>/END_<tag> markers.

  The markers are RST comment lines (`.. START_<tag>` and `.. END_<tag>`).
  Everything between them is replaced by `contents`, surrounded by blank
  lines; the markers themselves are kept.

  Args:
    text: the full document text.
    tag: marker suffix identifying the section.
    contents: new text to place between the markers.

  Returns:
    The document text with the section replaced.

  Raises:
    ValueError: if `text` contains no START_<tag>/END_<tag> pair.
  """
  replacement = '\n.. START_%s\n\n%s\n\n.. END_%s\n' % (tag, contents, tag)
  escaped_tag = re.escape(tag)
  pattern = r'\n\.\. START_%s\n.*\n\.\. END_%s\n' % (escaped_tag, escaped_tag)
  # A callable replacement is inserted verbatim, so neither '%' characters in
  # the surrounding document nor backslashes in `contents` are reinterpreted.
  result, count = re.subn(pattern, lambda _match: replacement, text,
                          flags=re.S)
  if count == 0:
    raise ValueError('No START_%s/END_%s markers found' % (tag, tag))
  return result
27
def register_plural(singular: str, plural: str):
  """Record `plural` as a generated plural form and return it.

  A plural that is not yet in the module-level `plurals` set is added to the
  set, appended to PLURALS_FILE, and reported on stderr together with the
  line number of the calling frame so that a human can review it. The first
  new plural of a run also prints a hint about managing PLURALS_FILE with git.

  Args:
    singular: the word that was pluralized (used only in the message).
    plural: the generated plural form.

  Returns:
    `plural`, unchanged.
  """
  # Function-scope import: the file's top-level imports do not include sys,
  # and reaching it through `os.sys` relies on an implementation detail of os.
  import sys

  if plural not in plurals:
    # A function attribute remembers whether the hint was already printed.
    if not hasattr(register_plural, "generated_new_plural"):
      print('Plural generation: you can use '
      f'`git checkout -- {os.path.relpath(PLURALS_FILE)}` '
      'to reemit warnings or `git add` to include new plurals\n')
    register_plural.generated_new_plural = True

    plurals.add(plural)
    with open(PLURALS_FILE, 'a') as f:
      f.write(plural + '\n')
    # Point the reviewer at the call site rather than at this function.
    cf = inspect.currentframe()
    lineno = ''
    if cf and cf.f_back:
      lineno = ':' + str(cf.f_back.f_lineno)
    print(f'{__file__}{lineno} check if plural of {singular} is {plural}',
          file=sys.stderr)
  return plural
45
def pluralize(word: str):
  """Return an English plural of `word`, registering it for review.

  Rules, checked in order on the lower-cased word:
    * consonant + 'y'           -> drop 'y', add 'ies'
    * ends in s, sh, ch, x or z -> add 'es'
    * ends in 'fe'              -> drop 'fe', add 'ves'
    * ends in 'f' but not 'ff'  -> drop 'f', add 'ves'
    * anything else             -> add 's'

  The original casing of `word` is preserved in the result. Every result is
  passed through register_plural().
  """
  lword = word.lower()
  if len(lword) >= 2 and lword[-1] == 'y' and lword[-2] not in 'aeiou':
    return register_plural(word, word[:-1] + 'ies')
  elif lword.endswith(('s', 'sh', 'ch', 'x', 'z')):
    # Sibilant endings keep the whole word: "Box" -> "Boxes", not "Boes".
    return register_plural(word, word + 'es')
  elif lword.endswith('fe'):
    return register_plural(word, word[:-2] + 'ves')
  elif lword.endswith('f') and not lword.endswith('ff'):
    return register_plural(word, word[:-1] + 'ves')
  else:
    return register_plural(word, word + 's')
58
59
def to_yaml_type(typestr: str):
  """Map a C++ type spelling to the type name shown in the documentation.

  The four basic types get fixed names, `std::vector<T>` becomes
  'List of <plural of T's name>', and any other spelling is returned as is.
  """
  basic_names = {
      'bool': 'Boolean',
      'int': 'Integer',
      'unsigned': 'Unsigned',
      'std::string': 'String',
  }
  if typestr in basic_names:
    return basic_names[typestr]

  element_type, hits = re.subn(r'^std::vector<(.*)>$', r'\1', typestr)
  if hits != 1:
    return typestr
  return 'List of ' + pluralize(to_yaml_type(element_type))
75
def doxygen2rst(text):
  """Convert the Doxygen markup used in the headers to RST.

  Applied in order: `<tt>x</tt>` and `\\c x` become inline literals, then any
  remaining backslash command followed by a space is dropped.
  """
  rewrites = (
      (r'<tt>\s*(.*?)\s*<\/tt>', r'``\1``'),
      (r'\\c ([^ ,;\.]+)', r'``\1``'),
      (r'\\\w+ ', ''),
  )
  for pattern, replacement in rewrites:
    text = re.sub(pattern, replacement, text)
  return text
81
def indent(text, columns, indent_first_line=True):
  """Indent every non-empty line of `text` by `columns` spaces.

  Lines after the first are indented only when they contain at least one
  character. The first line is indented unless `indent_first_line` is false
  or `text` begins with a newline.
  """
  padding = ' ' * columns
  shifted = re.sub(r'\n([^\n])',
                   lambda hit: '\n' + padding + hit.group(1),
                   text, flags=re.S)
  if indent_first_line and not shifted.startswith('\n'):
    return padding + shifted
  return shifted
88
class Option(object):
  """A documented configuration option and its RST rendering."""

  def __init__(self, name, type, comment, version):
    self.name, self.type, self.version = name, type, version
    self.comment = comment.strip()
    # Attached by read_options() once every enum and nested struct is known.
    self.enum = None
    self.nested_struct = None

  def __str__(self):
    heading = '**%s** (``%s``)' % (self.name, to_yaml_type(self.type))
    if self.version:
      heading += ' :versionbadge:`clang-format %s`' % (self.version,)
    pieces = [heading, '\n', doxygen2rst(indent(self.comment, 2))]
    # An enum without documented values contributes nothing.
    if self.enum and self.enum.values:
      pieces.append(indent('\n\nPossible values:\n\n%s\n' % self.enum, 2))
    if self.nested_struct:
      pieces.append(
          indent('\n\nNested configuration flags:\n\n%s\n' % self.nested_struct,
                 2))
    return ''.join(pieces)
111
class NestedStruct(object):
  """A struct declared inside the style struct; `values` holds its members."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()
    self.values = []

  def __str__(self):
    # One rendered member per line.
    return '\n'.join(str(member) for member in self.values)
120
class NestedField(object):
  """A plain member of a nested struct, rendered as an RST bullet."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()

  def __str__(self):
    # The first comment line continues the bullet, so only later lines are
    # indented.
    description = doxygen2rst(
        indent(self.comment, 2, indent_first_line=False))
    return '\n* ``%s`` %s' % (self.name, description)
130
class Enum(object):
  """An enum declared inside the style struct; `values` holds its members."""

  def __init__(self, name, comment):
    self.name = name
    self.comment = comment.strip()
    self.values = []

  def __str__(self):
    # One rendered enumerator per line.
    return '\n'.join(str(member) for member in self.values)
139
class NestedEnum(object):
  """An enum-typed member of a nested struct, rendered with its values."""

  def __init__(self, name, enumtype, comment, values):
    self.name = name
    self.type = enumtype
    # Unlike the other classes, the comment is stored without stripping.
    self.comment = comment
    self.values = values

  def __str__(self):
    bullet = '\n* ``%s %s``\n%s' % (to_yaml_type(self.type), self.name,
                                    doxygen2rst(indent(self.comment, 2)))
    values_heading = indent('\nPossible values:\n\n', 2)
    values_list = indent('\n'.join(str(value) for value in self.values), 2)
    return bullet + values_heading + values_list
153
class EnumValue(object):
  """One enumerator: its C++ name, its comment and its configuration name."""

  def __init__(self, name, comment, config):
    self.name, self.comment, self.config = name, comment, config

  def __str__(self):
    # The configuration spelling is what follows the last underscore.
    config_name = re.sub('.*_', '', self.config)
    description = doxygen2rst(indent(self.comment, 2))
    return '* ``%s`` (in configuration: ``%s``)\n%s' % (
        self.name, config_name, description)
165
def clean_comment_line(line):
  """Translate one whitespace-stripped `///` comment line into RST text.

  * `\\code` / `\\code{.lang}` opens a `code-block` directive, keeping any
    extra indentation that preceded the command; the language defaults to
    c++.
  * `\\warning` opens a `warning` directive.
  * `\\endcode` and `\\endwarning` produce no output.
  * Any other line has its first four characters (the `/// ` prefix) removed
    and a newline appended.

  Args:
    line: a comment line starting with `///`.

  Returns:
    The RST text contributed by this line (possibly empty).
  """
  # The '.' before the language is escaped so that only the `{.lang}` form is
  # accepted, not `{` followed by an arbitrary character.
  code_start = re.match(
      r'^/// (?P<indent> +)?\\code(\{\.(?P<lang>\w+)\})?$', line)
  if code_start:
    leading = code_start.group('indent') or ''
    lang = code_start.group('lang') or 'c++'
    return '\n%s.. code-block:: %s\n\n' % (leading, lang)

  if re.match(r'^/// +\\endcode$', line):
    return ''

  if re.match(r'^/// \\warning$', line):
    return '\n.. warning:: \n\n'

  if re.match(r'^/// +\\endwarning$', line):
    return ''

  return line[4:] + '\n'
189
def _parse_field_declaration(line):
  """Split a `Type Name;` member declaration into (type, name) strings."""
  match = re.match(r'([<>:\w(,\s)]+)\s+(\w+);', line)
  if not match:
    # Name the offending line instead of failing on `None.groups()`.
    raise Exception('Invalid format, expected field declaration\n' + line)
  return match.groups()


def read_options(header):
  """Parse the documented options out of a style header.

  A line-based state machine: parsing starts at `struct FormatStyle {` or
  `struct IncludeStyle {` and ends at the first `};` seen at struct level.
  Inside the struct, each `///` comment block is attached to whatever follows
  it: an enum, a nested struct or a field. Lines at struct level that are
  neither a `///` comment nor `};` are ignored.

  Args:
    header: an iterable of lines (for example an open text file).

  Returns:
    A list of Option objects in declaration order. Options whose type is an
    enum or nested struct declared in the header have `enum` or
    `nested_struct` set accordingly.

  Raises:
    Exception: if a commented declaration is not a comment, enum, struct or
      parsable field, if the struct is never closed, or if an option has a
      type that is neither built in nor declared in the header.
  """
  class State(object):
    BeforeStruct, Finished, InStruct, InNestedStruct, InNestedFieldComment, \
    InFieldComment, InEnum, InEnumMemberComment = range(8)
  state = State.BeforeStruct

  options = []
  enums = {}
  nested_structs = {}
  comment = ''
  enum = None
  nested_struct = None
  version = None

  for line in header:
    line = line.strip()
    if state == State.BeforeStruct:
      if line in ('struct FormatStyle {', 'struct IncludeStyle {'):
        state = State.InStruct
    elif state == State.InStruct:
      if line.startswith('///'):
        state = State.InFieldComment
        comment = clean_comment_line(line)
      elif line == '};':
        state = State.Finished
        break
    elif state == State.InFieldComment:
      if line.startswith(r'/// \version'):
        # The version is kept out of the comment text and stored separately.
        match = re.match(r'/// \\version\s*(?P<version>[0-9.]+)*', line)
        if match:
          version = match.group('version')
      elif line.startswith('///'):
        comment += clean_comment_line(line)
      elif line.startswith('enum'):
        state = State.InEnum
        name = re.sub(r'enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{', '\\1', line)
        enum = Enum(name, comment)
      elif line.startswith('struct'):
        state = State.InNestedStruct
        name = re.sub(r'struct\s+(\w+)\s*\{', '\\1', line)
        nested_struct = NestedStruct(name, comment)
      elif line.endswith(';'):
        state = State.InStruct
        field_type, field_name = _parse_field_declaration(line)

        if not version:
          print('Warning missing version for ', field_name)
        option = Option(str(field_name), str(field_type), comment, version)
        options.append(option)
        # Each option has to declare its own version.
        version = None
      else:
        raise Exception('Invalid format, expected comment, field or enum\n'
                        + line)
    elif state == State.InNestedStruct:
      if line.startswith('///'):
        state = State.InNestedFieldComment
        comment = clean_comment_line(line)
      elif line == '};':
        state = State.InStruct
        nested_structs[nested_struct.name] = nested_struct
    elif state == State.InNestedFieldComment:
      if line.startswith('///'):
        comment += clean_comment_line(line)
      else:
        state = State.InNestedStruct
        field_type, field_name = _parse_field_declaration(line)
        # Members whose type is an enum seen earlier list that enum's values.
        if field_type in enums:
          nested_struct.values.append(
              NestedEnum(field_name, field_type, comment,
                         enums[field_type].values))
        else:
          nested_struct.values.append(
              NestedField(field_type + " " + field_name, comment))

    elif state == State.InEnum:
      if line.startswith('///'):
        state = State.InEnumMemberComment
        comment = clean_comment_line(line)
      elif line == '};':
        state = State.InStruct
        enums[enum.name] = enum
      else:
        # Enum member without documentation. Must be documented where the enum
        # is used.
        pass
    elif state == State.InEnumMemberComment:
      if line.startswith('///'):
        comment += clean_comment_line(line)
      else:
        state = State.InEnum
        val = line.replace(',', '')
        # A trailing ` // Name` comment overrides the configuration spelling.
        pos = val.find(" // ")
        if pos != -1:
          config = val[pos+4:]
          val = val[:pos]
        else:
          config = val
        enum.values.append(EnumValue(val, comment, config))
  if state != State.Finished:
    raise Exception('Not finished by the end of file')

  for option in options:
    if option.type not in ['bool', 'unsigned', 'int', 'std::string',
                           'std::vector<std::string>',
                           'std::vector<IncludeCategory>',
                           'std::vector<RawStringFormat>']:
      if option.type in enums:
        option.enum = enums[option.type]
      elif option.type in nested_structs:
        option.nested_struct = nested_structs[option.type]
      else:
        raise Exception('Unknown type: %s' % option.type)
  return options
300
# Parse both headers. `with` closes each file as soon as it has been read, and
# the explicit encoding matches the UTF-8 bytes written back below instead of
# depending on the locale's default encoding.
with open(FORMAT_STYLE_FILE, encoding='utf-8') as header:
  options = read_options(header)
with open(INCLUDE_STYLE_FILE, encoding='utf-8') as header:
  options += read_options(header)

options = sorted(options, key=lambda x: x.name)
options_text = '\n\n'.join(map(str, options))

with open(DOC_FILE, encoding='utf-8') as doc:
  contents = doc.read()

contents = substitute(contents, 'FORMAT_STYLE_OPTIONS', options_text)

# Written as bytes so that no newline translation is applied on output.
with open(DOC_FILE, 'wb') as output:
  output.write(contents.encode())
313