xref: /llvm-project/clang/docs/tools/dump_format_style.py (revision ea44647a0b49de826191eeb6e05020262b5a81e9)
1#!/usr/bin/env python3
2# A tool to parse the FormatStyle struct from Format.h and update the
3# documentation in ../ClangFormatStyleOptions.rst automatically.
4# Run from the directory in which this file is located to update the docs.
5
6import argparse
7import inspect
8import os
9import re
10import sys
11from io import TextIOWrapper
12from typing import Set
13
# Directory containing this script; the other paths are resolved relative to
# it so the tool does not depend on the current working directory for input.
_TOOLS_DIR = os.path.dirname(__file__)

# Two levels up from the script directory.
CLANG_DIR = os.path.join(_TOOLS_DIR, "../..")
# Headers whose documented structs are parsed for options.
FORMAT_STYLE_FILE = os.path.join(CLANG_DIR, "include/clang/Format/Format.h")
INCLUDE_STYLE_FILE = os.path.join(
    CLANG_DIR, "include/clang/Tooling/Inclusions/IncludeStyle.h"
)
# reStructuredText document whose generated section is rewritten.
DOC_FILE = os.path.join(CLANG_DIR, "docs/ClangFormatStyleOptions.rst")

# One plural form per line; register_plural() appends to this file.
PLURALS_FILE = os.path.join(_TOOLS_DIR, "plurals.txt")

# Plural forms already recorded in PLURALS_FILE. A freshly opened file is
# already positioned at offset 0, so no explicit seek is needed before reading.
with open(PLURALS_FILE) as f:
    plurals: Set[str] = set(f.read().splitlines())
27
28
def substitute(text, tag, contents):
    """Replace the block delimited by ``.. START_<tag>`` / ``.. END_<tag>``.

    Args:
        text: Full document text containing the marker pair.
        tag: Marker name; matched literally.
        contents: New text placed between the markers.

    Returns:
        ``text`` with everything from the first ``.. START_<tag>`` line to the
        last ``.. END_<tag>`` line replaced by the markers wrapped around
        ``contents``.

    Raises:
        ValueError: If the marker pair is not present in ``text``.
    """
    replacement = "\n.. START_%s\n\n%s\n\n.. END_%s\n" % (tag, contents, tag)
    escaped_tag = re.escape(tag)
    pattern = r"\n\.\. START_%s\n.*\n\.\. END_%s\n" % (escaped_tag, escaped_tag)
    # A callable replacement is inserted verbatim: backslashes in the generated
    # text are not treated as regex escapes, and literal "%" characters in the
    # surrounding document are left alone (the previous "%s" placeholder trick
    # failed on them).
    result, count = re.subn(pattern, lambda _match: replacement, text, flags=re.S)
    if not count:
        raise ValueError(
            "markers '.. START_%s' / '.. END_%s' not found" % (tag, tag)
        )
    return result
33
34
def register_plural(singular: str, plural: str):
    """Record ``plural`` as the plural of ``singular`` and return it.

    A plural that is already in the module-level ``plurals`` set is returned
    unchanged. Otherwise it is added to the set, appended to ``PLURALS_FILE``,
    and a message asking for a manual check is printed to stderr. The first
    new plural of a run also prints a hint about resetting the plurals file.
    """
    if plural in plurals:
        return plural

    # The function attribute doubles as a "hint already shown" flag.
    hint_pending = not hasattr(register_plural, "generated_new_plural")
    if hint_pending:
        print(
            "Plural generation: you can use "
            f"`git checkout -- {os.path.relpath(PLURALS_FILE)}` "
            "to reemit warnings or `git add` to include new plurals\n"
        )
    register_plural.generated_new_plural = True

    plurals.add(plural)
    with open(PLURALS_FILE, "a") as plurals_file:
        plurals_file.write(plural + "\n")

    # Report the line of the *caller*, so the message points at the rule that
    # produced this plural. currentframe() must be called directly here.
    frame = inspect.currentframe()
    caller = frame.f_back if frame else None
    lineno = ":" + str(caller.f_lineno) if caller else ""
    print(
        f"{__file__}{lineno} check if plural of {singular} is {plural}",
        file=sys.stderr,
    )
    return plural
57
58
def pluralize(word: str):
    """Return an English plural of ``word``, registering it for review.

    The rules are simple suffix heuristics applied case-insensitively:

    * consonant + ``y``            -> ``ies``  (Category -> Categories)
    * ``s``/``sh``/``ch``/``x``/``z`` -> append ``es`` (Box -> Boxes)
    * ``fe``                        -> ``ves``
    * ``f`` (but not ``ff``)        -> ``ves``
    * anything else                 -> append ``s``

    Each rule calls ``register_plural`` separately (rather than once at the
    end) because that function reports the caller's line number, which
    identifies the rule that fired.
    """
    lword = word.lower()
    if len(lword) >= 2 and lword[-1] == "y" and lword[-2] not in "aeiou":
        return register_plural(word, word[:-1] + "ies")
    elif lword.endswith(("s", "sh", "ch", "x", "z")):
        # Sibilant endings keep the whole word and gain "es"; the previous
        # code sliced off the final letter ("Box" -> "Boes").
        return register_plural(word, word + "es")
    elif lword.endswith("fe"):
        return register_plural(word, word[:-2] + "ves")
    elif lword.endswith("f") and not lword.endswith("ff"):
        return register_plural(word, word[:-1] + "ves")
    else:
        return register_plural(word, word + "s")
71
72
def to_yaml_type(typestr: str):
    """Translate a C++ type spelling into the name shown in the docs.

    Scalars map to fixed names, ``std::vector<T>`` becomes
    ``"List of <plural of T>"``, ``std::optional<T>`` is unwrapped to ``T``,
    and any other spelling is returned unchanged.
    """
    scalar_names = {
        "bool": "Boolean",
        "int": "Integer",
        "unsigned": "Unsigned",
        "std::string": "String",
    }
    if typestr in scalar_names:
        return scalar_names[typestr]

    vector_of = re.match(r"std::vector<(.*)>$", typestr)
    if vector_of:
        element = to_yaml_type(vector_of.group(1))
        return "List of " + pluralize(element)

    optional_of = re.match(r"std::optional<(.*)>$", typestr)
    if optional_of:
        return to_yaml_type(optional_of.group(1))

    return typestr
92
93
def doxygen2rst(text):
    r"""Convert a few Doxygen inline constructs to reStructuredText.

    ``<tt>x</tt>`` and ``\c x`` become inline literals; any remaining
    backslash command followed by a space is removed. The rewrites are applied
    in that order.
    """
    rewrites = (
        (r"<tt>\s*(.*?)\s*<\/tt>", r"``\1``"),
        (r"\\c ([^ ,;\.]+)", r"``\1``"),
        (r"\\\w+ ", ""),
    )
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text)
    return text
99
100
def indent(text, columns, indent_first_line=True):
    """Indent the non-empty lines of ``text`` by ``columns`` spaces.

    Every line after the first is indented unless it is empty. The first line
    is indented only when ``indent_first_line`` is true and ``text`` does not
    start with a newline.
    """
    indent_str = " " * columns
    # Only newlines followed by a real character get padding, so blank lines
    # stay free of trailing whitespace.
    shifted = re.sub(r"\n([^\n])", "\n" + indent_str + "\\1", text, flags=re.S)
    if indent_first_line and not shifted.startswith("\n"):
        return indent_str + shifted
    return shifted
107
108
class Option(object):
    """A documented option, rendered as a reStructuredText entry by ``str()``."""

    def __init__(self, name, opt_type, comment, version):
        self.name = name
        self.type = opt_type
        self.comment = comment.strip()
        self.version = version
        # Filled in later for options whose type is a parsed enum or
        # nested struct.
        self.enum = None
        self.nested_struct = None

    def __str__(self):
        # Anchor, bold name and YAML type; then optional version badge,
        # permalink and description.
        parts = [
            ".. _%s:\n\n**%s** (``%s``) "
            % (
                self.name,
                self.name,
                to_yaml_type(self.type),
            )
        ]
        if self.version:
            parts.append(":versionbadge:`clang-format %s` " % self.version)
        parts.append(
            ":ref:`¶ <%s>`\n%s" % (self.name, doxygen2rst(indent(self.comment, 2)))
        )
        if self.enum and self.enum.values:
            parts.append(indent("\n\nPossible values:\n\n%s\n" % self.enum, 2))
        if not self.nested_struct:
            return "".join(parts)

        parts.append(
            indent("\n\nNested configuration flags:\n\n%s\n" % self.nested_struct, 2)
        )
        # The placeholder is substituted across the whole entry, not just the
        # nested part.
        return "".join(parts).replace("<option-name>", self.name)
135
136
class NestedStruct(object):
    """A nested struct: its comment plus the list of documented members."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()
        self.values = []

    def __str__(self):
        # The comment comes first, then one rendered member per line.
        rendered_members = "\n".join(str(member) for member in self.values)
        return self.comment + "\n" + rendered_members
145
146
class NestedField(object):
    """A member of a nested struct, rendered as a bullet item by ``str()``."""

    def __init__(self, name, comment, version):
        self.name = name
        self.comment = comment.strip()
        self.version = version

    def __str__(self):
        # Continuation lines are indented under the bullet; the first line
        # follows the bullet header directly.
        description = doxygen2rst(indent(self.comment, 2, indent_first_line=False))
        if not self.version:
            return "\n* ``%s`` %s" % (
                self.name,
                description,
            )
        return "\n* ``%s`` :versionbadge:`clang-format %s`\n%s" % (
            self.name,
            self.version,
            description,
        )
164
165
class Enum(object):
    """An enum: its name, its comment and the documented members."""

    def __init__(self, name, comment):
        self.name = name
        self.comment = comment.strip()
        self.values = []

    def __str__(self):
        # Only the members are rendered; the enum's own comment is not.
        return "\n".join(str(member) for member in self.values)
174
175
class NestedEnum(object):
    """An enum-typed member of a nested struct, with its possible values."""

    def __init__(self, name, enumtype, comment, version, values):
        self.name = name
        self.type = enumtype
        # Unlike the other classes here, the comment is stored unstripped.
        self.comment = comment
        self.version = version
        self.values = values

    def __str__(self):
        yaml_type = to_yaml_type(self.type)
        description = doxygen2rst(indent(self.comment, 2))
        if self.version:
            header = "\n* ``%s %s`` :versionbadge:`clang-format %s`\n\n%s" % (
                yaml_type,
                self.name,
                self.version,
                description,
            )
        else:
            header = "\n* ``%s %s``\n%s" % (
                yaml_type,
                self.name,
                description,
            )
        values_heading = indent("\nPossible values:\n\n", 2)
        values_body = indent("\n".join(map(str, self.values)), 2)
        return header + values_heading + values_body
202
203
class EnumValue(object):
    """One enum member, with its comment and its configuration spelling."""

    def __init__(self, name, comment, config):
        self.name = name
        self.comment = comment
        self.config = config

    def __str__(self):
        # The configuration spelling is whatever follows the last underscore.
        config_name = re.sub(".*_", "", self.config)
        description = doxygen2rst(indent(self.comment, 2))
        return "* ``%s`` (in configuration: ``%s``)\n%s" % (
            self.name,
            config_name,
            description,
        )
216
217
class OptionsReader:
    """Extract documented options from a C++ header defining a style struct.

    The header is read line by line by a small state machine (see
    ``read_options``). The ``///`` comments preceding each field, enum, enum
    member and nested struct are collected, and a handful of Doxygen block
    commands are converted to reStructuredText on the way.
    """

    def __init__(self, header: TextIOWrapper):
        """Wrap an open header; ``header.name`` is only used in warnings."""
        self.header = header
        # State for validating the indentation of code-block contents.
        self.in_code_block = False
        self.code_indent = 0
        # 1-based number of the line currently being processed.
        self.lineno = 0
        # Line of the most recent indentation complaint, used to group
        # consecutive offending lines under a single warning.
        self.last_err_lineno = -1

    def __file_path(self):
        """Return the header path relative to the current directory."""
        return os.path.relpath(self.header.name)

    def __print_line(self, line: str):
        """Echo ``line`` to stderr, prefixed with the current line number."""
        print(f"{self.lineno:>6} | {line}", file=sys.stderr)

    def __warning(self, msg: str, line: str):
        """Print a ``file:line: warning:`` diagnostic followed by the line."""
        print(f"{self.__file_path()}:{self.lineno}: warning: {msg}:", file=sys.stderr)
        self.__print_line(line)

    def __clean_comment_line(self, line: str):
        r"""Convert one stripped ``///`` comment line to reStructuredText.

        ``\code`` (optionally ``\code{.lang}``), ``\warning`` and ``\note``
        become the corresponding directives; their ``\end...`` markers become
        the empty string. Any other line has its first four characters (the
        ``/// `` prefix) removed and a newline appended.

        While inside a code block, a warning is printed for lines that are
        not indented past the opening ``\code``.
        """
        # The dot of "{.lang}" is matched literally.
        match = re.match(r"^/// (?P<indent> +)?\\code(\{\.(?P<lang>\w+)\})?$", line)
        if match:
            if self.in_code_block:
                self.__warning("`\\code` in another `\\code`", line)
            self.in_code_block = True
            indent_str = match.group("indent")
            if not indent_str:
                indent_str = ""
            self.code_indent = len(indent_str)
            lang = match.group("lang")
            if not lang:
                lang = "c++"
            return f"\n{indent_str}.. code-block:: {lang}\n\n"

        endcode_match = re.match(r"^/// +\\endcode$", line)
        if endcode_match:
            if not self.in_code_block:
                self.__warning(
                    "no correct `\\code` found before this `\\endcode`", line
                )
            self.in_code_block = False
            return ""

        # check code block indentation
        if (
            self.in_code_block
            and not line == "///"
            and not line.startswith("///  " + " " * self.code_indent)
        ):
            # Consecutive offending lines are echoed under one warning header.
            if self.last_err_lineno == self.lineno - 1:
                self.__print_line(line)
            else:
                self.__warning("code block should be indented", line)
            self.last_err_lineno = self.lineno

        match = re.match(r"^/// \\warning$", line)
        if match:
            return "\n.. warning::\n\n"

        endwarning_match = re.match(r"^/// +\\endwarning$", line)
        if endwarning_match:
            return ""

        match = re.match(r"^/// \\note$", line)
        if match:
            return "\n.. note::\n\n"

        endnote_match = re.match(r"^/// +\\endnote$", line)
        if endnote_match:
            return ""
        return line[4:] + "\n"

    def __parse_version(self, line: str, current):
        r"""Return the version from a ``/// \version`` line.

        The result is ``None`` when the line carries no number; ``current``
        is returned only if the line does not match at all.
        """
        match = re.match(r"/// \\version\s*(?P<version>[0-9.]+)*", line)
        if match:
            return match.group("version")
        return current

    def __parse_field(self, line: str):
        """Split a ``Type name;`` declaration into ``(type, name)``.

        Raises:
            Exception: If ``line`` is not a field declaration. The message
                carries the line number and text.
        """
        match = re.match(r"([<>:\w(,\s)]+)\s+(\w+);", line)
        if not match:
            raise Exception(
                "Invalid format, expected a field declaration at line %d\n%s"
                % (self.lineno, line)
            )
        return match.groups()

    def __parse_enum_member(self, line: str, comment: str):
        """Build an ``EnumValue`` from an enum member line.

        Commas are removed; if the line has a trailing `` // text`` comment,
        that text is used as the configuration spelling instead of the member
        name.
        """
        val = line.replace(",", "")
        pos = val.find(" // ")
        if pos != -1:
            config = val[pos + 4 :]
            val = val[:pos]
        else:
            config = val
        return EnumValue(val, comment, config)

    def read_options(self):
        """Parse the header and return its top-level options.

        Returns:
            A list of ``Option`` objects in declaration order, with ``enum``
            or ``nested_struct`` attached when the option's type is an enum
            or nested struct declared in the same header.

        Raises:
            Exception: On an unexpected line after a comment, if the struct's
                closing ``};`` is never reached, or if an option has a type
                that is neither built in nor declared in the header.
        """

        class State:
            (
                BeforeStruct,
                Finished,
                InStruct,
                InNestedStruct,
                InNestedFieldComment,
                InFieldComment,
                InEnum,
                InEnumMemberComment,
                InNestedEnum,
                InNestedEnumMemberComment,
            ) = range(10)

        state = State.BeforeStruct

        options = []
        enums = {}
        nested_structs = {}
        comment = ""
        enum = None
        nested_struct = None
        version = None
        deprecated = False

        for line in self.header:
            self.lineno += 1
            line = line.strip()
            if state == State.BeforeStruct:
                if line in ("struct FormatStyle {", "struct IncludeStyle {"):
                    state = State.InStruct
            elif state == State.InStruct:
                if line.startswith("///"):
                    state = State.InFieldComment
                    comment = self.__clean_comment_line(line)
                elif line == "};":
                    state = State.Finished
                    break
            elif state == State.InFieldComment:
                if line.startswith(r"/// \version"):
                    version = self.__parse_version(line, version)
                elif line.startswith("/// @deprecated"):
                    deprecated = True
                elif line.startswith("///"):
                    comment += self.__clean_comment_line(line)
                elif line.startswith("enum"):
                    state = State.InEnum
                    name = re.sub(r"enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{", "\\1", line)
                    enum = Enum(name, comment)
                elif line.startswith("struct"):
                    state = State.InNestedStruct
                    name = re.sub(r"struct\s+(\w+)\s*\{", "\\1", line)
                    nested_struct = NestedStruct(name, comment)
                elif line.endswith(";"):
                    # A declaration written as "// Type name;" is accepted
                    # too (presumably commented-out fields that are still
                    # documented -- confirm against the headers).
                    prefix = "// "
                    if line.startswith(prefix):
                        line = line[len(prefix) :]
                    state = State.InStruct
                    field_type, field_name = self.__parse_field(line)
                    if deprecated:
                        field_type = "deprecated"
                        deprecated = False

                    if not version:
                        self.__warning(f"missing version for {field_name}", line)
                    option = Option(str(field_name), str(field_type), comment, version)
                    options.append(option)
                    version = None
                else:
                    raise Exception(
                        "Invalid format, expected comment, field or enum\n" + line
                    )
            elif state == State.InNestedStruct:
                if line.startswith("///"):
                    state = State.InNestedFieldComment
                    comment = self.__clean_comment_line(line)
                elif line == "};":
                    state = State.InStruct
                    nested_structs[nested_struct.name] = nested_struct
            elif state == State.InNestedFieldComment:
                if line.startswith(r"/// \version"):
                    version = self.__parse_version(line, version)
                elif line.startswith("///"):
                    comment += self.__clean_comment_line(line)
                elif line.startswith("enum"):
                    state = State.InNestedEnum
                    name = re.sub(r"enum\s+(\w+)\s*(:((\s*\w+)+)\s*)?\{", "\\1", line)
                    enum = Enum(name, comment)
                else:
                    state = State.InNestedStruct
                    field_type, field_name = self.__parse_field(line)
                    # Nested fields are not required to carry a version, so
                    # no "missing version" warning is issued here.
                    if field_type in enums:
                        nested_struct.values.append(
                            NestedEnum(
                                field_name,
                                field_type,
                                comment,
                                version,
                                enums[field_type].values,
                            )
                        )
                    else:
                        nested_struct.values.append(
                            NestedField(field_type + " " + field_name, comment, version)
                        )
                    version = None
            elif state == State.InEnum:
                if line.startswith("///"):
                    state = State.InEnumMemberComment
                    comment = self.__clean_comment_line(line)
                elif line == "};":
                    state = State.InStruct
                    enums[enum.name] = enum
                else:
                    # Enum member without documentation. Must be documented
                    # where the enum is used.
                    pass
            elif state == State.InNestedEnum:
                if line.startswith("///"):
                    state = State.InNestedEnumMemberComment
                    comment = self.__clean_comment_line(line)
                elif line == "};":
                    state = State.InNestedStruct
                    enums[enum.name] = enum
                else:
                    # Enum member without documentation. Must be
                    # documented where the enum is used.
                    pass
            elif state == State.InEnumMemberComment:
                if line.startswith("///"):
                    comment += self.__clean_comment_line(line)
                else:
                    state = State.InEnum
                    enum.values.append(self.__parse_enum_member(line, comment))
            elif state == State.InNestedEnumMemberComment:
                if line.startswith("///"):
                    comment += self.__clean_comment_line(line)
                else:
                    state = State.InNestedEnum
                    enum.values.append(self.__parse_enum_member(line, comment))
        if state != State.Finished:
            raise Exception("Not finished by the end of file")

        # Attach enum / nested-struct details to options whose type is not
        # one of the built-in spellings.
        for option in options:
            if option.type not in [
                "bool",
                "unsigned",
                "int",
                "std::string",
                "std::vector<std::string>",
                "std::vector<IncludeCategory>",
                "std::vector<RawStringFormat>",
                "std::optional<unsigned>",
                "deprecated",
            ]:
                if option.type in enums:
                    option.enum = enums[option.type]
                elif option.type in nested_structs:
                    option.nested_struct = nested_structs[option.type]
                else:
                    raise Exception("Unknown type: %s" % option.type)
        return options
476
477
# Command line: the only option redirects the regenerated document.
p = argparse.ArgumentParser()
p.add_argument("-o", "--output", help="path of output file")
args = p.parse_args()

# Collect the options of both headers, in the order the headers are listed.
opts = []
for header_path in (FORMAT_STYLE_FILE, INCLUDE_STYLE_FILE):
    with open(header_path) as f:
        opts += OptionsReader(f).read_options()

# The documentation lists options alphabetically by name.
opts.sort(key=lambda opt: opt.name)
options_text = "\n\n".join(str(opt) for opt in opts)

with open(DOC_FILE, encoding="utf-8") as f:
    contents = f.read()

contents = substitute(contents, "FORMAT_STYLE_OPTIONS", options_text)

# Without -o the documentation file is rewritten in place. newline="" keeps
# the line endings exactly as they are in the generated text.
output_path = args.output or DOC_FILE
with open(output_path, "w", newline="", encoding="utf-8") as f:
    f.write(contents)
499