xref: /llvm-project/llvm/utils/git/code-format-helper.py (revision c84f5a9e00c02e6a4349846ed59ec85154b65e3f)
1#!/usr/bin/env python3
2#
3# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==--------------------------------------------------------------------------------------==#
10
11import argparse
12import os
13import re
14import shlex
15import subprocess
16import sys
17from typing import List, Optional
18
19"""
This script is run by GitHub Actions to ensure that the code in PRs conforms to
21the coding style of LLVM. It can also be installed as a pre-commit git hook to
22check the coding style before submitting it. The canonical source of this script
23is in the LLVM source tree under llvm/utils/git.
24
25For C/C++ code it uses clang-format and for Python code it uses darker (which
26in turn invokes black).
27
28You can learn more about the LLVM coding style on llvm.org:
29https://llvm.org/docs/CodingStandards.html
30
31You can install this script as a git hook by symlinking it to the .git/hooks
32directory:
33
34ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit
35
36You can control the exact path to clang-format or darker with the following
37environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
38"""
39
40
class FormatArgs:
    """Options shared by every formatter run.

    The class-level values are the defaults used when no parsed command line
    is supplied (for example when running as a git hook). When an
    ``argparse.Namespace`` is given, its values override those defaults;
    ``verbose`` is never taken from the namespace and keeps its default.
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # Kept for backward compatibility with class-level access; every instance
    # gets its own list in __init__ so this one is never mutated.
    changed_files: List[str] = []
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: bool = False

    def __init__(self, args: argparse.Namespace = None) -> None:
        """Build the options, optionally copying them from parsed arguments.

        Args:
            args: Parsed command line providing ``start_rev``, ``end_rev``,
                ``repo``, ``token``, ``changed_files``, ``issue_number`` and
                ``write_comment_to_file``. ``None`` keeps the defaults.
        """
        # A fresh list per instance: appending to the class-level default
        # would silently share the entries between all FormatArgs objects.
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file
60
61
class FormatHelper:
    """Base class for a single formatting check and its pull-request reporting.

    Subclasses provide ``name``, ``friendly_name``, ``instructions``,
    ``has_tool`` and ``format_run``. ``run`` drives a check and, when a GitHub
    token and repository are configured, reports the outcome on the PR.
    """

    COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Filled in by update_pr() when comments are written to a file instead of
    # being posted; stays None otherwise.
    comment: dict = None

    @property
    def comment_tag(self) -> str:
        """Hidden marker used to recognise this formatter's PR comment."""
        # Only the text "fmt" is substituted, so the braces stay in the tag.
        # Existing comments are found by this exact string; keep it unchanged.
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Command line a user can run locally to reproduce the check."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Return True when the underlying formatter can be executed."""
        raise NotImplementedError()

    def format_run(self, changed_files: List[str], args: "FormatArgs") -> Optional[str]:
        """Run the formatter.

        Returns:
            ``None`` when nothing needs changing, a non-empty report/diff when
            issues were found, or an empty string when the tool failed
            without producing output.
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Render the PR comment body shown when the formatter found issues."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: any) -> any:
        """Return the existing comment carrying this formatter's tag, or None."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(self, comment_text: str, args: "FormatArgs", create_new: bool) -> None:
        """Post, edit or record the PR comment for this formatter.

        Args:
            comment_text: Body of the comment, without the hidden tag.
            args: Supplies the token, repository, issue number and the
                ``write_comment_to_file`` switch.
            create_new: Whether a comment may be created when none exists yet.
                An existing comment is always updated.
        """
        # Imported lazily so the github module is only needed when a PR is
        # actually being updated.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            # Record the comment for the caller instead of posting it.
            if create_new or existing_comment:
                self.comment = {"body": comment_text}
            if existing_comment:
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: "FormatArgs") -> bool:
        """Run the check and report the result.

        Files whose path contains ``third-party`` are ignored.

        Returns:
            True when the formatter is satisfied, False when it found issues
            or failed.
        """
        changed_files = [arg for arg in changed_files if "third-party" not in arg]
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True

        if diff:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False

        # The formatter failed but didn't output a diff (e.g. some sort of
        # infrastructure failure).
        if should_update_gh:
            comment_text = (
                f":warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output. :warning:"
            )
            self.update_pr(comment_text, args, create_new=False)
        else:
            # Without GitHub credentials (e.g. the pre-commit hook) there is
            # no PR to update; report locally instead of contacting GitHub.
            print(
                f"Warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output."
            )
        return False
172
173
class ClangFormatHelper(FormatHelper):
    """Checks C/C++ changes with git-clang-format."""

    name = "clang-format"
    friendly_name = "C/C++ code formatter"

    @property
    def instructions(self) -> str:
        """Shell-quoted command of the last ``format_run`` invocation."""
        # Quote every argument so the command can be pasted into a shell
        # even when it contains empty or unusual arguments.
        return " ".join(shlex.quote(part) for part in self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Return True for extensionless files under libcxx/include."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the paths that should be checked by clang-format."""
        cpp_extensions = (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm")
        filtered_files = []
        for path in changed_files:
            _, ext = os.path.splitext(path)
            if ext in cpp_extensions:
                filtered_files.append(path)
            elif ext == "" and self.should_include_extensionless_file(path):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Executable to run; overridable through $CLANG_FORMAT_PATH."""
        return os.environ.get("CLANG_FORMAT_PATH", "git-clang-format")

    def has_tool(self) -> bool:
        """Return True when the executable runs and ``-h`` exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError, subprocess.SubprocessError):
            # The tool is missing or cannot be launched. Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``git-clang-format --diff`` on the changed C/C++ files.

        Returns:
            ``None`` when there is nothing to check or the command exits with
            0; otherwise the command's stdout (the diff, or an empty string
            when the command failed without printing one).
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [self.clang_fmt_path, "--diff"]

        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        # Gather the extension of all modified files and pass them explicitly to git-clang-format.
        # This prevents git-clang-format from applying its own filtering rules on top of ours.
        # Exclude periods since git-clang-format takes extensions without them.
        extensions = {os.path.splitext(path)[1].strip(".") for path in cpp_files}
        cf_cmd.append("--extensions")
        # Sorted so the command (and the instructions shown to the user) do
        # not depend on set iteration order.
        cf_cmd.append(",".join(sorted(extensions)))

        cf_cmd.append("--")
        cf_cmd += cpp_files

        self.cf_cmd = cf_cmd
        if args.verbose:
            print(f"Running: {self.instructions}")
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            return None

        # formatting needed, or the command otherwise failed
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff
250
251
class DarkerFormatHelper(FormatHelper):
    """Checks Python changes with darker."""

    name = "darker"
    friendly_name = "Python code formatter"

    @property
    def instructions(self) -> str:
        """Shell-quoted command of the last ``format_run`` invocation."""
        # Quote every argument so the command can be pasted into a shell.
        return " ".join(shlex.quote(part) for part in self.darker_cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the paths ending in ``.py``."""
        return [path for path in changed_files if os.path.splitext(path)[1] == ".py"]

    @property
    def darker_fmt_path(self) -> str:
        """Executable to run; overridable through $DARKER_FORMAT_PATH."""
        return os.environ.get("DARKER_FORMAT_PATH", "darker")

    def has_tool(self) -> bool:
        """Return True when the executable runs and ``--version`` exits with 0."""
        cmd = [self.darker_fmt_path, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError, subprocess.SubprocessError):
            # The tool is missing or cannot be launched. Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``darker --check --diff`` on the changed Python files.

        Returns:
            ``None`` when there is nothing to check or the command exits with
            0; otherwise the command's stdout (the diff, or an empty string
            when the command failed without printing one).
        """
        py_files = self.filter_changed_files(changed_files)
        if not py_files:
            return None

        darker_cmd = [
            self.darker_fmt_path,
            "--check",
            "--diff",
        ]
        if args.start_rev and args.end_rev:
            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
        darker_cmd += py_files

        self.darker_cmd = darker_cmd
        if args.verbose:
            print(f"Running: {self.instructions}")
        proc = subprocess.run(
            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if args.verbose:
            sys.stdout.write(proc.stderr.decode("utf-8"))

        output = proc.stdout.decode("utf-8")
        if proc.returncode == 0:
            sys.stdout.write(output)
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(output)
        return output
315
316
class UndefGetFormatHelper(FormatHelper):
    """Flags changes that introduce new uses of ``undef`` / ``UndefValue::get``."""

    name = "undef deprecator"
    friendly_name = "undef deprecator"

    @property
    def instructions(self) -> str:
        """Shell-quoted command of the last ``format_run`` invocation."""
        return " ".join(shlex.quote(c) for c in self.cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only C/C++ sources, headers and ``.ll`` files."""
        checked_extensions = (
            ".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm", ".ll",
        )
        return [
            path
            for path in changed_files
            if os.path.splitext(path)[1] in checked_extensions
        ]

    def has_tool(self) -> bool:
        """Always True: the check only relies on git."""
        return True

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Render the PR comment; ``diff`` is the report from ``format_run``."""
        return f"""
:warning: {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

{diff}
"""

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Look for added lines that use ``undef`` or ``UndefValue::get``.

        Returns:
            ``None`` when no offending file is found (or there is nothing to
            check), otherwise a Markdown report listing the offending files.
        """
        files = self.filter_changed_files(changed_files)
        if not files:
            # Without any path argument git would diff every changed file,
            # including ones this check is not meant to look at.
            return None

        # Use git to find files that have had a change in the number of undefs
        regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)"
        cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex]

        if args.start_rev and args.end_rev:
            cmd.append(args.start_rev)
            cmd.append(args.end_rev)

        # Separate revisions from paths so a path is never parsed as a revision.
        cmd.append("--")
        cmd += files
        self.cmd = cmd

        if args.verbose:
            print(f"Running: {self.instructions}")

        proc = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
        )
        sys.stdout.write(proc.stderr)

        offenders = []
        # Split the diff so we have one array entry per file.
        # Each file is prefixed like:
        # diff --git a/file b/file
        for chunk in re.split("^diff --git ", proc.stdout, flags=re.MULTILINE):
            chunk_lines = chunk.splitlines()
            if not chunk_lines:
                continue
            header = re.match("a/([^ ]+)", chunk_lines[0])
            # Fall back to the raw header line if the path cannot be parsed,
            # rather than failing on the missing match.
            path = header[1] if header else chunk_lines[0]
            # We skip checking in MIR files as undef is a valid token and not
            # going away. The test is on the path: the chunk itself ends with
            # diff content, never with the file name.
            if path.endswith(".mir"):
                continue
            # Only look inside the hunks. The "+++ b/<path>" header line also
            # starts with "+" and would otherwise flag files whose *name*
            # contains "undef".
            first_hunk = re.search(r"^@@ ", chunk, flags=re.MULTILINE)
            if first_hunk is None:
                continue
            hunks = chunk[first_hunk.start():]
            # search for additions of undef
            if re.search(
                r"^[+](?!\s*#\s*).*(\bundef\b|UndefValue::get)", hunks, re.MULTILINE
            ):
                offenders.append(path)

        if not offenders:
            return None

        file_list = "\n".join(" - " + f for f in offenders)
        report = f"""
The following files introduce new uses of undef:
{file_list}

[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:
```llvm
define void @fn() {{
  ...
  br i1 undef, ...
}}
```

Please use the following instead:
```llvm
define void @fn(i1 %cond) {{
  ...
  br i1 %cond, ...
}}
```

Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.
"""
        if args.verbose:
            print(f"error: {self.name} failed")
            print(report)
        return report
424
425
# One instance of every available check; both the pre-commit hook and the
# command-line entry point iterate over this tuple in order.
ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper())
427
428
def hook_main():
    """Entry point when the script is installed as a git pre-commit hook.

    Runs every available formatter on the staged files and exits the process
    with status 1 if any of them fails, 0 otherwise. Set the environment
    variable FORMAT_HOOK_VERBOSE (e.g. to 1) for verbose output.
    """
    # fill out args
    args = FormatArgs()
    # os.getenv returns a string, so "0" or "false" would otherwise be
    # treated as enabled.
    verbose_env = os.getenv("FORMAT_HOOK_VERBOSE", "")
    args.verbose = verbose_env.strip().lower() not in ("", "0", "false", "no", "off")

    # find the changed files
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Assign a new list instead of appending, so the class-level default list
    # of FormatArgs is never mutated.
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if not fmt.has_tool():
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
            continue
        # No PR comments are collected in hook mode; results are only
        # reported on the terminal.
        if not fmt.run(args.changed_files, args):
            failed_fmts.append(fmt.name)

    if failed_fmts:
        print(
            "Pre-commit format hook failed, rerun with FORMAT_HOOK_VERBOSE=1 environment for verbose output"
        )
        sys.exit(1)

    sys.exit(0)
458
459
if __name__ == "__main__":
    # When installed (symlinked) under .git/hooks the script acts as a
    # pre-commit hook. Normalise separators so this also matches on Windows.
    script_path = os.path.abspath(__file__).replace(os.sep, "/")
    if ".git/hooks" in script_path:
        hook_main()
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="GitHub authentication token"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev", type=str, required=True, help="Compute changes to this revision"
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        action="store_true",
        help="Don't post comments on the PR, instead write the comments and metadata to a file called 'comments'",
    )

    args = FormatArgs(parser.parse_args())

    # --changed-files arrives as a single comma separated string.
    changed_files = []
    if args.changed_files:
        changed_files = args.changed_files.split(",")

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        # Only set when --write-comment-to-file is in effect.
        if fmt.comment:
            comments.append(fmt.comment)

    if comments:
        import json

        with open("comments", "w") as f:
            json.dump(comments, f)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)
519        sys.exit(1)
520