xref: /llvm-project/llvm/utils/git/code-format-helper.py (revision c4aa83840b72b9eb94e6bc2088326fb27c43ada6)
#!/usr/bin/env python3
#
# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==--------------------------------------------------------------------------------------==#
10
import argparse
import os
import subprocess
import sys
from typing import Any, List, Optional
16
"""
This script is run by GitHub Actions to ensure that the code in PRs conforms to
the coding style of LLVM. It can also be installed as a pre-commit git hook to
check the coding style before submitting it. The canonical source of this script
is in the LLVM source tree under llvm/utils/git.

For C/C++ code it uses clang-format and for Python code it uses darker (which
in turn invokes black).

You can learn more about the LLVM coding style on llvm.org:
https://llvm.org/docs/CodingStandards.html

You can install this script as a git hook by symlinking it to the .git/hooks
directory:

ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit

You can control the exact path to clang-format or darker with the following
environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
"""
37
38
class FormatArgs:
    """Settings shared by every formatter invocation.

    The class-level values are the defaults an instance keeps when it is
    built without a parsed command line.
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # Class-level default only; __init__ gives every instance its own list so
    # that in-place mutation can never leak into this shared object.
    changed_files: List[str] = []
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: bool = False

    def __init__(self, args: Optional[argparse.Namespace] = None) -> None:
        """Populate the settings from ``args`` when a namespace is supplied.

        Args:
            args: Parsed command line providing ``start_rev``, ``end_rev``,
                ``repo``, ``token``, ``changed_files``, ``issue_number`` and
                ``write_comment_to_file``. When ``None`` the defaults are
                kept. ``verbose`` is never read from ``args``.
        """
        # A fresh list per instance: appending to the inherited class
        # attribute would modify the default seen by every other instance.
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            # Stored exactly as the namespace provides it.
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file
58
59
class FormatHelper:
    """Base class tying one code formatter to its pull-request reporting.

    Subclasses provide ``name`` and ``friendly_name`` and implement
    ``instructions``, ``has_tool`` and ``format_run``.
    """

    COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Set by update_pr() when comments are written to a file instead of being
    # posted: {"body": <text>} plus "id" when an earlier comment exists.
    comment: Optional[dict] = None

    @property
    def comment_tag(self) -> str:
        """Return the hidden HTML marker that identifies this formatter's comment."""
        # Only the text "fmt" is substituted, so the braces of COMMENT_TAG stay
        # in the marker. This is kept as is: find_comment() looks for exactly
        # this text in comments that update_pr() tagged earlier.
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Return the command a user can run locally to reproduce the check."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Return True when the formatter executable can be run."""
        raise NotImplementedError()

    def format_run(
        self, changed_files: List[str], args: "FormatArgs"
    ) -> Optional[str]:
        """Run the formatter on ``changed_files``.

        Returns:
            ``None`` when nothing needs to change, otherwise the formatter's
            output (expected to be a diff; an empty string signals a failure
            that produced no diff, see ``run``).
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Build the Markdown comment body that reports ``diff`` on the PR."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: Any should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: Any) -> Any:
        """Return the first comment on ``pr`` carrying this formatter's tag, or None."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(
        self, comment_text: str, args: "FormatArgs", create_new: bool
    ) -> None:
        """Publish ``comment_text`` for the pull request named in ``args``.

        An existing tagged comment is edited in place; a new one is created
        only when ``create_new`` is true. When ``args.write_comment_to_file``
        is set nothing is posted and the payload is stored in ``self.comment``
        instead.
        """
        # Imported lazily so the script can run without the github module
        # whenever no pull request has to be updated.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            if create_new or existing_comment:
                self.comment = {"body": comment_text}
            if existing_comment:
                # The id lets the consumer of the file update that comment.
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: "FormatArgs") -> bool:
        """Check ``changed_files`` and report the outcome.

        Paths containing "third-party" are skipped. The pull request is only
        updated when both ``args.token`` and ``args.repo`` are set; otherwise
        problems are reported on stdout.

        Returns:
            True when the formatter found nothing to change, False when it
            produced a diff or failed.
        """
        changed_files = [path for path in changed_files if "third-party" not in path]
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True

        if len(diff) > 0:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False

        # The formatter failed but didn't output a diff (e.g. some sort of
        # infrastructure failure).
        if should_update_gh:
            comment_text = (
                f":warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output. :warning:"
            )
            self.update_pr(comment_text, args, create_new=False)
        else:
            # Without a token and repository there is no pull request to
            # update, so GitHub must not be contacted; report locally.
            print(
                f"Warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output."
            )
        return False
170
171
class ClangFormatHelper(FormatHelper):
    """Checks C/C++ sources by running git-clang-format in diff mode."""

    name = "clang-format"
    friendly_name = "C/C++ code formatter"

    # File extensions that are handed to git-clang-format.
    CPP_EXTENSIONS = (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm")

    @property
    def instructions(self) -> str:
        """Return the command line built by the last ``format_run`` call.

        Only available once ``format_run`` has assembled a command.
        """
        return " ".join(self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Return True for extensionless paths under ``libcxx/include``."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Return the entries of ``changed_files`` that should be checked.

        A path is kept when its extension is in ``CPP_EXTENSIONS`` or when it
        has no extension and ``should_include_extensionless_file`` accepts it.
        """
        filtered_files = []
        for path in changed_files:
            _, ext = os.path.splitext(path)
            if ext in self.CPP_EXTENSIONS:
                filtered_files.append(path)
            elif ext == "" and self.should_include_extensionless_file(path):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Return $CLANG_FORMAT_PATH when set, otherwise "git-clang-format"."""
        return os.environ.get("CLANG_FORMAT_PATH", "git-clang-format")

    def has_tool(self) -> bool:
        """Return True when ``<clang_fmt_path> -h`` runs and exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError, subprocess.SubprocessError):
            # The executable is missing or cannot be launched. Narrower than
            # a bare except so that Ctrl-C and SystemExit still propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run git-clang-format in diff mode over the C/C++ files.

        Returns:
            ``None`` when there are no matching files or the tool exits with
            0; otherwise the tool's stdout (the diff, or an empty string when
            it failed without printing one).
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [self.clang_fmt_path, "--diff"]

        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        # Gather the extension of all modified files and pass them explicitly to git-clang-format.
        # This prevents git-clang-format from applying its own filtering rules on top of ours.
        # Periods are excluded since git-clang-format takes extensions without them.
        extensions = {os.path.splitext(path)[1].strip(".") for path in cpp_files}
        cf_cmd.append("--extensions")
        # Sorted so that the command, which is echoed into the PR comment via
        # `instructions`, is identical from one run to the next.
        cf_cmd.append(",".join(sorted(extensions)))

        cf_cmd.append("--")
        cf_cmd += cpp_files

        if args.verbose:
            print(f"Running: {' '.join(cf_cmd)}")
        self.cf_cmd = cf_cmd
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            return None

        # formatting needed, or the command otherwise failed
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff
247            return None
248
249
class DarkerFormatHelper(FormatHelper):
    """Checks Python sources by running darker in check/diff mode."""

    name = "darker"
    friendly_name = "Python code formatter"

    @property
    def instructions(self) -> str:
        """Return the command line built by the last ``format_run`` call.

        Only available once ``format_run`` has assembled a command.
        """
        return " ".join(self.darker_cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Return the entries of ``changed_files`` whose extension is ``.py``."""
        return [path for path in changed_files if os.path.splitext(path)[1] == ".py"]

    @property
    def darker_fmt_path(self) -> str:
        """Return $DARKER_FORMAT_PATH when set, otherwise "darker"."""
        return os.environ.get("DARKER_FORMAT_PATH", "darker")

    def has_tool(self) -> bool:
        """Return True when ``<darker_fmt_path> --version`` runs and exits with 0."""
        cmd = [self.darker_fmt_path, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError, subprocess.SubprocessError):
            # The executable is missing or cannot be launched. Narrower than
            # a bare except so that Ctrl-C and SystemExit still propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``darker --check --diff`` over the Python files.

        Returns:
            ``None`` when there are no Python files or darker exits with 0;
            otherwise darker's stdout (the diff, or an empty string when it
            failed without printing one).
        """
        py_files = self.filter_changed_files(changed_files)
        if not py_files:
            return None

        darker_cmd = [self.darker_fmt_path, "--check", "--diff"]
        if args.start_rev and args.end_rev:
            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
        darker_cmd += py_files

        if args.verbose:
            print(f"Running: {' '.join(darker_cmd)}")
        self.darker_cmd = darker_cmd
        proc = subprocess.run(
            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if args.verbose:
            sys.stdout.write(proc.stderr.decode("utf-8"))

        output = proc.stdout.decode("utf-8")
        if proc.returncode == 0:
            sys.stdout.write(output)
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(output)
        return output
313
314
# One instance of every supported formatter, in the order they are run.
ALL_FORMATTERS = (
    DarkerFormatHelper(),
    ClangFormatHelper(),
)
316
317
def hook_main():
    """Entry point used when the script is installed as a git pre-commit hook.

    Runs every available formatter on the staged files and exits the process
    with status 1 when at least one of them reports a problem, 0 otherwise.
    """
    # fill out args
    args = FormatArgs()
    # Any non-empty FORMAT_HOOK_VERBOSE value enables verbose output.
    args.verbose = bool(os.getenv("FORMAT_HOOK_VERBOSE"))

    # find the changed files (staged paths; --diff-filter=d leaves out deletions)
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Assign a fresh list instead of appending in place, so a list shared
    # through the FormatArgs class default is never mutated.
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if not fmt.has_tool():
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
            continue
        # No PR comments are collected here: hook mode has no token, so the
        # formatters never record one.
        if not fmt.run(args.changed_files, args):
            failed_fmts.append(fmt.name)

    if failed_fmts:
        print(
            "Pre-commit format hook failed, rerun with FORMAT_HOOK_VERBOSE=1 environment for verbose output"
        )
        sys.exit(1)

    sys.exit(0)
347
348
if __name__ == "__main__":
    # When the script lives in (or is symlinked into) .git/hooks it acts as a
    # pre-commit hook and takes no command line arguments.
    script_path = os.path.abspath(__file__)
    if ".git/hooks" in script_path:
        hook_main()
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="GitHub authentication token"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev", type=str, required=True, help="Compute changes to this revision"
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        action="store_true",
        help="Don't post comments on the PR, instead write the comments and metadata to a file called 'comments'",
    )

    args = FormatArgs(parser.parse_args())

    # --changed-files arrives as one comma separated string (or not at all).
    changed_files = args.changed_files.split(",") if args.changed_files else []

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        # Only set when --write-comment-to-file asked the formatter to record
        # its comment instead of posting it.
        if fmt.comment:
            comments.append(fmt.comment)

    if comments:
        import json

        with open("comments", "w") as f:
            json.dump(comments, f)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)
408        sys.exit(1)
409