xref: /llvm-project/llvm/utils/git/code-format-helper.py (revision de917dc20ece9f23eaefe5354bbc9ca194ce7555)
1#!/usr/bin/env python3
2#
3# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==--------------------------------------------------------------------------------------==#
10
11import argparse
12import os
13import subprocess
14import sys
15from typing import List, Optional
16
17"""
This script is run by GitHub Actions to ensure that the code in PRs conforms to
19the coding style of LLVM. It can also be installed as a pre-commit git hook to
20check the coding style before submitting it. The canonical source of this script
21is in the LLVM source tree under llvm/utils/git.
22
23For C/C++ code it uses clang-format and for Python code it uses darker (which
24in turn invokes black).
25
26You can learn more about the LLVM coding style on llvm.org:
27https://llvm.org/docs/CodingStandards.html
28
29You can install this script as a git hook by symlinking it to the .git/hooks
30directory:
31
32ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit
33
34You can control the exact path to clang-format or darker with the following
35environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
36"""
37
38
class FormatArgs:
    """Options shared by all formatter helpers.

    Constructed without arguments, every field keeps the default declared
    below (this is what the git-hook mode does). Constructed from an
    ``argparse.Namespace``, the fields are copied from the parsed command line.

    ``verbose`` is never read from the namespace; callers flip it by hand.
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # Created per instance in __init__: a class-level ``[]`` default would be
    # one list shared (and mutated) by every FormatArgs object.
    # NOTE: when built from a namespace this holds the raw ``changed_files``
    # value of that namespace, which the command-line caller splits on ",".
    changed_files: List[str]
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: bool = False

    def __init__(self, args: Optional[argparse.Namespace] = None) -> None:
        """Initialise from ``args`` if given, otherwise keep the defaults.

        Args:
            args: Parsed command line providing ``start_rev``, ``end_rev``,
                ``repo``, ``token``, ``changed_files``, ``issue_number`` and
                ``write_comment_to_file``; ``None`` keeps every default.
        """
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file
58
59
class FormatHelper:
    """Base class wrapping one external code formatter.

    Subclasses provide ``name``, ``friendly_name``, ``instructions``,
    ``has_tool`` and ``format_run``. ``run`` drives ``format_run`` and reports
    the outcome on the console and/or as a GitHub pull-request comment.
    """

    COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Filled in by update_pr() when args.write_comment_to_file is set: a dict
    # with a "body" key and, if a previous comment exists, its "id".
    comment: Optional[dict] = None

    @property
    def comment_tag(self) -> str:
        """Marker text used to recognise this formatter's PR comment.

        Only the letters ``fmt`` of COMMENT_TAG are substituted, so the
        surrounding braces remain part of the tag.
        """
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Command a user can run locally to reproduce the check."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Return True if the underlying formatter executable can be run."""
        raise NotImplementedError()

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run the formatter over ``changed_files``.

        Returns:
            ``None`` when nothing needs reformatting, otherwise the formatter's
            output (a diff, or an empty string if it failed without one).
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Build the Markdown comment body that presents ``diff`` to the author."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: any) -> any:
        """Return the first comment on ``pr`` carrying our tag, or ``None``."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(self, comment_text: str, args: FormatArgs, create_new: bool) -> None:
        """Publish ``comment_text`` for the pull request ``args.issue_number``.

        An existing tagged comment is edited in place; a new one is created
        only when ``create_new`` is true. With ``args.write_comment_to_file``
        nothing is posted: the comment is stored in ``self.comment`` instead.
        """
        # Local import: the github module is only needed when a PR is updated.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            if create_new or existing_comment:
                self.comment = {"body": comment_text}
            if existing_comment:
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: FormatArgs) -> bool:
        """Check ``changed_files`` and report the result.

        Paths containing ``third-party`` are skipped. The pull request is only
        touched when both ``args.token`` and ``args.repo`` are set.

        Returns:
            True if the formatter had nothing to complain about, False if it
            produced a diff or failed.
        """
        changed_files = [arg for arg in changed_files if "third-party" not in arg]
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True

        if diff:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False

        # The formatter failed but didn't output a diff (e.g. some sort of
        # infrastructure failure).
        if should_update_gh:
            comment_text = (
                f":warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output. :warning:"
            )
            self.update_pr(comment_text, args, create_new=False)
        else:
            # Without a token/repo there is no PR to update; contacting
            # GitHub here would only raise.
            print(
                f"Warning: The {self.friendly_name} failed without printing "
                "a diff."
            )
        return False
170
171
class ClangFormatHelper(FormatHelper):
    """Formatter helper that checks C/C++ files with git-clang-format."""

    name: str = "clang-format"
    friendly_name: str = "C/C++ code formatter"

    @property
    def instructions(self) -> str:
        """The command line used by the most recent ``format_run``.

        ``cf_cmd`` is only assigned once ``format_run`` has actually invoked
        the tool, so this is meaningful only after such a run.
        """
        return " ".join(self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Return True if an extensionless ``path`` lies under libcxx/include."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the paths this formatter should look at.

        A path is kept when it has one of the recognised C/C++ extensions, or
        when it has no extension and ``should_include_extensionless_file``
        accepts it.
        """
        cpp_extensions = (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm")
        filtered_files = []
        for path in changed_files:
            ext = os.path.splitext(path)[1]
            if ext in cpp_extensions:
                filtered_files.append(path)
            elif ext == "" and self.should_include_extensionless_file(path):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Executable to run: $CLANG_FORMAT_PATH if set, else git-clang-format."""
        return os.environ.get("CLANG_FORMAT_PATH", "git-clang-format")

    def has_tool(self) -> bool:
        """Return True if ``<clang_fmt_path> -h`` can be run and exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # OSError: executable missing or not runnable; ValueError: the
            # configured path is not a valid argument. Anything else
            # (e.g. KeyboardInterrupt) is deliberately left to propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``<clang_fmt_path> --diff`` over the C/C++ files in ``changed_files``.

        When both ``args.start_rev`` and ``args.end_rev`` are set they are
        passed to the tool ahead of the ``--`` separator.

        Returns:
            ``None`` if there are no matching files or the tool exits with 0;
            otherwise the tool's decoded stdout (possibly empty).
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [self.clang_fmt_path, "--diff"]

        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        cf_cmd.append("--")
        cf_cmd += cpp_files

        if args.verbose:
            print(f"Running: {' '.join(cf_cmd)}")
        # Remembered so that ``instructions`` can show the exact command.
        self.cf_cmd = cf_cmd
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff
237
238
class DarkerFormatHelper(FormatHelper):
    """Formatter helper that checks Python files with darker."""

    name: str = "darker"
    friendly_name: str = "Python code formatter"

    @property
    def instructions(self) -> str:
        """The command line used by the most recent ``format_run``.

        ``darker_cmd`` is only assigned once ``format_run`` has actually
        invoked the tool, so this is meaningful only after such a run.
        """
        return " ".join(self.darker_cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Return the entries of ``changed_files`` whose extension is ``.py``."""
        return [path for path in changed_files if os.path.splitext(path)[1] == ".py"]

    @property
    def darker_fmt_path(self) -> str:
        """Executable to run: $DARKER_FORMAT_PATH if set, else darker."""
        return os.environ.get("DARKER_FORMAT_PATH", "darker")

    def has_tool(self) -> bool:
        """Return True if ``<darker_fmt_path> --version`` can be run and exits with 0."""
        cmd = [self.darker_fmt_path, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # OSError: executable missing or not runnable; ValueError: the
            # configured path is not a valid argument. Anything else
            # (e.g. KeyboardInterrupt) is deliberately left to propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``<darker_fmt_path> --check --diff`` over the Python files in ``changed_files``.

        When both ``args.start_rev`` and ``args.end_rev`` are set, the range
        ``start...end`` is passed with ``-r``.

        Returns:
            ``None`` if there are no matching files or the tool exits with 0;
            otherwise the tool's decoded stdout (possibly empty).
        """
        py_files = self.filter_changed_files(changed_files)
        if not py_files:
            return None

        darker_cmd = [self.darker_fmt_path, "--check", "--diff"]
        if args.start_rev and args.end_rev:
            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
        darker_cmd += py_files

        if args.verbose:
            print(f"Running: {' '.join(darker_cmd)}")
        # Remembered so that ``instructions`` can show the exact command.
        self.darker_cmd = darker_cmd
        proc = subprocess.run(
            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if args.verbose:
            sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            sys.stdout.write(proc.stdout.decode("utf-8"))
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff
302
303
# One instance of each supported formatter helper. Both hook_main() and the
# command-line entry point iterate over this tuple, in this order.
ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper())
305
306
def hook_main():
    """Entry point used when the script runs as a git pre-commit hook.

    Collects the staged files from ``git diff --cached`` and runs every
    formatter in ALL_FORMATTERS whose tool is available. No token or repo is
    set on the arguments, so the formatters only report on the console.

    Always terminates the process: exit status 1 if any formatter reported a
    problem, 0 otherwise.
    """
    # fill out args
    args = FormatArgs()
    args.verbose = False

    # find the changed files
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Assign a fresh list instead of appending in place, so no list object
    # shared through the FormatArgs class is ever mutated.
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if not fmt.has_tool():
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
            continue
        # No collection of fmt.comment here: there is no comment list in this
        # scope, and referencing one would raise NameError.
        if not fmt.run(args.changed_files, args):
            failed_fmts.append(fmt.name)

    if failed_fmts:
        sys.exit(1)

    sys.exit(0)
333
334
if __name__ == "__main__":
    # Installed as a git hook (see the usage notes at the top of the file):
    # run in hook mode. Path separators are normalised first so the check
    # also matches on platforms where os.sep is not "/".
    script_path = os.path.abspath(__file__)
    if ".git/hooks" in script_path.replace(os.sep, "/"):
        hook_main()
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="GitHub authentication token"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev", type=str, required=True, help="Compute changes to this revision"
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        action="store_true",
        help="Don't post comments on the PR, instead write the comments and metadata to a file called 'comments'",
    )

    args = FormatArgs(parser.parse_args())

    # --changed-files arrives as one comma separated string (or not at all).
    changed_files = args.changed_files.split(",") if args.changed_files else []

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        # Only populated when --write-comment-to-file is in effect.
        if fmt.comment:
            comments.append(fmt.comment)

    if comments:
        import json

        with open("comments", "w") as f:
            json.dump(comments, f)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)
395