xref: /llvm-project/llvm/utils/git/code-format-helper.py (revision bc06cd5cbcfc22dd976f6742d10bc934e1353b8a)
1#!/usr/bin/env python3
2#
3# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==--------------------------------------------------------------------------------------==#
10
11import argparse
12import os
13import subprocess
14import sys
15from typing import List, Optional
16
17"""
This script is run by GitHub Actions to ensure that the code in PRs conforms to
19the coding style of LLVM. It can also be installed as a pre-commit git hook to
20check the coding style before submitting it. The canonical source of this script
21is in the LLVM source tree under llvm/utils/git.
22
23For C/C++ code it uses clang-format and for Python code it uses darker (which
24in turn invokes black).
25
26You can learn more about the LLVM coding style on llvm.org:
27https://llvm.org/docs/CodingStandards.html
28
29You can install this script as a git hook by symlinking it to the .git/hooks
30directory:
31
32ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit
33
34You can control the exact path to clang-format or darker with the following
35environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
36"""
37
38
class FormatArgs:
    """Options shared by every formatter run.

    The class-level values below are the defaults used when no parsed command
    line is supplied; ``__init__`` overrides them from an
    ``argparse.Namespace`` when one is given.
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # No class-level list here: a mutable class attribute would be shared by
    # every instance, so each instance gets its own list in __init__.
    changed_files: List[str]
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: bool = False

    def __init__(self, args: Optional[argparse.Namespace] = None) -> None:
        """Build the options, optionally copying them from parsed arguments.

        Args:
            args: Parsed command-line arguments providing ``start_rev``,
                ``end_rev``, ``repo``, ``token``, ``changed_files``,
                ``issue_number`` and ``write_comment_to_file``. When omitted,
                the class-level defaults apply and ``changed_files`` starts
                out as a fresh empty list.
        """
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            # Copied verbatim from the namespace; whatever type the parser
            # produced is kept as-is.
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file
58
59
class FormatHelper:
    """Base class for one code formatter.

    Subclasses set ``name`` and ``friendly_name`` and implement
    ``instructions``, ``has_tool`` and ``format_run``. ``run`` drives a
    subclass's ``format_run`` and reports the outcome either on the GitHub
    pull request or on stdout.
    """

    COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Set by update_pr() when the comment is to be written to a file instead
    # of being posted; holds "body" and, for an existing comment, its "id".
    comment: dict = None

    @property
    def comment_tag(self) -> str:
        """Hidden HTML marker used to recognise this formatter's PR comment.

        Only the text ``fmt`` is substituted, so the surrounding braces of the
        template remain part of the tag.
        """
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Command line a user can run locally to reproduce the check."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Return True when the formatter executable can be run."""
        raise NotImplementedError()

    def format_run(
        self, changed_files: List[str], args: "FormatArgs"
    ) -> Optional[str]:
        """Run the formatter over ``changed_files``.

        Returns:
            ``None`` when nothing needs reformatting, otherwise the
            formatter's output. ``run`` treats an empty string as "the tool
            failed without producing a diff".
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Return the Markdown body of the PR comment reporting ``diff``."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: any) -> any:
        """Return the first comment on ``pr`` that carries our tag, or None."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(
        self, comment_text: str, args: "FormatArgs", create_new: bool
    ) -> None:
        """Post ``comment_text`` on the pull request, or stage it for a file.

        Args:
            comment_text: Comment body; the comment tag is prepended to it.
            args: Supplies the token, repository, issue number and the
                ``write_comment_to_file`` switch.
            create_new: Whether a new comment may be created when none of the
                existing comments carries our tag.
        """
        # Imported lazily so that the github module is only required when a
        # PR is actually being updated.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            # Nothing is posted; the caller collects self.comment afterwards.
            self.comment = {"body": comment_text}
            if existing_comment:
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: "FormatArgs") -> bool:
        """Run the formatter and report the result.

        GitHub is only contacted when both a token and a repository are
        available; otherwise problems are reported on stdout.

        Returns:
            True when the formatter found nothing to change, False when it
            produced a diff or failed.
        """
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True

        if diff:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False

        # The formatter failed but didn't output a diff (e.g. some sort of
        # infrastructure failure). Without GitHub credentials there is no PR
        # to update, so report the failure locally instead.
        if should_update_gh:
            comment_text = (
                f":warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output. :warning:"
            )
            self.update_pr(comment_text, args, create_new=False)
        else:
            print(
                f"Warning: the {self.friendly_name} failed without printing "
                "a diff."
            )
        return False
168
169
class ClangFormatHelper(FormatHelper):
    """Checks C/C++ sources by running ``git-clang-format --diff``."""

    name = "clang-format"
    friendly_name = "C/C++ code formatter"

    # File extensions that are passed on to clang-format.
    _CPP_EXTENSIONS = (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm")

    @property
    def instructions(self) -> str:
        """The command line of the most recent ``format_run`` invocation.

        ``self.cf_cmd`` is only assigned by ``format_run``, so this is
        meaningful only after a run that reached the formatter.
        """
        return " ".join(self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Return True for extensionless paths under ``libcxx/include``."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Return the entries of ``changed_files`` clang-format should see.

        A path is kept when its extension is a known C/C++ one, or when it
        has no extension and ``should_include_extensionless_file`` accepts it.
        """
        filtered_files = []
        for path in changed_files:
            ext = os.path.splitext(path)[1]
            if ext in self._CPP_EXTENSIONS or (
                ext == "" and self.should_include_extensionless_file(path)
            ):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Executable to run; ``$CLANG_FORMAT_PATH`` overrides the default."""
        return os.environ.get("CLANG_FORMAT_PATH", "git-clang-format")

    def has_tool(self) -> bool:
        """Return True when ``<tool> -h`` can be started and exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # The executable is missing, not runnable, or the configured path
            # is malformed. Anything else (e.g. KeyboardInterrupt) propagates.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``git-clang-format --diff`` on the C/C++ files that changed.

        Args:
            changed_files: Candidate paths; non C/C++ entries are ignored.
            args: Supplies the optional revision range and the verbosity.

        Returns:
            ``None`` when there are no C/C++ files or the tool exits with 0;
            otherwise the tool's stdout, which may be empty if it failed
            before producing a diff.
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [self.clang_fmt_path, "--diff"]

        # The revision range is only passed when both ends are known.
        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        cf_cmd.append("--")
        cf_cmd += cpp_files

        if args.verbose:
            print(f"Running: {' '.join(cf_cmd)}")
        # Remembered so that `instructions` can show the exact command.
        self.cf_cmd = cf_cmd
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            return None

        # formatting needed, or the command otherwise failed
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff
235
236
class DarkerFormatHelper(FormatHelper):
    """Checks Python sources by running ``darker --check --diff``."""

    name = "darker"
    friendly_name = "Python code formatter"

    @property
    def instructions(self) -> str:
        """The command line of the most recent ``format_run`` invocation.

        ``self.darker_cmd`` is only assigned by ``format_run``, so this is
        meaningful only after a run that reached the formatter.
        """
        return " ".join(self.darker_cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Return the entries of ``changed_files`` whose extension is ``.py``."""
        return [
            path for path in changed_files if os.path.splitext(path)[1] == ".py"
        ]

    @property
    def darker_fmt_path(self) -> str:
        """Executable to run; ``$DARKER_FORMAT_PATH`` overrides the default."""
        return os.environ.get("DARKER_FORMAT_PATH", "darker")

    def has_tool(self) -> bool:
        """Return True when ``<tool> --version`` can be started and exits with 0."""
        cmd = [self.darker_fmt_path, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # The executable is missing, not runnable, or the configured path
            # is malformed. Anything else (e.g. KeyboardInterrupt) propagates.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run ``darker --check --diff`` on the Python files that changed.

        Args:
            changed_files: Candidate paths; non-Python entries are ignored.
            args: Supplies the optional revision range and the verbosity.

        Returns:
            ``None`` when there are no Python files or the tool exits with 0;
            otherwise the tool's stdout, which may be empty if it failed
            before producing a diff.
        """
        py_files = self.filter_changed_files(changed_files)
        if not py_files:
            return None

        darker_cmd = [
            self.darker_fmt_path,
            "--check",
            "--diff",
        ]
        # The revision range is only passed when both ends are known.
        if args.start_rev and args.end_rev:
            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
        darker_cmd += py_files
        if args.verbose:
            print(f"Running: {' '.join(darker_cmd)}")
        # Remembered so that `instructions` can show the exact command.
        self.darker_cmd = darker_cmd
        proc = subprocess.run(
            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if args.verbose:
            sys.stdout.write(proc.stderr.decode("utf-8"))

        output = proc.stdout.decode("utf-8")
        if proc.returncode == 0:
            sys.stdout.write(output)
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(output)
        return output
300
301
# One instance of every formatter this script runs; both hook_main() and the
# command-line entry point iterate over this tuple in order.
ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper())
303
304
def hook_main():
    """Entry point used when the script runs as a git pre-commit hook.

    Runs every available formatter over the staged files (deleted files are
    excluded) and terminates the process with exit status 1 when any
    formatter reports a problem, 0 otherwise. No GitHub token or repository
    is set on this path, so nothing is posted to a pull request.
    """
    # fill out args
    args = FormatArgs()
    args.verbose = False

    # find the changed files
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Assign a fresh list instead of appending, so that no list shared with
    # other FormatArgs instances is ever mutated.
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if not fmt.has_tool():
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
            continue
        if not fmt.run(args.changed_files, args):
            failed_fmts.append(fmt.name)

    sys.exit(1 if failed_fmts else 0)
331
332
if __name__ == "__main__":
    # When installed (symlinked) into .git/hooks the script acts as a
    # pre-commit hook and takes no command-line arguments.
    script_path = os.path.abspath(__file__)
    if ".git/hooks" in script_path:
        hook_main()
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="GitHub authentication token"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev", type=str, required=True, help="Compute changes to this revision"
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        action="store_true",
        help="Don't post comments on the PR, instead write the comments and metadata to a file called 'comments'",
    )

    args = FormatArgs(parser.parse_args())

    # --changed-files arrives as one comma-separated string (or not at all).
    changed_files = []
    if args.changed_files:
        changed_files = args.changed_files.split(",")

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        # Only populated when --write-comment-to-file is in effect.
        if fmt.comment:
            comments.append(fmt.comment)

    if comments:
        import json

        with open("comments", "w") as f:
            json.dump(comments, f)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)
392        sys.exit(1)
393