xref: /llvm-project/llvm/utils/git/github-automation.py (revision 56444d5687818938a6ce798e7221aa920c54098e)
1#!/usr/bin/env python3
2#
3# ======- github-automation - LLVM GitHub Automation Routines--*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==-------------------------------------------------------------------------==#
10
11import argparse
12from git import Repo  # type: ignore
13import html
14import github
15import os
16import re
17import requests
18import sys
19import time
20from typing import List, Optional
21
# Markdown text of the extra comment that IssueSubscriber.run() posts on an
# issue when the notified team is "issue-subscribers-good-first-issue".
# The text is sent to GitHub verbatim, so edits here change what users see.
beginner_comment = """
Hi!

This issue may be a good introductory issue for people new to working on LLVM. If you would like to work on this issue, your first steps are:

1. In the comments of the issue, request for it to be assigned to you.
2. Fix the issue locally.
3. [Run the test suite](https://llvm.org/docs/TestingGuide.html#unit-and-regression-tests) locally. Remember that the subdirectories under `test/` create fine-grained testing targets, so you can e.g. use `make check-clang-ast` to only run Clang's AST tests.
4. Create a Git commit.
5. Run [`git clang-format HEAD~1`](https://clang.llvm.org/docs/ClangFormat.html#git-integration) to format your changes.
6. Open a [pull request](https://github.com/llvm/llvm-project/pulls) to the [upstream repository](https://github.com/llvm/llvm-project) on GitHub. Detailed instructions can be found [in GitHub's documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).

If you have any further questions about this issue, don't hesitate to ask via a comment in the thread below.
"""
36
37
def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
    """Return the first team in `teams` whose lower-cased name is `team_name`.

    `team_name` is compared as given, so callers pass it already lower-cased.
    Returns None when no team matches.
    """
    matches = (candidate for candidate in teams if team_name == candidate.name.lower())
    # The generator is lazy, so iteration over `teams` stops at the first hit.
    return next(matches, None)
43
44
def escape_description(str):
    """Escape an issue/pull request description for quoting in a comment.

    HTML special characters (`&`, `<`, `>`) are escaped, and an HTML comment
    is inserted after every `@` that is followed by a word character and
    after every `#` that is followed by a digit.

    :param str: The description text, or None when the description is empty.
        (The name shadows the builtin but is kept for interface compatibility.)
    :return: The escaped text; "" when `str` is None.
    """
    # If the description of an issue/pull request is empty, the Github API
    # library returns None instead of an empty string. Handle this here to
    # avoid failures from trying to manipulate None.
    if str is None:
        return ""
    # https://github.com/github/markup/issues/1168#issuecomment-494946168
    escaped = html.escape(str, False)
    # '@' followed by alphanum is a user name.  Raw strings are used for the
    # patterns because "\w" / "\d" are invalid escapes in a normal literal.
    escaped = re.sub(r"@(?=\w)", "@<!-- -->", escaped)
    # '#' followed by digits is considered an issue number
    escaped = re.sub(r"#(?=\d)", "#<!-- -->", escaped)
    return escaped
58
59
class IssueSubscriber:
    """Mention the `issue-subscribers-<label>` team on a labelled issue."""

    def __init__(self, token: str, repo: str, issue_number: int, label_name: str):
        self.repo = github.Github(token).get_repo(repo)
        self.org = github.Github(token).get_organization(self.repo.organization.login)
        self.issue = self.repo.get_issue(issue_number)
        # Team names are matched lower-cased, see run().
        self._team_name = f"issue-subscribers-{label_name}".lower()

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team that should be notified."""
        return self._team_name

    def run(self) -> bool:
        """Post the notification comment(s) on the issue.

        Returns False (after printing a message) when the organization has no
        team named `team_name`, True once the comment has been created.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # Good-first-issue subscribers additionally get the beginner guide
        # as a separate comment, posted before the team mention.
        if team.slug == "issue-subscribers-good-first-issue":
            self.issue.create_comment("{}\n".format(beginner_comment))

        description = escape_description(self.issue.body)
        notification = f"""
@llvm/{team.slug}

Author: {self.issue.user.name} ({self.issue.user.login})

<details>
{description}
</details>
"""

        self.issue.create_comment(notification)
        return True
95
96
def human_readable_size(size, decimal_places=2):
    """Format a byte count using binary units ("B" up to "PiB").

    The value is divided by 1024 until it drops below 1024 or the largest
    unit is reached, then rendered with `decimal_places` fractional digits.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB")
    index = 0
    last = len(units) - 1
    while index < last and not size < 1024.0:
        size /= 1024.0
        index += 1
    return f"{size:.{decimal_places}f} {units[index]}"
103
104
class PRSubscriber:
    """Mention the `pr-subscribers-<label>` team on a pull request.

    The first team that is notified gets a summary comment containing the PR
    description, per-file statistics and a (possibly truncated) diff.  Later
    teams are added to that same comment instead of creating a new one.
    """

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team that should be notified."""
        return self._team_name

    def __init__(self, token: str, repo: str, pr_number: int, label_name: str):
        self.repo = github.Github(token).get_repo(repo)
        self.org = github.Github(token).get_organization(self.repo.organization.login)
        self.pr = self.repo.get_issue(pr_number).as_pull_request()
        # '+' in the label (e.g. "c++") is spelled 'x' in the team name.
        self._team_name = "pr-subscribers-{}".format(
            label_name.replace("+", "x")
        ).lower()
        # Marker used to find the summary comment again on later runs.
        self.COMMENT_TAG = "<!--LLVM PR SUMMARY COMMENT-->\n"

    def get_summary_comment(self) -> Optional[github.IssueComment.IssueComment]:
        """Return the existing summary comment of the PR, or None if there is none."""
        for comment in self.pr.as_issue().get_comments():
            if self.COMMENT_TAG in comment.body:
                return comment
        return None

    @staticmethod
    def _format_diff_stats(changed_files, files, limit) -> str:
        """Build the per-file statistics list shown in the summary comment.

        :param changed_files: Number of changed files, used in the heading.
        :param files: Iterable of file objects providing `status`, `filename`,
            `additions`, `deletions` and `previous_filename`.
        :param limit: Stop adding entries once the text is longer than this.
        """
        diff_stats = f"{changed_files} Files Affected:\n\n"
        for file in files:
            diff_stats += f"- ({file.status}) {file.filename} ("
            if file.additions:
                diff_stats += f"+{file.additions}"
            if file.deletions:
                diff_stats += f"-{file.deletions}"
            diff_stats += ") "
            if file.status == "renamed":
                # This used to be print()ed to stdout and was therefore
                # missing from the comment.
                diff_stats += f"(from {file.previous_filename})"
            diff_stats += "\n"
            if len(diff_stats) > limit:
                break
        return diff_stats

    def run(self) -> bool:
        """Create the summary comment, or add the team to the existing one.

        Returns False (after printing a message) when the organization has no
        team named `team_name`, True otherwise.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # GitHub limits comments to 65,536 characters, let's limit the diff
        # and the file list to 20kB each.
        STAT_LIMIT = 20 * 1024
        DIFF_LIMIT = 20 * 1024

        # Get statistics for each file
        diff_stats = self._format_diff_stats(
            self.pr.changed_files, self.pr.get_files(), STAT_LIMIT
        )

        # Get the diff.  This is best effort: a failed download must not
        # prevent the notification, so fall back to an empty patch.
        try:
            patch = requests.get(self.pr.diff_url).text
        except Exception as e:
            print(f"warning: failed to download {self.pr.diff_url}: {e}")
            patch = ""

        patch_link = f"Full diff: {self.pr.diff_url}\n"
        if len(patch) > DIFF_LIMIT:
            patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
        team_mention = "@llvm/{}".format(team.slug)

        body = escape_description(self.pr.body)
        # Note: the comment is in markdown and the code below
        # is sensitive to line breaks
        comment = f"""
{self.COMMENT_TAG}
{team_mention}

Author: {self.pr.user.name} ({self.pr.user.login})

<details>
<summary>Changes</summary>

{body}

---
{patch_link}

{diff_stats}

``````````diff
{patch}
``````````

</details>
"""

        summary_comment = self.get_summary_comment()
        if not summary_comment:
            self.pr.as_issue().create_comment(comment)
        elif team_mention + "\n" in summary_comment.body:
            print("Team {} already mentioned.".format(team.slug))
        else:
            # Insert the new mention directly after the tag line.
            summary_comment.edit(
                summary_comment.body.replace(
                    self.COMMENT_TAG, self.COMMENT_TAG + team_mention + "\n"
                )
            )
        return True

    def _get_curent_team(self) -> Optional[github.Team.Team]:
        """Return the organization team named `team_name`, or None.

        Not called by run(); kept so existing users of the method still work.
        """
        for team in self.org.get_teams():
            if self.team_name == team.name.lower():
                return team
        return None
208
209
class PRGreeter:
    """Post a welcome comment on a pull request."""

    def __init__(self, token: str, repo: str, pr_number: int):
        repository = github.Github(token).get_repo(repo)
        self.pr = repository.get_issue(pr_number).as_pull_request()

    def run(self) -> bool:
        """Create the welcome comment on the pull request and return True."""
        # We assume that this is only called for a PR that has just been opened
        # by a user new to LLVM and/or GitHub itself.

        # This text is using Markdown formatting.
        greeting = """\
Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/)."""
        issue = self.pr.as_issue()
        issue.create_comment(greeting)
        return True
241
242
def setup_llvmbot_git(git_dir="."):
    """
    Set `user.name` and `user.email` of the git repo in `git_dir` to the
    llvmbot account so that commits are attributed to llvmbot.
    """
    identity = (("name", "llvmbot"), ("email", "llvmbot@llvm.org"))
    repo = Repo(git_dir)
    with repo.config_writer() as writer:
        for key, value in identity:
            writer.set_value("user", key, value)
252
253
def extract_commit_hash(arg: str):
    """
    Return the commit hash named by an argument of a /action github comment.

    Two spellings are supported: the bare commit hash, which is returned
    unchanged, and the llvm-project commit URL, such as
    https://github.com/llvm/llvm-project/commit/2832d7941f4207f1fcf813b27cf08cecc3086959
    from which the leading URL part is removed.
    """
    github_prefix = "https://github.com/llvm/llvm-project/commit/"
    if not arg.startswith(github_prefix):
        return arg
    prefix_length = len(github_prefix)
    return arg[prefix_length:]
265
266
class ReleaseWorkflow:
    """
    This class implements the sub-commands for the release-workflow command.
    The current sub-commands are:
        * create-branch
        * create-pull-request

    The execute_command method will automatically choose the correct sub-command
    based on the text in stdin.
    """

    # Label added to the issue when a cherry-pick fails; removed again once a
    # branch has been pushed or a pull request has been created.
    CHERRY_PICK_FAILED_LABEL = "release:cherry-pick-failed"

    def __init__(
        self,
        token: str,
        repo: str,
        issue_number: int,
        branch_repo_name: str,
        branch_repo_token: str,
        llvm_project_dir: str,
    ) -> None:
        """
        :param str token: GitHub token used for API calls on `repo`.
        :param str repo: Repository containing the issue, as <owner>/<repo>.
        :param int issue_number: The backport issue to work on.
        :param str branch_repo_name: Repository the backport branch is pushed to.
        :param str branch_repo_token: Token used for pushing; `token` is used
            when this is empty or None.
        :param str llvm_project_dir: Path of the local llvm-project checkout.
        """
        self._token = token
        self._repo_name = repo
        self._issue_number = issue_number
        self._branch_repo_name = branch_repo_name
        if branch_repo_token:
            self._branch_repo_token = branch_repo_token
        else:
            self._branch_repo_token = self.token
        self._llvm_project_dir = llvm_project_dir

    @property
    def token(self) -> str:
        return self._token

    @property
    def repo_name(self) -> str:
        return self._repo_name

    @property
    def issue_number(self) -> int:
        return self._issue_number

    @property
    def branch_repo_name(self) -> str:
        return self._branch_repo_name

    @property
    def branch_repo_token(self) -> str:
        return self._branch_repo_token

    @property
    def llvm_project_dir(self) -> str:
        return self._llvm_project_dir

    @property
    def repo(self) -> github.Repository.Repository:
        """A repository object for `repo_name`, created anew on every access."""
        return github.Github(self.token).get_repo(self.repo_name)

    @property
    def issue(self) -> github.Issue.Issue:
        """The issue `issue_number`, looked up anew on every access."""
        return self.repo.get_issue(self.issue_number)

    @property
    def push_url(self) -> str:
        """Push URL for `branch_repo_name`.  Contains the access token."""
        return "https://{}@github.com/{}".format(
            self.branch_repo_token, self.branch_repo_name
        )

    @property
    def branch_name(self) -> str:
        return "issue{}".format(self.issue_number)

    @property
    def release_branch_for_issue(self) -> Optional[str]:
        """
        The release branch named by a "branch: <name>" line in the description
        of the issue's milestone, or None if there is no milestone or no such
        line.
        """
        milestone = self.issue.milestone
        if milestone is None:
            return None
        # Guard against a missing (None) description, which would make
        # re.search() raise a TypeError.
        m = re.search("branch: (.+)", milestone.description or "")
        if m:
            return m.group(1)
        return None

    def print_release_branch(self) -> None:
        print(self.release_branch_for_issue)

    def issue_notify_branch(self) -> None:
        """Comment on the issue with the /branch command for the pushed branch."""
        self.issue.create_comment(
            "/branch {}/{}".format(self.branch_repo_name, self.branch_name)
        )

    def issue_notify_pull_request(self, pull: github.PullRequest.PullRequest) -> None:
        """Comment on the issue with a reference to the created pull request."""
        self.issue.create_comment(
            "/pull-request {}#{}".format(self.repo_name, pull.number)
        )

    def make_ignore_comment(self, comment: str) -> str:
        """
        Returns the comment string with a prefix that will cause
        a Github workflow to skip parsing this comment.

        :param str comment: The comment to ignore
        """
        return "<!--IGNORE-->\n" + comment

    def issue_notify_no_milestone(self, comment: List[str]) -> None:
        """
        Comment on the issue that the command failed because the milestone is
        missing, quoting the lines of the offending comment.

        :param list comment: Lines of the comment, including line endings.
        """
        message = "{}\n\nError: Command failed due to missing milestone.".format(
            "".join([">" + line for line in comment])
        )
        self.issue.create_comment(self.make_ignore_comment(message))

    @property
    def action_url(self) -> str:
        """URL of the current GitHub Actions run, or "" when `CI` is not set."""
        if os.getenv("CI"):
            return "https://github.com/{}/actions/runs/{}".format(
                os.getenv("GITHUB_REPOSITORY"), os.getenv("GITHUB_RUN_ID")
            )
        return ""

    def issue_notify_cherry_pick_failure(
        self, commit: str
    ) -> github.IssueComment.IssueComment:
        """
        Comment on the issue that `commit` could not be cherry-picked and add
        the cherry-pick-failed label.  Returns the created comment.
        """
        message = self.make_ignore_comment(
            "Failed to cherry-pick: {}\n\n".format(commit)
        )
        action_url = self.action_url
        if action_url:
            message += action_url + "\n\n"
        message += "Please manually backport the fix and push it to your github fork.  Once this is done, please add a comment like this:\n\n`/branch <user>/<repo>/<branch>`"
        issue = self.issue
        comment = issue.create_comment(message)
        issue.add_to_labels(self.CHERRY_PICK_FAILED_LABEL)
        return comment

    def issue_notify_pull_request_failure(
        self, branch: str
    ) -> github.IssueComment.IssueComment:
        """Comment on the issue that creating the pull request for `branch` failed."""
        message = "Failed to create pull request for {} ".format(branch)
        message += self.action_url
        return self.issue.create_comment(message)

    def issue_remove_cherry_pick_failed_label(self):
        """Remove the cherry-pick-failed label from the issue if it is set."""
        if self.CHERRY_PICK_FAILED_LABEL in [l.name for l in self.issue.labels]:
            self.issue.remove_from_labels(self.CHERRY_PICK_FAILED_LABEL)

    def get_main_commit(self, cherry_pick_sha: str) -> Optional[github.Commit.Commit]:
        """
        Return the commit named in the "(cherry picked from commit <sha>)"
        trailer of `cherry_pick_sha`, or None if the message has no trailer.
        """
        commit = self.repo.get_commit(cherry_pick_sha)
        message = commit.commit.message
        # Raw string: "\(" is an invalid escape in a normal string literal.
        m = re.search(r"\(cherry picked from commit ([0-9a-f]+)\)", message)
        if not m:
            return None
        return self.repo.get_commit(m.group(1))

    def pr_request_review(self, pr: github.PullRequest.PullRequest):
        """
        This function will try to find the best reviewers for the commits of
        `pr` and then add a comment requesting review of the backport and add
        them as reviewers.

        The reviewers selected are those users who approved the pull request
        for the main branch.
        """
        reviewers = []
        for commit in pr.get_commits():
            main_commit = self.get_main_commit(commit.sha)
            if not main_commit:
                continue
            for pull in main_commit.get_pulls():
                for review in pull.get_reviews():
                    if review.state != "APPROVED":
                        continue
                    # The append must be inside the review loop; outside of it
                    # only the last review was used, whatever its state.
                    login = review.user.login
                    if login not in reviewers:
                        reviewers.append(login)
        if reviewers:
            message = "{} What do you think about merging this PR to the release branch?".format(
                " ".join(["@" + r for r in reviewers])
            )
            pr.create_issue_comment(message)
            pr.create_review_request(reviewers)

    def create_branch(self, commits: List[str]) -> bool:
        """
        This function attempts to backport `commits` into the branch associated
        with `self.issue_number`.

        If this is successful, then the branch is pushed to `self.branch_repo_name`, if not,
        a comment is added to the issue saying that the cherry-pick failed.

        :param list commits: List of commits to cherry-pick.

        """
        print("cherry-picking", commits)
        branch_name = self.branch_name
        local_repo = Repo(self.llvm_project_dir)
        local_repo.git.checkout(self.release_branch_for_issue)

        for c in commits:
            try:
                local_repo.git.cherry_pick("-x", c)
            except Exception:
                self.issue_notify_cherry_pick_failure(c)
                raise

        push_url = self.push_url
        # Log the repository name only: push_url embeds the access token.
        print("Pushing to {} {}".format(self.branch_repo_name, branch_name))
        local_repo.git.push(push_url, "HEAD:{}".format(branch_name), force=True)

        self.issue_notify_branch()
        self.issue_remove_cherry_pick_failed_label()
        return True

    def check_if_pull_request_exists(
        self, repo: github.Repository.Repository, head: str
    ) -> bool:
        """Return True if `repo` has at least one pull request for `head`."""
        pulls = repo.get_pulls(head=head)
        return pulls.totalCount != 0

    def create_pull_request(self, owner: str, repo_name: str, branch: str) -> bool:
        """
        Create a pull request in `self.repo_name`.  The base branch of the
        pull request will be chosen based on the milestone attached to
        the issue represented by `self.issue_number`  For example if the milestone
        is Release 13.0.1, then the base branch will be release/13.x. `branch`
        will be used as the compare branch.
        https://docs.github.com/en/get-started/quickstart/github-glossary#base-branch
        https://docs.github.com/en/get-started/quickstart/github-glossary#compare-branch

        `repo_name` is accepted for the /branch command syntax but is not used.
        Returns False when no release branch can be determined, True when the
        pull request already exists or was created.
        """
        repo = github.Github(self.token).get_repo(self.repo_name)
        issue_ref = "{}#{}".format(self.repo_name, self.issue_number)
        pull = None
        release_branch_for_issue = self.release_branch_for_issue
        if release_branch_for_issue is None:
            return False

        head = f"{owner}:{branch}"
        if self.check_if_pull_request_exists(repo, head):
            print("PR already exists...")
            return True
        try:
            pull = repo.create_pull(
                title=f"PR for {issue_ref}",
                body="resolves {}".format(issue_ref),
                base=release_branch_for_issue,
                head=head,
                maintainer_can_modify=False,
            )

            pull.as_issue().edit(milestone=self.issue.milestone)

            # Requesting reviews is best effort and must not fail the command.
            try:
                self.pr_request_review(pull)
            except Exception as e:
                print("error: Failed while searching for reviewers", e)

        except Exception:
            self.issue_notify_pull_request_failure(branch)
            raise

        if pull is None:
            return False

        self.issue_notify_pull_request(pull)
        self.issue_remove_cherry_pick_failed_label()

        # TODO(tstellar): Do you really want to always return True?
        return True

    def execute_command(self) -> bool:
        """
        This function reads lines from STDIN and executes the first command
        that it finds.  The 2 supported commands are:
        /cherry-pick commit0 <commit1> <commit2> <...>
        /branch <owner>/<repo>/<branch>

        Returns the result of the executed command, or False when no command
        was found in the input.
        """
        # Remember what was read so it can be reported below; stdin is
        # exhausted once the loop ends.
        seen_lines = []
        for line in sys.stdin:
            seen_lines.append(line)
            # Drop the line ending and trailing blanks so that they do not
            # end up in the last argument.
            line = line.rstrip()
            m = re.search(r"/([a-z-]+)\s(.+)", line)
            if not m:
                continue
            command = m.group(1)
            args = m.group(2)

            if command == "cherry-pick":
                commits = [extract_commit_hash(a) for a in args.split()]
                return self.create_branch(commits)

            if command == "branch":
                m = re.match("([^/]+)/([^/]+)/(.+)", args)
                if m:
                    owner = m.group(1)
                    repo = m.group(2)
                    branch = m.group(3)
                    return self.create_pull_request(owner, repo, branch)

        print("Do not understand input:")
        print(seen_lines)
        return False
566
567
# Command line interface.  Options added to `parser` itself apply to every
# sub-command and therefore have to be given before the sub-command name.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--token", type=str, required=True, help="GitHub authentiation token"
)
parser.add_argument(
    "--repo",
    type=str,
    # Falls back to llvm/llvm-project when GITHUB_REPOSITORY is not set.
    default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
    help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
)
# The chosen sub-command name is stored in `args.command`.
subparsers = parser.add_subparsers(dest="command")

# issue-subscriber: handled by IssueSubscriber.
issue_subscriber_parser = subparsers.add_parser("issue-subscriber")
issue_subscriber_parser.add_argument("--label-name", type=str, required=True)
issue_subscriber_parser.add_argument("--issue-number", type=int, required=True)

# pr-subscriber: handled by PRSubscriber; --issue-number is the PR number.
pr_subscriber_parser = subparsers.add_parser("pr-subscriber")
pr_subscriber_parser.add_argument("--label-name", type=str, required=True)
pr_subscriber_parser.add_argument("--issue-number", type=int, required=True)

# pr-greeter: handled by PRGreeter; --issue-number is the PR number.
pr_greeter_parser = subparsers.add_parser("pr-greeter")
pr_greeter_parser.add_argument("--issue-number", type=int, required=True)

# release-workflow: handled by ReleaseWorkflow.
release_workflow_parser = subparsers.add_parser("release-workflow")
release_workflow_parser.add_argument(
    "--llvm-project-dir",
    type=str,
    default=".",
    help="directory containing the llvm-project checout",
)
release_workflow_parser.add_argument(
    "--issue-number", type=int, required=True, help="The issue number to update"
)
release_workflow_parser.add_argument(
    "--branch-repo-token",
    type=str,
    help="GitHub authentication token to use for the repository where new branches will be pushed. Defaults to TOKEN.",
)
release_workflow_parser.add_argument(
    "--branch-repo",
    type=str,
    default="llvmbot/llvm-project",
    help="The name of the repo where new branches will be pushed (e.g. llvm/llvm-project)",
)
# "print-release-branch" prints the release branch of the issue; "auto" makes
# the dispatch code below call ReleaseWorkflow.execute_command(), which reads
# the command from stdin.
release_workflow_parser.add_argument(
    "sub_command",
    type=str,
    choices=["print-release-branch", "auto"],
    help="Print to stdout the name of the release branch ISSUE_NUMBER should be backported to",
)

# setup-llvmbot-git: takes no options of its own.
llvmbot_git_config_parser = subparsers.add_parser(
    "setup-llvmbot-git",
    help="Set the default user and email for the git repo in LLVM_PROJECT_DIR to llvmbot",
)
623
# Parse the command line and run the selected sub-command.  Nothing is done
# when no sub-command was given.
args = parser.parse_args()

if args.command == "issue-subscriber":
    issue_subscriber = IssueSubscriber(
        token=args.token,
        repo=args.repo,
        issue_number=args.issue_number,
        label_name=args.label_name,
    )
    issue_subscriber.run()
elif args.command == "pr-subscriber":
    pr_subscriber = PRSubscriber(
        token=args.token,
        repo=args.repo,
        pr_number=args.issue_number,
        label_name=args.label_name,
    )
    pr_subscriber.run()
elif args.command == "pr-greeter":
    pr_greeter = PRGreeter(token=args.token, repo=args.repo, pr_number=args.issue_number)
    pr_greeter.run()
elif args.command == "release-workflow":
    release_workflow = ReleaseWorkflow(
        token=args.token,
        repo=args.repo,
        issue_number=args.issue_number,
        branch_repo_name=args.branch_repo,
        branch_repo_token=args.branch_repo_token,
        llvm_project_dir=args.llvm_project_dir,
    )
    # Without a release branch neither sub-command can work: report the
    # missing milestone on the issue, quoting stdin, and fail.
    if not release_workflow.release_branch_for_issue:
        release_workflow.issue_notify_no_milestone(sys.stdin.readlines())
        sys.exit(1)
    if args.sub_command == "print-release-branch":
        release_workflow.print_release_branch()
    elif not release_workflow.execute_command():
        sys.exit(1)
elif args.command == "setup-llvmbot-git":
    setup_llvmbot_git()
658