xref: /llvm-project/llvm/utils/git/github-automation.py (revision 416b079336c6d6e48858f951cd494a7a3577deb8)
1#!/usr/bin/env python3
2#
3# ======- github-automation - LLVM GitHub Automation Routines--*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==-------------------------------------------------------------------------==#
10
11import argparse
12from git import Repo  # type: ignore
13import html
14import github
15import os
16import re
17import requests
18import sys
19import time
20from typing import List, Optional
21
# Markdown text that IssueSubscriber.run() posts as a separate comment when the
# subscribed team's slug is "issue-subscribers-good-first-issue".
beginner_comment = """
Hi!

This issue may be a good introductory issue for people new to working on LLVM. If you would like to work on this issue, your first steps are:

1. In the comments of the issue, request for it to be assigned to you.
2. Fix the issue locally.
3. [Run the test suite](https://llvm.org/docs/TestingGuide.html#unit-and-regression-tests) locally. Remember that the subdirectories under `test/` create fine-grained testing targets, so you can e.g. use `make check-clang-ast` to only run Clang's AST tests.
4. Create a Git commit.
5. Run [`git clang-format HEAD~1`](https://clang.llvm.org/docs/ClangFormat.html#git-integration) to format your changes.
6. Open a [pull request](https://github.com/llvm/llvm-project/pulls) to the [upstream repository](https://github.com/llvm/llvm-project) on GitHub. Detailed instructions can be found [in GitHub's documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).

If you have any further questions about this issue, don't hesitate to ask via a comment in the thread below.
"""
36
37
def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
    """Return the first entry of `teams` whose lower-cased name is `team_name`.

    `team_name` is compared verbatim, so it must already be lower-case for a
    match to be possible. Returns None when nothing matches.
    """
    matching = (
        candidate for candidate in teams if team_name == candidate.name.lower()
    )
    return next(matching, None)
43
44
def escape_description(str):
    """Make an issue/PR description safe to quote inside a bot comment.

    HTML special characters (`&`, `<`, `>`) are escaped, and an HTML comment is
    inserted after every `@` that precedes a word character and after every
    `#` that precedes a digit, so that the quoted text neither pings users nor
    links to issues.

    :param str: The description text, or None for an empty description. The
        parameter name shadows the builtin; it is kept for compatibility with
        existing callers.
    :return: The escaped text; "" when `str` is None.
    """
    # If the description of an issue/pull request is empty, the Github API
    # library returns None instead of an empty string. Handle this here to
    # avoid failures from trying to manipulate None.
    if str is None:
        return ""
    # https://github.com/github/markup/issues/1168#issuecomment-494946168
    escaped = html.escape(str, False)
    # Raw strings: "\w" and "\d" are not valid string escape sequences and
    # trigger a warning when written in a normal literal.
    # '@' followed by alphanum is a user name
    escaped = re.sub(r"@(?=\w)", "@<!-- -->", escaped)
    # '#' followed by digits is considered an issue number
    escaped = re.sub(r"#(?=\d)", "#<!-- -->", escaped)
    return escaped
58
59
class IssueSubscriber:
    """Mentions the `issue-subscribers-<label>` team in a comment on an issue."""

    def __init__(self, token: str, repo: str, issue_number: int, label_name: str):
        self.repo = github.Github(token).get_repo(repo)
        org_client = github.Github(token)
        self.org = org_client.get_organization(self.repo.organization.login)
        self.issue = self.repo.get_issue(issue_number)
        self._team_name = f"issue-subscribers-{label_name}".lower()

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team to notify."""
        return self._team_name

    def run(self) -> bool:
        """Post the notification comment(s) on the issue.

        Returns False (after printing a message) when the organization has no
        team called `team_name`, True once the comment has been created.
        """
        subscribers = _get_curent_team(self.team_name, self.org.get_teams())
        if not subscribers:
            print(f"couldn't find team named {self.team_name}")
            return False

        # Issues going to the good-first-issue team additionally receive the
        # introductory text for newcomers, as a comment of its own.
        if subscribers.slug == "issue-subscribers-good-first-issue":
            self.issue.create_comment("{}\n".format(beginner_comment))

        description = escape_description(self.issue.body)
        notification = f"""
@llvm/{subscribers.slug}

Author: {self.issue.user.name} ({self.issue.user.login})

<details>
{description}
</details>
"""

        self.issue.create_comment(notification)
        return True
95
96
def human_readable_size(size, decimal_places=2):
    """Format `size` (a byte count) using binary units, e.g. "1.50 KiB".

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PiB) is reached, then rendered with `decimal_places` decimals.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB")
    index = 0
    last = len(units) - 1
    while index < last and not size < 1024.0:
        size /= 1024.0
        index += 1
    return f"{size:.{decimal_places}f} {units[index]}"
103
104
class PRSubscriber:
    """Mentions the `pr-subscribers-<label>` team on a pull request.

    The first time it runs for a pull request it posts a summary comment
    (description, list of changed files, and a possibly truncated diff) that
    starts with `COMMENT_TAG`. On later runs for other labels the existing
    summary comment is edited to mention the additional team.
    """

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team to notify."""
        return self._team_name

    def __init__(self, token: str, repo: str, pr_number: int, label_name: str):
        self.repo = github.Github(token).get_repo(repo)
        self.org = github.Github(token).get_organization(self.repo.organization.login)
        self.pr = self.repo.get_issue(pr_number).as_pull_request()
        # '+' in the label is replaced with 'x' when forming the team name.
        self._team_name = "pr-subscribers-{}".format(
            label_name.replace("+", "x")
        ).lower()
        # Marker used to find the summary comment again on later runs.
        self.COMMENT_TAG = "<!--LLVM PR SUMMARY COMMENT-->\n"

    def get_summary_comment(self) -> Optional[github.IssueComment.IssueComment]:
        """Return the first comment containing `COMMENT_TAG`, or None."""
        for comment in self.pr.as_issue().get_comments():
            if self.COMMENT_TAG in comment.body:
                return comment
        return None

    def _build_diff_stats(self, limit: int) -> str:
        """Return a Markdown list with one line per file changed by the PR.

        No further files are added once the text is longer than `limit`
        characters.
        """
        diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
        for file in self.pr.get_files():
            diff_stats += f"- ({file.status}) {file.filename} ("
            if file.additions:
                diff_stats += f"+{file.additions}"
            if file.deletions:
                diff_stats += f"-{file.deletions}"
            diff_stats += ") "
            if file.status == "renamed":
                # This used to be print()ed, so the old name never made it
                # into the comment.
                diff_stats += f"(from {file.previous_filename})"
            diff_stats += "\n"
            if len(diff_stats) > limit:
                break
        return diff_stats

    def run(self) -> bool:
        """Create or update the summary comment for the pull request.

        Returns False (after printing a message) when the organization has no
        team called `team_name`, True otherwise.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # GitHub limits comments to 65,536 characters, let's limit the diff
        # and the file list to 20kB each.
        STAT_LIMIT = 20 * 1024
        DIFF_LIMIT = 20 * 1024

        # Get statistics for each file
        diff_stats = self._build_diff_stats(STAT_LIMIT)

        # Get the diff
        try:
            patch = requests.get(self.pr.diff_url).text
        except Exception:
            # Best effort: the comment is still posted, just without a diff.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            patch = ""

        patch_link = f"Full diff: {self.pr.diff_url}\n"
        if len(patch) > DIFF_LIMIT:
            patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
        team_mention = "@llvm/{}".format(team.slug)

        body = escape_description(self.pr.body)
        # Note: the comment is in markdown and the code below
        # is sensitive to line breaks
        comment = f"""
{self.COMMENT_TAG}
{team_mention}

Author: {self.pr.user.name} ({self.pr.user.login})

<details>
<summary>Changes</summary>

{body}

---
{patch_link}

{diff_stats}

``````````diff
{patch}
``````````

</details>
"""

        summary_comment = self.get_summary_comment()
        if not summary_comment:
            self.pr.as_issue().create_comment(comment)
        elif team_mention + "\n" in summary_comment.body:
            print("Team {} already mentioned.".format(team.slug))
        else:
            # Insert the new mention directly after the tag, in front of the
            # mentions that are already there.
            summary_comment.edit(
                summary_comment.body.replace(
                    self.COMMENT_TAG, self.COMMENT_TAG + team_mention + "\n"
                )
            )
        return True

    def _get_curent_team(self) -> Optional[github.Team.Team]:
        """Return the organization team named `team_name`, or None.

        Not used by run(); kept for compatibility and implemented via the
        module-level helper of the same name.
        """
        return _get_curent_team(self.team_name, self.org.get_teams())
208
209
class PRGreeter:
    """Posts a welcome comment on a pull request."""

    def __init__(self, token: str, repo: str, pr_number: int):
        client = github.Github(token)
        repo = client.get_repo(repo)
        as_issue = repo.get_issue(pr_number)
        self.pr = as_issue.as_pull_request()

    def run(self) -> bool:
        """Create the greeting comment on the pull request and return True."""
        # We assume that this is only called for a PR that has just been opened
        # by a user new to LLVM and/or GitHub itself.

        # This text is using Markdown formatting.
        greeting = f"""\
Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/)."""
        target = self.pr.as_issue()
        target.create_comment(greeting)
        return True
241
242
def setup_llvmbot_git(git_dir="."):
    """
    Set `user.name` and `user.email` in the configuration of the git repo at
    `git_dir` to the llvmbot account, so that commits made there are
    attributed to llvmbot.
    """
    identity = (("name", "llvmbot"), ("email", "llvmbot@llvm.org"))
    repo = Repo(git_dir)
    with repo.config_writer() as writer:
        for option, value in identity:
            writer.set_value("user", option, value)
252
253
def extract_commit_hash(arg: str):
    """
    Return the commit hash named by an argument of a /action github comment.

    Two forms are supported: the bare commit hash, which is returned as-is,
    and an llvm-project commit URL such as
    https://github.com/llvm/llvm-project/commit/2832d7941f4207f1fcf813b27cf08cecc3086959
    from which everything after the `/commit/` prefix is returned.
    """
    url_prefix = "https://github.com/llvm/llvm-project/commit/"
    is_url = arg.startswith(url_prefix)
    return arg[len(url_prefix) :] if is_url else arg
265
266
class ReleaseWorkflow:
    """
    This class implements the sub-commands for the release-workflow command.
    The current sub-commands are:
        * create-branch
        * create-pull-request

    The execute_command method will automatically choose the correct sub-command
    based on the text in stdin.
    """

    # Label added to the issue when a cherry-pick fails and removed again once
    # a branch or pull request has been created successfully.
    CHERRY_PICK_FAILED_LABEL = "release:cherry-pick-failed"

    def __init__(
        self,
        token: str,
        repo: str,
        issue_number: int,
        branch_repo_name: str,
        branch_repo_token: str,
        llvm_project_dir: str,
    ) -> None:
        """
        :param str token: GitHub token used for API calls on `repo`.
        :param str repo: Name (<owner>/<repo>) of the repository holding the issue.
        :param int issue_number: The issue that requested the backport.
        :param str branch_repo_name: Repository that backport branches are pushed to.
        :param str branch_repo_token: Token used when pushing; `token` is used
            when this is empty or None.
        :param str llvm_project_dir: Path of the local llvm-project checkout.
        """
        self._token = token
        self._repo_name = repo
        self._issue_number = issue_number
        self._branch_repo_name = branch_repo_name
        if branch_repo_token:
            self._branch_repo_token = branch_repo_token
        else:
            self._branch_repo_token = self.token
        self._llvm_project_dir = llvm_project_dir

    @property
    def token(self) -> str:
        return self._token

    @property
    def repo_name(self) -> str:
        return self._repo_name

    @property
    def issue_number(self) -> int:
        return self._issue_number

    @property
    def branch_repo_name(self) -> str:
        return self._branch_repo_name

    @property
    def branch_repo_token(self) -> str:
        return self._branch_repo_token

    @property
    def llvm_project_dir(self) -> str:
        return self._llvm_project_dir

    @property
    def repo(self) -> github.Repository.Repository:
        # Looked up again on every access; nothing is cached.
        return github.Github(self.token).get_repo(self.repo_name)

    @property
    def issue(self) -> github.Issue.Issue:
        # Looked up again on every access; nothing is cached.
        return self.repo.get_issue(self.issue_number)

    @property
    def push_url(self) -> str:
        # NOTE: this URL embeds the access token; avoid printing it.
        return "https://{}@github.com/{}".format(
            self.branch_repo_token, self.branch_repo_name
        )

    @property
    def branch_name(self) -> str:
        return "issue{}".format(self.issue_number)

    @property
    def release_branch_for_issue(self) -> Optional[str]:
        """
        The release branch named by a "branch: <name>" entry in the description
        of the issue's milestone, or None when the issue has no milestone or
        the description has no such entry.
        """
        milestone = self.issue.milestone
        # The description can be missing; searching None would raise TypeError.
        if milestone is None or not milestone.description:
            return None
        m = re.search("branch: (.+)", milestone.description)
        if m:
            # Drop trailing whitespace such as the "\r" of a CRLF line ending.
            return m.group(1).strip()
        return None

    def print_release_branch(self) -> None:
        print(self.release_branch_for_issue)

    def issue_notify_branch(self) -> None:
        """Comment "/branch <branch repo>/<branch>" on the issue."""
        self.issue.create_comment(
            "/branch {}/{}".format(self.branch_repo_name, self.branch_name)
        )

    def issue_notify_pull_request(self, pull: github.PullRequest.PullRequest) -> None:
        """Comment "/pull-request <repo>#<number>" on the issue."""
        self.issue.create_comment(
            "/pull-request {}#{}".format(self.repo_name, pull.number)
        )

    def make_ignore_comment(self, comment: str) -> str:
        """
        Returns the comment string with a prefix that will cause
        a Github workflow to skip parsing this comment.

        :param str comment: The comment to ignore
        """
        return "<!--IGNORE-->\n" + comment

    def issue_notify_no_milestone(self, comment: List[str]) -> None:
        """
        Quote `comment` (the lines of the triggering comment) back on the issue
        together with an error saying that the milestone is missing.
        """
        message = "{}\n\nError: Command failed due to missing milestone.".format(
            "".join([">" + line for line in comment])
        )
        self.issue.create_comment(self.make_ignore_comment(message))

    @property
    def action_url(self) -> str:
        """URL of the current GitHub Actions run, or "" when CI is not set."""
        if os.getenv("CI"):
            return "https://github.com/{}/actions/runs/{}".format(
                os.getenv("GITHUB_REPOSITORY"), os.getenv("GITHUB_RUN_ID")
            )
        return ""

    def issue_notify_cherry_pick_failure(
        self, commit: str
    ) -> github.IssueComment.IssueComment:
        """
        Comment on the issue that cherry-picking `commit` failed and add the
        CHERRY_PICK_FAILED_LABEL label. Returns the created comment.
        """
        message = self.make_ignore_comment(
            "Failed to cherry-pick: {}\n\n".format(commit)
        )
        action_url = self.action_url
        if action_url:
            message += action_url + "\n\n"
        message += "Please manually backport the fix and push it to your github fork.  Once this is done, please add a comment like this:\n\n`/branch <user>/<repo>/<branch>`"
        issue = self.issue
        comment = issue.create_comment(message)
        issue.add_to_labels(self.CHERRY_PICK_FAILED_LABEL)
        return comment

    def issue_notify_pull_request_failure(
        self, branch: str
    ) -> github.IssueComment.IssueComment:
        """Comment on the issue that creating the pull request for `branch` failed."""
        message = "Failed to create pull request for {} ".format(branch)
        message += self.action_url
        return self.issue.create_comment(message)

    def issue_remove_cherry_pick_failed_label(self):
        """Remove CHERRY_PICK_FAILED_LABEL from the issue if it is present."""
        issue = self.issue
        if any(label.name == self.CHERRY_PICK_FAILED_LABEL for label in issue.labels):
            issue.remove_from_labels(self.CHERRY_PICK_FAILED_LABEL)

    def get_main_commit(self, cherry_pick_sha: str) -> Optional[github.Commit.Commit]:
        """
        Return the commit that `cherry_pick_sha` was cherry-picked from, based
        on the "(cherry picked from commit <sha>)" line in its commit message.
        Returns None when the message has no such line.
        """
        commit = self.repo.get_commit(cherry_pick_sha)
        message = commit.commit.message
        m = re.search(r"\(cherry picked from commit ([0-9a-f]+)\)", message)
        if not m:
            return None
        return self.repo.get_commit(m.group(1))

    def pr_request_review(self, pr: github.PullRequest.PullRequest):
        """
        This function will try to find the best reviewers for the commits of
        `pr` and then add a comment requesting review of the backport and add
        them as reviewers.

        The reviewers selected are those users who approved the pull request
        for the main branch.
        """
        reviewers = []
        for commit in pr.get_commits():
            main_commit = self.get_main_commit(commit.sha)
            if not main_commit:
                continue
            for pull in main_commit.get_pulls():
                for review in pull.get_reviews():
                    if review.state != "APPROVED":
                        continue
                    # Collect every approver exactly once. This must stay
                    # inside the review loop; outside it only the last review
                    # would be considered, whatever its state.
                    login = review.user.login
                    if login not in reviewers:
                        reviewers.append(login)
        if reviewers:
            message = "{} What do you think about merging this PR to the release branch?".format(
                " ".join(["@" + r for r in reviewers])
            )
            pr.create_issue_comment(message)
            pr.create_review_request(reviewers)

    def create_branch(self, commits: List[str]) -> bool:
        """
        This function attempts to backport `commits` into the branch associated
        with `self.issue_number`.

        If this is successful, then the branch is pushed to `self.branch_repo_name`, if not,
        a comment is added to the issue saying that the cherry-pick failed.

        :param list commits: List of commits to cherry-pick.

        """
        print("cherry-picking", commits)
        branch_name = self.branch_name
        local_repo = Repo(self.llvm_project_dir)
        local_repo.git.checkout(self.release_branch_for_issue)

        for c in commits:
            try:
                local_repo.git.cherry_pick("-x", c)
            except Exception:
                self.issue_notify_cherry_pick_failure(c)
                raise

        # Log the destination repository rather than push_url: the URL embeds
        # the access token.
        print("Pushing to {} {}".format(self.branch_repo_name, branch_name))
        local_repo.git.push(self.push_url, "HEAD:{}".format(branch_name), force=True)

        self.issue_notify_branch()
        self.issue_remove_cherry_pick_failed_label()
        return True

    def check_if_pull_request_exists(
        self, repo: github.Repository.Repository, head: str
    ) -> bool:
        """Return True when `repo` has at least one pull request for `head`."""
        pulls = repo.get_pulls(head=head)
        return pulls.totalCount != 0

    def create_pull_request(self, owner: str, repo_name: str, branch: str) -> bool:
        """
        Create a pull request in `self.repo_name`.  The base branch of the
        pull request will be chosen based on the milestone attached to
        the issue represented by `self.issue_number`  For example if the milestone
        is Release 13.0.1, then the base branch will be release/13.x. `branch`
        will be used as the compare branch.
        https://docs.github.com/en/get-started/quickstart/github-glossary#base-branch
        https://docs.github.com/en/get-started/quickstart/github-glossary#compare-branch

        Returns False when no release branch can be determined for the issue
        and True when the pull request was created or already exists. Errors
        while creating the pull request are reported on the issue and re-raised.
        """
        repo = github.Github(self.token).get_repo(self.repo_name)
        issue_ref = "{}#{}".format(self.repo_name, self.issue_number)
        pull = None
        release_branch_for_issue = self.release_branch_for_issue
        if release_branch_for_issue is None:
            return False
        head_branch = branch
        if not repo.fork:
            # If the target repo is not a fork of llvm-project, we need to copy
            # the branch into the target repo.  GitHub only supports cross-repo pull
            # requests on forked repos.
            head_branch = f"{owner}-{branch}"
            local_repo = Repo(self.llvm_project_dir)
            push_done = False
            # Up to five attempts, 30 seconds apart.
            for _ in range(0, 5):
                try:
                    local_repo.git.fetch(
                        f"https://github.com/{owner}/{repo_name}", f"{branch}:{branch}"
                    )
                    local_repo.git.push(
                        self.push_url, f"{branch}:{head_branch}", force=True
                    )
                    push_done = True
                    break
                except Exception as e:
                    print(e)
                    time.sleep(30)
                    continue
            if not push_done:
                # Name the repository instead of push_url so that the token
                # does not end up in the exception message.
                raise Exception(
                    "Failed to mirror branch into {}".format(self.branch_repo_name)
                )
            owner = repo.owner.login

        head = f"{owner}:{head_branch}"
        if self.check_if_pull_request_exists(repo, head):
            print("PR already exists...")
            return True
        try:
            pull = repo.create_pull(
                title=f"PR for {issue_ref}",
                body="resolves {}".format(issue_ref),
                base=release_branch_for_issue,
                head=head,
                maintainer_can_modify=False,
            )

            pull.as_issue().edit(milestone=self.issue.milestone)

            # Requesting reviews is best effort; the pull request exists
            # whether or not reviewers could be found.
            try:
                self.pr_request_review(pull)
            except Exception as e:
                print("error: Failed while searching for reviewers", e)

        except Exception:
            self.issue_notify_pull_request_failure(branch)
            raise

        if pull is None:
            return False

        self.issue_notify_pull_request(pull)
        self.issue_remove_cherry_pick_failed_label()

        # TODO(tstellar): Do you really want to always return True?
        return True

    def execute_command(self) -> bool:
        """
        This function reads lines from STDIN and executes the first command
        that it finds.  The 2 supported commands are:
        /cherry-pick commit0 <commit1> <commit2> <...>
        /branch <owner>/<repo>/<branch>

        Returns the result of the executed command, or False when no line
        contains a command that could be executed.
        """
        # Remember what was read so it can be reported below; stdin itself is
        # exhausted once the loop finishes.
        seen_lines = []
        for line in sys.stdin:
            seen_lines.append(line)
            # Strings are immutable, so the stripped value has to be kept;
            # otherwise trailing whitespace ends up in the last argument.
            line = line.rstrip()
            m = re.search(r"/([a-z-]+)\s(.+)", line)
            if not m:
                continue
            command = m.group(1)
            args = m.group(2)

            if command == "cherry-pick":
                commits = [extract_commit_hash(a) for a in args.split()]
                return self.create_branch(commits)

            if command == "branch":
                m = re.match("([^/]+)/([^/]+)/(.+)", args)
                if m:
                    owner = m.group(1)
                    repo = m.group(2)
                    branch = m.group(3)
                    return self.create_pull_request(owner, repo, branch)

        print("Do not understand input:")
        print(seen_lines)
        return False
591
592
# Command line interface. --token and --repo are shared by all commands; each
# command adds its own options through a sub-parser.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--token", type=str, required=True, help="GitHub authentication token"
)
parser.add_argument(
    "--repo",
    type=str,
    default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
    help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
)
subparsers = parser.add_subparsers(dest="command")

issue_subscriber_parser = subparsers.add_parser("issue-subscriber")
issue_subscriber_parser.add_argument("--label-name", type=str, required=True)
issue_subscriber_parser.add_argument("--issue-number", type=int, required=True)

pr_subscriber_parser = subparsers.add_parser("pr-subscriber")
pr_subscriber_parser.add_argument("--label-name", type=str, required=True)
pr_subscriber_parser.add_argument("--issue-number", type=int, required=True)

pr_greeter_parser = subparsers.add_parser("pr-greeter")
pr_greeter_parser.add_argument("--issue-number", type=int, required=True)

release_workflow_parser = subparsers.add_parser("release-workflow")
release_workflow_parser.add_argument(
    "--llvm-project-dir",
    type=str,
    default=".",
    help="directory containing the llvm-project checkout",
)
release_workflow_parser.add_argument(
    "--issue-number", type=int, required=True, help="The issue number to update"
)
release_workflow_parser.add_argument(
    "--branch-repo-token",
    type=str,
    help="GitHub authentication token to use for the repository where new branches will be pushed. Defaults to TOKEN.",
)
release_workflow_parser.add_argument(
    "--branch-repo",
    type=str,
    default="llvmbot/llvm-project",
    help="The name of the repo where new branches will be pushed (e.g. llvm/llvm-project)",
)
release_workflow_parser.add_argument(
    "sub_command",
    type=str,
    choices=["print-release-branch", "auto"],
    help="print-release-branch: print to stdout the name of the release branch ISSUE_NUMBER should be backported to; auto: read a /cherry-pick or /branch command from stdin and execute it",
)

llvmbot_git_config_parser = subparsers.add_parser(
    "setup-llvmbot-git",
    help="Set the default user and email for the git repo in LLVM_PROJECT_DIR to llvmbot",
)

args = parser.parse_args()

# Dispatch on the selected command. Only release-workflow affects the exit
# status; the return values of the subscriber/greeter runs are not checked.
if args.command == "issue-subscriber":
    issue_subscriber = IssueSubscriber(
        args.token, args.repo, args.issue_number, args.label_name
    )
    issue_subscriber.run()
elif args.command == "pr-subscriber":
    pr_subscriber = PRSubscriber(
        args.token, args.repo, args.issue_number, args.label_name
    )
    pr_subscriber.run()
elif args.command == "pr-greeter":
    pr_greeter = PRGreeter(args.token, args.repo, args.issue_number)
    pr_greeter.run()
elif args.command == "release-workflow":
    release_workflow = ReleaseWorkflow(
        args.token,
        args.repo,
        args.issue_number,
        args.branch_repo,
        args.branch_repo_token,
        args.llvm_project_dir,
    )
    # Without a release branch nothing can be done: quote the triggering
    # comment (read from stdin) back on the issue with an error and fail.
    if not release_workflow.release_branch_for_issue:
        release_workflow.issue_notify_no_milestone(sys.stdin.readlines())
        sys.exit(1)
    if args.sub_command == "print-release-branch":
        release_workflow.print_release_branch()
    else:
        if not release_workflow.execute_command():
            sys.exit(1)
elif args.command == "setup-llvmbot-git":
    setup_llvmbot_git()
683