xref: /llvm-project/llvm/utils/git/github-automation.py (revision e99edf6bcb20169e153110426f840a2dfeeec66d)
1#!/usr/bin/env python3
2#
3# ======- github-automation - LLVM GitHub Automation Routines--*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==-------------------------------------------------------------------------==#
10
11import argparse
12from git import Repo  # type: ignore
13import html
14import github
15import os
16import re
17import requests
18import sys
19import time
20from typing import List, Optional
21
# Markdown text that IssueSubscriber.run() posts as an issue comment when the
# subscriber team is "issue-subscribers-good-first-issue". It walks a new
# contributor through claiming the issue, fixing it, and opening a pull request.
beginner_comment = """
Hi!

This issue may be a good introductory issue for people new to working on LLVM. If you would like to work on this issue, your first steps are:

1. In the comments of the issue, request for it to be assigned to you.
2. Fix the issue locally.
3. [Run the test suite](https://llvm.org/docs/TestingGuide.html#unit-and-regression-tests) locally. Remember that the subdirectories under `test/` create fine-grained testing targets, so you can e.g. use `make check-clang-ast` to only run Clang's AST tests.
4. Create a Git commit.
5. Run [`git clang-format HEAD~1`](https://clang.llvm.org/docs/ClangFormat.html#git-integration) to format your changes.
6. Open a [pull request](https://github.com/llvm/llvm-project/pulls) to the [upstream repository](https://github.com/llvm/llvm-project) on GitHub. Detailed instructions can be found [in GitHub's documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).

If you have any further questions about this issue, don't hesitate to ask via a comment in the thread below.
"""
36
37
def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
    """
    Return the first entry of `teams` whose lowercased name equals `team_name`.

    `team_name` is compared as given, so callers are expected to pass it
    already lowercased. Returns None when nothing matches.
    """
    # The generator is consumed lazily, so iteration over `teams` stops at the
    # first match.
    matching = (
        candidate for candidate in teams if team_name == candidate.name.lower()
    )
    return next(matching, None)
43
44
def escape_description(str):
    """
    Escape an issue/pull request description for embedding in a bot comment.

    HTML special characters are escaped (quotes are left alone), and an HTML
    comment is inserted after '@' and '#' where they would otherwise be
    rendered as a user mention or an issue reference.

    :param str: The description text, or None when the description is empty.
    :return: The escaped text; "" when `str` is None.
    """
    # The parameter name shadows the builtin and is kept only for backward
    # compatibility with keyword callers; work on a local alias instead.
    text = str
    # If the description of an issue/pull request is empty, the Github API
    # library returns None instead of an empty string. Handle this here to
    # avoid failures from trying to manipulate None.
    if text is None:
        return ""
    # https://github.com/github/markup/issues/1168#issuecomment-494946168
    text = html.escape(text, False)
    # Raw strings are required here: "\w" and "\d" in a normal string literal
    # are invalid escape sequences.
    # '@' followed by alphanum is a user name
    text = re.sub(r"@(?=\w)", "@<!-- -->", text)
    # '#' followed by digits is considered an issue number
    text = re.sub(r"#(?=\d)", "#<!-- -->", text)
    return text
58
59
class IssueSubscriber:
    """
    Mentions the `issue-subscribers-<label>` team in a comment on an issue so
    that the team members are notified about it.
    """

    def __init__(self, token: str, repo: str, issue_number: int, label_name: str):
        self.repo = github.Github(token).get_repo(repo)
        org_client = github.Github(token)
        self.org = org_client.get_organization(self.repo.organization.login)
        self.issue = self.repo.get_issue(issue_number)
        # Team names are matched case-insensitively, so store it lowercased.
        self._team_name = f"issue-subscribers-{label_name}".lower()

    @property
    def team_name(self) -> str:
        """Lowercased name of the team to notify."""
        return self._team_name

    def run(self) -> bool:
        """
        Post the notification comment on the issue.

        For the good-first-issue team the beginner guidance is posted first as
        a separate comment.

        :return: False if the organization has no team named `team_name`,
                 True once the comment has been created.
        """
        subscribers = _get_curent_team(self.team_name, self.org.get_teams())
        if not subscribers:
            print(f"couldn't find team named {self.team_name}")
            return False

        if subscribers.slug == "issue-subscribers-good-first-issue":
            self.issue.create_comment("{}\n".format(beginner_comment))

        # The original description goes inside <details>, escaped so that it
        # does not produce mentions or issue references of its own.
        description = escape_description(self.issue.body)
        notification = f"""
@llvm/{subscribers.slug}

Author: {self.issue.user.name} ({self.issue.user.login})

<details>
{description}
</details>
"""

        self.issue.create_comment(notification)
        return True
95
96
def human_readable_size(size, decimal_places=2):
    """
    Format a byte count using binary units (B, KiB, MiB, GiB, TiB, PiB).

    The value is divided by 1024 until it is below 1024 or the largest unit
    (PiB) is reached, then rendered with `decimal_places` fractional digits.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB")
    largest = len(units) - 1
    index = 0
    while index < largest and not size < 1024.0:
        size /= 1024.0
        index += 1
    return f"{size:.{decimal_places}f} {units[index]}"
103
104
class PRSubscriber:
    """
    Mentions the `pr-subscribers-<label>` team on a pull request.

    The first time it runs for a pull request it posts a summary comment
    (description, list of changed files, and a possibly truncated diff) tagged
    with COMMENT_TAG. Later runs for other labels find that comment and add
    their team mention to it instead of posting a new one.
    """

    @property
    def team_name(self) -> str:
        """Lowercased name of the team to notify."""
        return self._team_name

    def __init__(self, token: str, repo: str, pr_number: int, label_name: str):
        self.repo = github.Github(token).get_repo(repo)
        self.org = github.Github(token).get_organization(self.repo.organization.login)
        self.pr = self.repo.get_issue(pr_number).as_pull_request()
        # '+' in the label is mapped to 'x' (so "c++" becomes "cxx");
        # presumably because team names cannot contain '+'.
        self._team_name = "pr-subscribers-{}".format(
            label_name.replace("+", "x")
        ).lower()
        # Marker used to recognize the summary comment on later runs.
        self.COMMENT_TAG = "<!--LLVM PR SUMMARY COMMENT-->\n"

    def get_summary_comment(self) -> Optional[github.IssueComment.IssueComment]:
        """
        Return the first comment on the pull request that contains
        COMMENT_TAG, or None if there is no such comment.
        """
        for comment in self.pr.as_issue().get_comments():
            if self.COMMENT_TAG in comment.body:
                return comment
        return None

    def run(self) -> bool:
        """
        Create the summary comment, or add this team's mention to the
        existing one.

        :return: False if the organization has no team named `team_name`,
                 True otherwise.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # GitHub limits comments to 65,536 characters, let's limit the diff
        # and the file list to 20kB each.
        STAT_LIMIT = 20 * 1024
        DIFF_LIMIT = 20 * 1024

        # Get statistics for each file
        diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
        for file in self.pr.get_files():
            diff_stats += f"- ({file.status}) {file.filename} ("
            if file.additions:
                diff_stats += f"+{file.additions}"
            if file.deletions:
                diff_stats += f"-{file.deletions}"
            diff_stats += ") "
            if file.status == "renamed":
                # Record the old name in the comment itself (this used to be
                # printed to stdout and was lost from the summary).
                diff_stats += f"(from {file.previous_filename})"
            diff_stats += "\n"
            if len(diff_stats) > STAT_LIMIT:
                break

        # Get the diff. This is best effort: if the download fails the
        # summary is still posted, just without the patch text.
        try:
            patch = requests.get(self.pr.diff_url).text
        except requests.RequestException as e:
            print(f"warning: failed to download the diff: {e}")
            patch = ""

        patch_link = f"Full diff: {self.pr.diff_url}\n"
        if len(patch) > DIFF_LIMIT:
            patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
        team_mention = "@llvm/{}".format(team.slug)

        body = escape_description(self.pr.body)
        # Note: the comment is in markdown and the code below
        # is sensitive to line breaks
        comment = f"""
{self.COMMENT_TAG}
{team_mention}

Author: {self.pr.user.name} ({self.pr.user.login})

<details>
<summary>Changes</summary>

{body}

---
{patch_link}

{diff_stats}

``````````diff
{patch}
``````````

</details>
"""

        summary_comment = self.get_summary_comment()
        if not summary_comment:
            self.pr.as_issue().create_comment(comment)
        elif team_mention + "\n" in summary_comment.body:
            print("Team {} already mentioned.".format(team.slug))
        else:
            # Insert the new mention right after the tag, keeping the rest of
            # the existing summary untouched.
            summary_comment.edit(
                summary_comment.body.replace(
                    self.COMMENT_TAG, self.COMMENT_TAG + team_mention + "\n"
                )
            )
        return True

    def _get_curent_team(self) -> Optional[github.Team.Team]:
        """
        Return the organization team whose lowercased name equals
        `team_name`, or None. Note that run() does not call this method; it
        uses the module-level function of the same name.
        """
        for team in self.org.get_teams():
            if self.team_name == team.name.lower():
                return team
        return None
208
209
class PRGreeter:
    """Posts a welcome comment on a pull request."""

    def __init__(self, token: str, repo: str, pr_number: int):
        client = github.Github(token)
        self.pr = client.get_repo(repo).get_issue(pr_number).as_pull_request()

    def run(self) -> bool:
        """
        Add the greeting as a comment on the pull request.

        The caller is assumed to invoke this only for a PR that has just been
        opened by a user new to LLVM and/or GitHub itself.

        :return: Always True.
        """
        # This text is using Markdown formatting.
        greeting = """\
Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/)."""
        pr_issue = self.pr.as_issue()
        pr_issue.create_comment(greeting)
        return True
241
242
def setup_llvmbot_git(git_dir="."):
    """
    Set `user.name` and `user.email` of the git repo in `git_dir` to the
    llvmbot account so commits are attributed to llvmbot.
    """
    identity = (("name", "llvmbot"), ("email", "llvmbot@llvm.org"))
    llvm_repo = Repo(git_dir)
    with llvm_repo.config_writer() as writer:
        for option, value in identity:
            writer.set_value("user", option, value)
252
253
def extract_commit_hash(arg: str):
    """
    Return the commit hash named by an argument of a /action github comment.

    Two forms are understood: the bare commit hash, which is returned as is,
    and a llvm-project commit URL such as
    https://github.com/llvm/llvm-project/commit/2832d7941f4207f1fcf813b27cf08cecc3086959
    from which everything after the URL prefix is returned.
    """
    github_prefix = "https://github.com/llvm/llvm-project/commit/"
    if not arg.startswith(github_prefix):
        return arg
    return arg[len(github_prefix) :]
265
266
class ReleaseWorkflow:
    """
    This class implements the release-workflow command, which backports
    commits to a release branch on behalf of a GitHub issue.

    The work is done in two steps:
        * create_branch: cherry-pick the requested commits and push the
          result to a branch in `branch_repo_name`.
        * create_pull_request: open a pull request from that branch against
          the release branch named in the issue's milestone.

    The execute_command method reads the command to run from stdin.
    """

    CHERRY_PICK_FAILED_LABEL = "release:cherry-pick-failed"

    def __init__(
        self,
        token: str,
        repo: str,
        issue_number: int,
        branch_repo_name: str,
        branch_repo_token: str,
        llvm_project_dir: str,
    ) -> None:
        self._token = token
        self._repo_name = repo
        self._issue_number = issue_number
        self._branch_repo_name = branch_repo_name
        # Fall back to the main token when no separate token was given for
        # the repository that receives the branches.
        if branch_repo_token:
            self._branch_repo_token = branch_repo_token
        else:
            self._branch_repo_token = self.token
        self._llvm_project_dir = llvm_project_dir

    @property
    def token(self) -> str:
        return self._token

    @property
    def repo_name(self) -> str:
        return self._repo_name

    @property
    def issue_number(self) -> int:
        return self._issue_number

    @property
    def branch_repo_owner(self) -> str:
        """The `<owner>` part of `branch_repo_name` (`<owner>/<repo>`)."""
        return self.branch_repo_name.split("/")[0]

    @property
    def branch_repo_name(self) -> str:
        return self._branch_repo_name

    @property
    def branch_repo_token(self) -> str:
        return self._branch_repo_token

    @property
    def llvm_project_dir(self) -> str:
        return self._llvm_project_dir

    @property
    def repo(self) -> github.Repository.Repository:
        """The repository `repo_name`; looked up again on every access."""
        return github.Github(self.token).get_repo(self.repo_name)

    @property
    def issue(self) -> github.Issue.Issue:
        """The issue `issue_number`; looked up again on every access."""
        return self.repo.get_issue(self.issue_number)

    @property
    def push_url(self) -> str:
        """
        URL used to push to `branch_repo_name`. It embeds
        `branch_repo_token`, so it must not be printed or logged.
        """
        return "https://{}@github.com/{}".format(
            self.branch_repo_token, self.branch_repo_name
        )

    @property
    def branch_name(self) -> str:
        return "issue{}".format(self.issue_number)

    @property
    def release_branch_for_issue(self) -> Optional[str]:
        """
        The release branch named by a "branch: <name>" entry in the
        description of the issue's milestone, or None when the issue has no
        milestone or the description has no such entry.
        """
        milestone = self.issue.milestone
        if milestone is None:
            return None
        # The API library returns None rather than "" for an empty issue
        # body; presumably the same holds for milestone descriptions, so
        # guard against None before searching.
        m = re.search("branch: (.+)", milestone.description or "")
        if m:
            return m.group(1)
        return None

    def print_release_branch(self) -> None:
        print(self.release_branch_for_issue)

    def issue_notify_branch(self) -> None:
        self.issue.create_comment(
            "/branch {}/{}".format(self.branch_repo_name, self.branch_name)
        )

    def issue_notify_pull_request(self, pull: github.PullRequest.PullRequest) -> None:
        self.issue.create_comment(
            "/pull-request {}#{}".format(self.repo_name, pull.number)
        )

    def make_ignore_comment(self, comment: str) -> str:
        """
        Returns the comment string with a prefix that will cause
        a Github workflow to skip parsing this comment.

        :param str comment: The comment to ignore
        """
        return "<!--IGNORE-->\n" + comment

    def issue_notify_no_milestone(self, comment: List[str]) -> None:
        """
        Comment on the issue that the command failed because of a missing
        milestone, quoting the command text.

        :param list comment: Lines of the command text; each line is prefixed
                             with ">" so that it renders as a quote.
        """
        message = "{}\n\nError: Command failed due to missing milestone.".format(
            "".join([">" + line for line in comment])
        )
        self.issue.create_comment(self.make_ignore_comment(message))

    @property
    def action_url(self) -> str:
        """
        URL of the current GitHub Actions run, built from GITHUB_REPOSITORY
        and GITHUB_RUN_ID, or "" when the CI environment variable is not set.
        """
        if os.getenv("CI"):
            return "https://github.com/{}/actions/runs/{}".format(
                os.getenv("GITHUB_REPOSITORY"), os.getenv("GITHUB_RUN_ID")
            )
        return ""

    def issue_notify_cherry_pick_failure(
        self, commit: str
    ) -> github.IssueComment.IssueComment:
        """
        Comment on the issue that cherry-picking `commit` failed and add the
        CHERRY_PICK_FAILED_LABEL label to it.

        :return: The comment that was created.
        """
        message = self.make_ignore_comment(
            "Failed to cherry-pick: {}\n\n".format(commit)
        )
        action_url = self.action_url
        if action_url:
            message += action_url + "\n\n"
        message += "Please manually backport the fix and push it to your github fork.  Once this is done, please create a [pull request](https://github.com/llvm/llvm-project/compare)"
        issue = self.issue
        comment = issue.create_comment(message)
        issue.add_to_labels(self.CHERRY_PICK_FAILED_LABEL)
        return comment

    def issue_notify_pull_request_failure(
        self, branch: str
    ) -> github.IssueComment.IssueComment:
        """
        Comment on the issue that no pull request could be created for
        `branch`.

        :return: The comment that was created.
        """
        message = "Failed to create pull request for {} ".format(branch)
        message += self.action_url
        return self.issue.create_comment(message)

    def issue_remove_cherry_pick_failed_label(self):
        """Remove CHERRY_PICK_FAILED_LABEL from the issue if it is set."""
        if self.CHERRY_PICK_FAILED_LABEL in [l.name for l in self.issue.labels]:
            self.issue.remove_from_labels(self.CHERRY_PICK_FAILED_LABEL)

    def get_main_commit(self, cherry_pick_sha: str) -> Optional[github.Commit.Commit]:
        """
        Return the commit that `cherry_pick_sha` was cherry-picked from.

        The original commit is taken from the "(cherry picked from commit
        <sha>)" line in the commit message (the line `git cherry-pick -x`
        adds). Returns None when the message has no such line.
        """
        commit = self.repo.get_commit(cherry_pick_sha)
        message = commit.commit.message
        # Raw string: "\(" in a normal literal is an invalid escape sequence.
        m = re.search(r"\(cherry picked from commit ([0-9a-f]+)\)", message)
        if not m:
            return None
        return self.repo.get_commit(m.group(1))

    def pr_request_review(self, pr: github.PullRequest.PullRequest):
        """
        This function will try to find the best reviewers for the commits in
        `pr` and then add a comment requesting review of the backport and add
        them as reviewers.

        The reviewers selected are those users who approved the pull request
        for the main branch.
        """
        reviewers = []
        for commit in pr.get_commits():
            main_commit = self.get_main_commit(commit.sha)
            if not main_commit:
                continue
            for pull in main_commit.get_pulls():
                for review in pull.get_reviews():
                    if review.state != "APPROVED":
                        continue
                    # Collect each approver once, in the order first seen.
                    login = review.user.login
                    if login not in reviewers:
                        reviewers.append(login)
        if reviewers:
            message = "{} What do you think about merging this PR to the release branch?".format(
                " ".join(["@" + r for r in reviewers])
            )
            pr.create_issue_comment(message)
            pr.create_review_request(reviewers)

    def create_branch(self, commits: List[str]) -> bool:
        """
        This function attempts to backport `commits` into the branch associated
        with `self.issue_number`.

        If this is successful, then the branch is pushed to `self.branch_repo_name`, if not,
        a comment is added to the issue saying that the cherry-pick failed.

        :param list commits: List of commits to cherry-pick.
        :return: The result of create_pull_request for the pushed branch.
        :raises Exception: Whatever the failed cherry-pick raised, after the
                           issue has been notified.
        """
        print("cherry-picking", commits)
        branch_name = self.branch_name
        local_repo = Repo(self.llvm_project_dir)
        local_repo.git.checkout(self.release_branch_for_issue)

        for c in commits:
            try:
                local_repo.git.cherry_pick("-x", c)
            except Exception:
                self.issue_notify_cherry_pick_failure(c)
                raise

        push_url = self.push_url
        # Log the repository name rather than push_url, which contains the
        # access token.
        print("Pushing to {} {}".format(self.branch_repo_name, branch_name))
        local_repo.git.push(push_url, "HEAD:{}".format(branch_name), force=True)

        self.issue_remove_cherry_pick_failed_label()
        return self.create_pull_request(
            self.branch_repo_owner, self.repo_name, branch_name
        )

    def check_if_pull_request_exists(
        self, repo: github.Repository.Repository, head: str
    ) -> bool:
        """Return True if `repo` has at least one pull request for `head`."""
        pulls = repo.get_pulls(head=head)
        return pulls.totalCount != 0

    def create_pull_request(self, owner: str, repo_name: str, branch: str) -> bool:
        """
        Create a pull request in `self.repo_name`.  The base branch of the
        pull request will be chosen based on the the milestone attached to
        the issue represented by `self.issue_number`  For example if the milestone
        is Release 13.0.1, then the base branch will be release/13.x. `branch`
        will be used as the compare branch.
        https://docs.github.com/en/get-started/quickstart/github-glossary#base-branch
        https://docs.github.com/en/get-started/quickstart/github-glossary#compare-branch

        :return: False when no release branch can be determined for the
                 issue; True when a pull request for `owner`:`branch` already
                 exists or was created.
        :raises Exception: Whatever creating the pull request raised, after
                           the issue has been notified.
        """
        repo = github.Github(self.token).get_repo(self.repo_name)
        issue_ref = "{}#{}".format(self.repo_name, self.issue_number)
        pull = None
        release_branch_for_issue = self.release_branch_for_issue
        if release_branch_for_issue is None:
            return False

        head = f"{owner}:{branch}"
        if self.check_if_pull_request_exists(repo, head):
            print("PR already exists...")
            return True
        try:
            pull = repo.create_pull(
                title=f"PR for {issue_ref}",
                body="resolves {}".format(issue_ref),
                base=release_branch_for_issue,
                head=head,
                maintainer_can_modify=False,
            )

            pull.as_issue().edit(milestone=self.issue.milestone)

            # Finding reviewers is best effort; a failure here must not
            # prevent the pull request from being reported.
            try:
                self.pr_request_review(pull)
            except Exception as e:
                print("error: Failed while searching for reviewers", e)

        except Exception:
            self.issue_notify_pull_request_failure(branch)
            raise

        if pull is None:
            return False

        self.issue_notify_pull_request(pull)
        self.issue_remove_cherry_pick_failed_label()

        # TODO(tstellar): Do you really want to always return True?
        return True

    def execute_command(self) -> bool:
        """
        This function reads lines from STDIN and executes the first command
        that it finds.  The only supported command is:
        /cherry-pick commit0 <commit1> <commit2> <...>

        :return: The result of create_branch for the first /cherry-pick line,
                 or False when stdin contains no such line.
        """
        # Keep what was read so it can be reported below; stdin is exhausted
        # once the loop finishes and cannot be read a second time.
        seen_lines = []
        for line in sys.stdin:
            seen_lines.append(line)
            line = line.rstrip()
            m = re.search(r"/([a-z-]+)\s(.+)", line)
            if not m:
                continue
            command = m.group(1)
            args = m.group(2)

            if command == "cherry-pick":
                commits = [extract_commit_hash(a) for a in args.split()]
                return self.create_branch(commits)

        print("Do not understand input:")
        print(seen_lines)
        return False
563
564
# Command line interface: global options followed by one sub-command.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--token", type=str, required=True, help="GitHub authentication token"
)
parser.add_argument(
    "--repo",
    type=str,
    default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
    help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
)
# The chosen sub-command name is stored in `args.command`.
subparsers = parser.add_subparsers(dest="command")

issue_subscriber_parser = subparsers.add_parser("issue-subscriber")
issue_subscriber_parser.add_argument("--label-name", type=str, required=True)
issue_subscriber_parser.add_argument("--issue-number", type=int, required=True)

# For pull requests the number is still passed as --issue-number.
pr_subscriber_parser = subparsers.add_parser("pr-subscriber")
pr_subscriber_parser.add_argument("--label-name", type=str, required=True)
pr_subscriber_parser.add_argument("--issue-number", type=int, required=True)

pr_greeter_parser = subparsers.add_parser("pr-greeter")
pr_greeter_parser.add_argument("--issue-number", type=int, required=True)

release_workflow_parser = subparsers.add_parser("release-workflow")
release_workflow_parser.add_argument(
    "--llvm-project-dir",
    type=str,
    default=".",
    help="directory containing the llvm-project checkout",
)
release_workflow_parser.add_argument(
    "--issue-number", type=int, required=True, help="The issue number to update"
)
release_workflow_parser.add_argument(
    "--branch-repo-token",
    type=str,
    help="GitHub authentication token to use for the repository where new branches will be pushed. Defaults to TOKEN.",
)
release_workflow_parser.add_argument(
    "--branch-repo",
    type=str,
    default="llvmbot/llvm-project",
    help="The name of the repo where new branches will be pushed (e.g. llvm/llvm-project)",
)
release_workflow_parser.add_argument(
    "sub_command",
    type=str,
    choices=["print-release-branch", "auto"],
    help="print-release-branch: print to stdout the name of the release branch ISSUE_NUMBER should be backported to; auto: read a command from stdin and execute it",
)

llvmbot_git_config_parser = subparsers.add_parser(
    "setup-llvmbot-git",
    help="Set the default user and email for the git repo in LLVM_PROJECT_DIR to llvmbot",
)
620
# Parse the command line and run the selected sub-command. Only the
# release-workflow command affects the exit status: it exits with 1 when the
# issue has no release branch or when the command read from stdin fails.
args = parser.parse_args()

if args.command == "issue-subscriber":
    issue_subscriber = IssueSubscriber(
        token=args.token,
        repo=args.repo,
        issue_number=args.issue_number,
        label_name=args.label_name,
    )
    issue_subscriber.run()
elif args.command == "pr-subscriber":
    pr_subscriber = PRSubscriber(
        token=args.token,
        repo=args.repo,
        pr_number=args.issue_number,
        label_name=args.label_name,
    )
    pr_subscriber.run()
elif args.command == "pr-greeter":
    pr_greeter = PRGreeter(
        token=args.token, repo=args.repo, pr_number=args.issue_number
    )
    pr_greeter.run()
elif args.command == "release-workflow":
    release_workflow = ReleaseWorkflow(
        token=args.token,
        repo=args.repo,
        issue_number=args.issue_number,
        branch_repo_name=args.branch_repo,
        branch_repo_token=args.branch_repo_token,
        llvm_project_dir=args.llvm_project_dir,
    )
    # Without a release branch nothing can be done; report it on the issue,
    # quoting the text received on stdin.
    if not release_workflow.release_branch_for_issue:
        release_workflow.issue_notify_no_milestone(sys.stdin.readlines())
        sys.exit(1)
    if args.sub_command == "print-release-branch":
        release_workflow.print_release_branch()
    elif not release_workflow.execute_command():
        sys.exit(1)
elif args.command == "setup-llvmbot-git":
    setup_llvmbot_git()
655