xref: /llvm-project/llvm/utils/git/github-automation.py (revision 3058d2908e36c871d94f36982bcb87b753968bc0)
1#!/usr/bin/env python3
2#
3# ======- github-automation - LLVM GitHub Automation Routines--*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==-------------------------------------------------------------------------==#
10
11import argparse
12from git import Repo  # type: ignore
13import html
14import github
15import os
16import re
17import requests
18import sys
19import time
20from typing import List, Optional
21
# Introductory text aimed at people new to LLVM. IssueSubscriber.run()
# references it when the matched team's slug is
# "issue-subscribers-good-first-issue". The text is GitHub-flavoured markdown
# and is posted verbatim, so whitespace inside the literal is significant.
beginner_comment = """
Hi!

This issue may be a good introductory issue for people new to working on LLVM. If you would like to work on this issue, your first steps are:

  1) Assign the issue to you.
  2) Fix the issue locally.
  3) [Run the test suite](https://llvm.org/docs/TestingGuide.html#unit-and-regression-tests) locally.
    3.1) Remember that the subdirectories under `test/` create fine-grained testing targets, so you can
         e.g. use `make check-clang-ast` to only run Clang's AST tests.
  4) Create a `git` commit
  5) Run [`git clang-format HEAD~1`](https://clang.llvm.org/docs/ClangFormat.html#git-integration) to format your changes.
  6) Submit the patch to [Phabricator](https://reviews.llvm.org/).
    6.1) Detailed instructions can be found [here](https://llvm.org/docs/Phabricator.html#requesting-a-review-via-the-web-interface)

For more instructions on how to submit a patch to LLVM, see our [documentation](https://llvm.org/docs/Contributing.html).

If you have any further questions about this issue, don't hesitate to ask via a comment on this Github issue.
"""
41
42
43def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
44    for team in teams:
45        if team_name == team.name.lower():
46            return team
47    return None
48
49
def escape_description(str):
    """Make a description safe to embed in a GitHub comment.

    HTML-escapes `&`, `<` and `>` (quotes are left alone) and inserts an empty
    HTML comment after every `@` that precedes a word character and every `#`
    that precedes a digit, so that re-posting the text does not create user
    mentions or issue references.

    :param str: The description text, or None when there is no description.
    :return: The escaped text; an empty string when `str` is None.
    """
    # The parameter keeps its historical name (which shadows the builtin) so
    # existing callers are unaffected; work on a clearly named local instead.
    text = str
    if text is None:
        # A missing description is treated as empty rather than crashing in
        # html.escape().
        return ""
    # https://github.com/github/markup/issues/1168#issuecomment-494946168
    text = html.escape(text, False)
    # '@' followed by alphanum is a user name
    text = re.sub(r"@(?=\w)", "@<!-- -->", text)
    # '#' followed by digits is considered an issue number
    text = re.sub(r"#(?=\d)", "#<!-- -->", text)
    return text
58
59
class IssueSubscriber:
    """Notify the `issue-subscribers-<label>` team about a labelled issue.

    `run()` posts a comment on the issue that mentions the team and repeats
    the (escaped) issue description inside a collapsed `<details>` section.
    """

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team to look up in the organization."""
        return self._team_name

    def __init__(self, token: str, repo: str, issue_number: int, label_name: str):
        """
        :param str token: GitHub authentication token.
        :param str repo: Repository in the form <owner>/<repo>.
        :param int issue_number: Number of the issue to comment on.
        :param str label_name: Label name; the team looked up is
            "issue-subscribers-<label_name>", lower-cased.
        """
        # One client is enough for both the repository and organization lookups.
        gh = github.Github(token)
        self.repo = gh.get_repo(repo)
        self.org = gh.get_organization(self.repo.organization.login)
        self.issue = self.repo.get_issue(issue_number)
        self._team_name = "issue-subscribers-{}".format(label_name).lower()

    def run(self) -> bool:
        """Post the team-mention comment on the issue.

        :return: True when the comment was posted, False when no team named
            `self.team_name` exists in the organization.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # The introductory text for newcomers is prepended to the comment.
        # Previously it was assigned to `comment` and then immediately
        # overwritten, so it was never posted.
        intro = ""
        if team.slug == "issue-subscribers-good-first-issue":
            intro = "{}\n".format(beginner_comment)

        # Guard against a missing description before escaping it.
        body = escape_description(self.issue.body or "")

        comment = f"""{intro}
@llvm/{team.slug}

<details>
{body}
</details>
"""

        self.issue.create_comment(comment)
        return True
92
93
def human_readable_size(size, decimal_places=2):
    """Format a byte count using binary units (B, KiB, ... PiB).

    `size` is divided by 1024 until it drops below 1024 or the largest unit
    (PiB) is reached, then rendered with `decimal_places` fractional digits
    followed by a space and the unit.
    """
    suffixes = ("B", "KiB", "MiB", "GiB", "TiB", "PiB")
    largest = len(suffixes) - 1
    position = 0
    # Stop stepping up once the value fits the current unit or no larger unit
    # is available.
    while position < largest and not size < 1024.0:
        size /= 1024.0
        position += 1
    return f"{size:.{decimal_places}f} {suffixes[position]}"
100
101
class PRSubscriber:
    """Notify the `pr-subscribers-<label>` team about a labelled pull request.

    The first time `run()` is invoked for a pull request it posts a summary
    comment containing the team mention, the escaped PR description, a
    per-file change list and a (possibly truncated) diff. Later invocations
    for other labels find that comment through `COMMENT_TAG` and only add the
    new team mention to it.
    """

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team to look up in the organization."""
        return self._team_name

    def __init__(self, token: str, repo: str, pr_number: int, label_name: str):
        """
        :param str token: GitHub authentication token.
        :param str repo: Repository in the form <owner>/<repo>.
        :param int pr_number: Number of the pull request to comment on.
        :param str label_name: Label name; "+" is replaced by "x" and the
            result is lower-cased to form "pr-subscribers-<label_name>".
        """
        # One client is enough for both the repository and organization lookups.
        gh = github.Github(token)
        self.repo = gh.get_repo(repo)
        self.org = gh.get_organization(self.repo.organization.login)
        self.pr = self.repo.get_issue(pr_number).as_pull_request()
        self._team_name = "pr-subscribers-{}".format(
            label_name.replace("+", "x")
        ).lower()
        # Marker embedded in the summary comment so it can be found again.
        self.COMMENT_TAG = "<!--LLVM PR SUMMARY COMMENT-->\n"

    def get_summary_comment(self) -> Optional[github.IssueComment.IssueComment]:
        """Return the first PR comment containing `COMMENT_TAG`, or None."""
        for comment in self.pr.as_issue().get_comments():
            if self.COMMENT_TAG in comment.body:
                return comment
        return None

    def run(self) -> bool:
        """Create the summary comment or add the team mention to it.

        :return: True on success, False when no team named `self.team_name`
            exists in the organization.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # GitHub limits comments to 65,536 characters, let's limit the diff
        # and the file list to 20kB each.
        STAT_LIMIT = 20 * 1024
        DIFF_LIMIT = 20 * 1024

        # Get statistics for each file
        diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
        for file in self.pr.get_files():
            diff_stats += f"- ({file.status}) {file.filename} ("
            if file.additions:
                diff_stats += f"+{file.additions}"
            if file.deletions:
                diff_stats += f"-{file.deletions}"
            diff_stats += ") "
            if file.status == "renamed":
                # Record the rename origin in the comment itself; it used to
                # be printed to stdout and was missing from the summary.
                diff_stats += f"(from {file.previous_filename})"
            diff_stats += "\n"
            if len(diff_stats) > STAT_LIMIT:
                break

        # Get the diff. This is best effort: the comment is still useful
        # without it, so any download failure only results in an empty diff.
        try:
            patch = requests.get(self.pr.diff_url).text
        except Exception as e:
            print(f"warning: could not download {self.pr.diff_url}: {e}")
            patch = ""

        patch_link = f"Full diff: {self.pr.diff_url}\n"
        if len(patch) > DIFF_LIMIT:
            patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
        team_mention = "@llvm/{}".format(team.slug)

        # Guard against a missing description before escaping it.
        body = escape_description(self.pr.body or "")
        # Note: the comment is in markdown and the text below
        # is sensitive to line breaks
        comment = f"""
{self.COMMENT_TAG}
{team_mention}

<details>
<summary>Changes</summary>

{body}
---
{patch_link}

{diff_stats}

``````````diff
{patch}
``````````

</details>
"""

        summary_comment = self.get_summary_comment()
        if not summary_comment:
            self.pr.as_issue().create_comment(comment)
        elif team_mention + "\n" in summary_comment.body:
            print("Team {} already mentioned.".format(team.slug))
        else:
            # Insert the new mention right after the tag of the existing
            # summary instead of posting a second summary.
            summary_comment.edit(
                summary_comment.body.replace(
                    self.COMMENT_TAG, self.COMMENT_TAG + team_mention + "\n"
                )
            )
        return True

    def _get_curent_team(self) -> Optional[github.Team.Team]:
        """Return the organization team whose lower-cased name is `team_name`.

        Not called by `run()`, which uses the module-level function of the
        same name; kept for backward compatibility.
        """
        for team in self.org.get_teams():
            if self.team_name == team.name.lower():
                return team
        return None
202
203
def setup_llvmbot_git(git_dir="."):
    """
    Write the llvmbot identity (user.name and user.email) into the
    configuration of the git repo in `git_dir`, so commits are attributed
    to llvmbot.
    """
    identity = (
        ("name", "llvmbot"),
        ("email", "llvmbot@llvm.org"),
    )
    target = Repo(git_dir)
    with target.config_writer() as writer:
        for option, value in identity:
            writer.set_value("user", option, value)
213
214
def phab_api_call(phab_token: str, url: str, args: dict) -> dict:
    """
    POST `args` together with the API token to the Phabricator endpoint at
    `url` and return the decoded json response as a dictionary.
    """
    # The token goes in first; entries of `args` are layered on top of it.
    payload = {"api.token": phab_token}
    payload.update(args)
    return requests.post(url, data=payload).json()
224
225
def phab_login_to_github_login(
    phab_token: str, repo: github.Repository.Repository, phab_login: str
) -> Optional[str]:
    """
    Map a Phabricator login onto a github login.

    One commit authored by `phab_login` is searched for through the
    diffusion.commit.search API. Its identifier is then looked up in `repo`
    and the login of that commit's committer is returned. None is returned
    when no commit is found or when the commit has no committer account.

    :param str phab_token: The Conduit API token to use for communication with Phabricator
    :param github.Repository.Repository repo: The github repo to use when looking for the SHA1 found in Differential
    :param str phab_login: The Phabricator login to be translated.
    """
    # API documentation: https://reviews.llvm.org/conduit/method/diffusion.commit.search/
    search_url = "https://reviews.llvm.org/api/diffusion.commit.search"
    query = {
        "constraints[authors][0]": phab_login,
        # PHID for "LLVM Github Monorepo" repository
        "constraints[repositories][0]": "PHID-REPO-f4scjekhnkmh7qilxlcy",
        "limit": 1,
    }
    found = phab_api_call(phab_token, search_url, query)["result"]["data"]
    if len(found) == 0:
        # Can't find any commits associated with this user
        return None

    sha = found[0]["fields"]["identifier"]
    github_user = repo.get_commit(sha).committer
    if github_user:
        return github_user.login

    # This committer had an email address GitHub could not recognize, so
    # it can't link the user to a GitHub account.
    print(f"Warning: Can't find github account for {phab_login}")
    return None
263
264
def phab_get_commit_approvers(phab_token: str, commit: github.Commit.Commit) -> list:
    """
    Return the Phabricator usernames of the reviewers who accepted the
    revision referenced by `commit`'s message.

    An empty list is returned when the commit message does not reference a
    Phabricator revision.
    """
    # API documentation: https://reviews.llvm.org/conduit/method/differential.parsecommitmessage/
    parsed = phab_api_call(
        phab_token,
        "https://reviews.llvm.org/api/differential.parsecommitmessage",
        {"corpus": commit.commit.message},
    )
    revision_id = parsed["result"]["revisionIDFieldInfo"]["value"]
    if not revision_id:
        # No Phabricator revision for this commit
        return []

    # API documentation: https://reviews.llvm.org/conduit/method/differential.revision.search/
    revision = phab_api_call(
        phab_token,
        "https://reviews.llvm.org/api/differential.revision.search",
        {"constraints[ids][0]": revision_id, "attachments[reviewers]": True},
    )
    reviewer_entries = revision["result"]["data"][0]["attachments"]["reviewers"][
        "reviewers"
    ]

    usernames = []
    for entry in reviewer_entries:
        if entry["status"] == "accepted":
            # Resolve the reviewer's PHID to a username.
            # API documentation: https://reviews.llvm.org/conduit/method/user.search/
            lookup = phab_api_call(
                phab_token,
                "https://reviews.llvm.org/api/user.search",
                {"constraints[phids][0]": entry["reviewerPHID"]},
            )
            usernames.append(lookup["result"]["data"][0]["fields"]["username"])
    return usernames
292
293
def extract_commit_hash(arg: str):
    """
    Return the commit hash named by an argument of a /action github comment.

    Two spellings are supported: the bare commit hash, which is returned
    unchanged, and the github commit URL, such as
    https://github.com/llvm/llvm-project/commit/2832d7941f4207f1fcf813b27cf08cecc3086959
    from which everything after the URL prefix is returned.
    """
    commit_url_base = "https://github.com/llvm/llvm-project/commit/"
    if not arg.startswith(commit_url_base):
        return arg
    return arg[len(commit_url_base) :]
305
306
class ReleaseWorkflow:
    """
    This class implements the sub-commands for the release-workflow command.
    The current sub-commands are:
        * create-branch
        * create-pull-request

    The execute_command method will automatically choose the correct sub-command
    based on the text in stdin.
    """

    # Label added to the issue when a cherry-pick fails and removed again once
    # a branch has been pushed or a pull request has been created.
    CHERRY_PICK_FAILED_LABEL = "release:cherry-pick-failed"

    def __init__(
        self,
        token: str,
        repo: str,
        issue_number: int,
        branch_repo_name: str,
        branch_repo_token: str,
        llvm_project_dir: str,
        phab_token: str,
    ) -> None:
        """
        :param str token: GitHub authentication token.
        :param str repo: Repository holding the issue, as <owner>/<repo>.
        :param int issue_number: The issue the workflow operates on.
        :param str branch_repo_name: Repository that branches are pushed to
            and pull requests are created in, as <owner>/<repo>.
        :param str branch_repo_token: Token used when pushing to
            `branch_repo_name`; `token` is used when this is empty or None.
        :param str llvm_project_dir: Path of the local llvm-project checkout.
        :param str phab_token: Phabricator Conduit API token (may be None).
        """
        self._token = token
        self._repo_name = repo
        self._issue_number = issue_number
        self._branch_repo_name = branch_repo_name
        self._branch_repo_token = branch_repo_token or token
        self._llvm_project_dir = llvm_project_dir
        self._phab_token = phab_token

    @property
    def token(self) -> str:
        return self._token

    @property
    def repo_name(self) -> str:
        return self._repo_name

    @property
    def issue_number(self) -> int:
        return self._issue_number

    @property
    def branch_repo_name(self) -> str:
        return self._branch_repo_name

    @property
    def branch_repo_token(self) -> str:
        return self._branch_repo_token

    @property
    def llvm_project_dir(self) -> str:
        return self._llvm_project_dir

    @property
    def phab_token(self) -> str:
        return self._phab_token

    @property
    def repo(self) -> github.Repository.Repository:
        """A freshly fetched handle on `repo_name` (new client per access)."""
        return github.Github(self.token).get_repo(self.repo_name)

    @property
    def issue(self) -> github.Issue.Issue:
        """A freshly fetched handle on issue `issue_number` of `repo`."""
        return self.repo.get_issue(self.issue_number)

    @property
    def push_url(self) -> str:
        """
        URL used to push to `branch_repo_name`. It embeds
        `branch_repo_token`, so it must never be printed or logged.
        """
        return "https://{}@github.com/{}".format(
            self.branch_repo_token, self.branch_repo_name
        )

    @property
    def branch_name(self) -> str:
        return "issue{}".format(self.issue_number)

    @property
    def release_branch_for_issue(self) -> Optional[str]:
        """
        The release branch named by a "branch: <name>" entry in the
        description of the issue's milestone, or None when the issue has no
        milestone or the description has no such entry.
        """
        milestone = self.issue.milestone
        if milestone is None:
            return None
        # Treat a missing description like an empty one instead of letting
        # re.search() raise a TypeError.
        m = re.search("branch: (.+)", milestone.description or "")
        if m:
            return m.group(1)
        return None

    def _redact(self, text: str) -> str:
        """Return `text` with any occurrence of the push token masked."""
        secret = self.branch_repo_token
        if not secret:
            return text
        return text.replace(secret, "***")

    def print_release_branch(self) -> None:
        print(self.release_branch_for_issue)

    def issue_notify_branch(self) -> None:
        self.issue.create_comment(
            "/branch {}/{}".format(self.branch_repo_name, self.branch_name)
        )

    def issue_notify_pull_request(self, pull: github.PullRequest.PullRequest) -> None:
        self.issue.create_comment(
            "/pull-request {}#{}".format(self.branch_repo_name, pull.number)
        )

    def make_ignore_comment(self, comment: str) -> str:
        """
        Returns the comment string with a prefix that will cause
        a Github workflow to skip parsing this comment.

        :param str comment: The comment to ignore
        """
        return "<!--IGNORE-->\n" + comment

    def issue_notify_no_milestone(self, comment: List[str]) -> None:
        """
        Quote the lines of `comment` back on the issue together with an
        error saying that the milestone is missing.
        """
        message = "{}\n\nError: Command failed due to missing milestone.".format(
            "".join([">" + line for line in comment])
        )
        self.issue.create_comment(self.make_ignore_comment(message))

    @property
    def action_url(self) -> str:
        """
        URL of the current GitHub Actions run, built from GITHUB_REPOSITORY
        and GITHUB_RUN_ID, or "" when the CI environment variable is unset.
        """
        if os.getenv("CI"):
            return "https://github.com/{}/actions/runs/{}".format(
                os.getenv("GITHUB_REPOSITORY"), os.getenv("GITHUB_RUN_ID")
            )
        return ""

    def issue_notify_cherry_pick_failure(
        self, commit: str
    ) -> github.IssueComment.IssueComment:
        """
        Comment on the issue that cherry-picking `commit` failed and add
        CHERRY_PICK_FAILED_LABEL to it. Returns the created comment.
        """
        message = self.make_ignore_comment(
            "Failed to cherry-pick: {}\n\n".format(commit)
        )
        action_url = self.action_url
        if action_url:
            message += action_url + "\n\n"
        message += "Please manually backport the fix and push it to your github fork.  Once this is done, please add a comment like this:\n\n`/branch <user>/<repo>/<branch>`"
        issue = self.issue
        comment = issue.create_comment(message)
        issue.add_to_labels(self.CHERRY_PICK_FAILED_LABEL)
        return comment

    def issue_notify_pull_request_failure(
        self, branch: str
    ) -> github.IssueComment.IssueComment:
        """Comment on the issue that no pull request could be created."""
        message = "Failed to create pull request for {} ".format(branch)
        message += self.action_url
        return self.issue.create_comment(message)

    def issue_remove_cherry_pick_failed_label(self):
        """Remove CHERRY_PICK_FAILED_LABEL from the issue if it is set."""
        if self.CHERRY_PICK_FAILED_LABEL in [l.name for l in self.issue.labels]:
            self.issue.remove_from_labels(self.CHERRY_PICK_FAILED_LABEL)

    def pr_request_review(self, pr: github.PullRequest.PullRequest):
        """
        This function will try to find the best reviewers for the commits of
        `pr` and then add a comment requesting review of the backport and
        assign the pull request to the selected reviewers.

        The reviewers selected are those users who approved the patch in
        Phabricator. Each reviewer is mentioned and assigned only once, even
        when they approved several of the commits.
        """
        # `self.repo` builds a new client on every access, so fetch it once.
        repo = self.repo
        reviewers: List[str] = []
        for commit in pr.get_commits():
            for approver in phab_get_commit_approvers(self.phab_token, commit):
                login = phab_login_to_github_login(self.phab_token, repo, approver)
                if login and login not in reviewers:
                    reviewers.append(login)
        if reviewers:
            message = "{} What do you think about merging this PR to the release branch?".format(
                " ".join(["@" + r for r in reviewers])
            )
            pr.create_issue_comment(message)
            pr.add_to_assignees(*reviewers)

    def create_branch(self, commits: List[str]) -> bool:
        """
        This function attempts to backport `commits` into the branch associated
        with `self.issue_number`.

        If this is successful, then the branch is pushed to `self.branch_repo_name`, if not,
        a comment is added to the issue saying that the cherry-pick failed
        and the error is re-raised.

        :param list commits: List of commits to cherry-pick.

        """
        print("cherry-picking", commits)
        branch_name = self.branch_name
        local_repo = Repo(self.llvm_project_dir)
        local_repo.git.checkout(self.release_branch_for_issue)

        for c in commits:
            try:
                local_repo.git.cherry_pick("-x", c)
            except Exception:
                self.issue_notify_cherry_pick_failure(c)
                raise

        # Log the repository name rather than the push URL: the URL embeds
        # the authentication token.
        print("Pushing to {} {}".format(self.branch_repo_name, branch_name))
        local_repo.git.push(self.push_url, "HEAD:{}".format(branch_name), force=True)

        self.issue_notify_branch()
        self.issue_remove_cherry_pick_failed_label()
        return True

    def check_if_pull_request_exists(
        self, repo: github.Repository.Repository, head: str
    ) -> bool:
        """Return True when `repo` has at least one pull request for `head`."""
        pulls = repo.get_pulls(head=head)
        return pulls.totalCount != 0

    def create_pull_request(self, owner: str, repo_name: str, branch: str) -> bool:
        """
        Create a pull request in `self.branch_repo_name`.  The base branch of the
        pull request will be chosen based on the milestone attached to
        the issue represented by `self.issue_number`.  For example if the milestone
        is Release 13.0.1, then the base branch will be release/13.x. `branch`
        will be used as the compare branch.
        https://docs.github.com/en/get-started/quickstart/github-glossary#base-branch
        https://docs.github.com/en/get-started/quickstart/github-glossary#compare-branch

        :param str owner: Owner of the repository holding `branch`.
        :param str repo_name: Name of the repository holding `branch`.
        :param str branch: The branch to create the pull request from.
        :return: False when the issue has no release branch, True when a pull
            request already exists or was created.
        """
        repo = github.Github(self.token).get_repo(self.branch_repo_name)
        issue_ref = "{}#{}".format(self.repo_name, self.issue_number)
        release_branch_for_issue = self.release_branch_for_issue
        if release_branch_for_issue is None:
            return False
        head_branch = branch
        if not repo.fork:
            # If the target repo is not a fork of llvm-project, we need to copy
            # the branch into the target repo.  GitHub only supports cross-repo pull
            # requests on forked repos.
            head_branch = f"{owner}-{branch}"
            local_repo = Repo(self.llvm_project_dir)
            push_done = False
            for _ in range(0, 5):
                try:
                    local_repo.git.fetch(
                        f"https://github.com/{owner}/{repo_name}", f"{branch}:{branch}"
                    )
                    local_repo.git.push(
                        self.push_url, f"{branch}:{head_branch}", force=True
                    )
                    push_done = True
                    break
                except Exception as e:
                    # git errors may echo the command line, which contains the
                    # push URL; mask the token before printing.
                    print(self._redact(str(e)))
                    time.sleep(30)
                    continue
            if not push_done:
                # Name the repository, not the push URL (which embeds the token).
                raise Exception(
                    "Failed to mirror branch into {}".format(self.branch_repo_name)
                )
            owner = repo.owner.login

        head = f"{owner}:{head_branch}"
        if self.check_if_pull_request_exists(repo, head):
            print("PR already exists...")
            return True
        try:
            pull = repo.create_pull(
                title=f"PR for {issue_ref}",
                body="resolves {}".format(issue_ref),
                base=release_branch_for_issue,
                head=head,
                maintainer_can_modify=False,
            )
        except Exception:
            self.issue_notify_pull_request_failure(branch)
            raise

        # Requesting reviews is best effort: a failure here must not undo or
        # hide the pull request that was just created.
        try:
            if self.phab_token:
                self.pr_request_review(pull)
        except Exception as e:
            print("error: Failed while searching for reviewers", e)

        if pull is None:
            return False

        self.issue_notify_pull_request(pull)
        self.issue_remove_cherry_pick_failed_label()

        # TODO(tstellar): Do you really want to always return True?
        return True

    def execute_command(self) -> bool:
        """
        This function reads lines from STDIN and executes the first command
        that it finds.  The 2 supported commands are:
        /cherry-pick commit0 <commit1> <commit2> <...>
        /branch <owner>/<repo>/<branch>

        :return: The result of the executed command, or False when no
            supported command was found in the input.
        """
        # Remember what was read so it can be reported when nothing matches;
        # stdin is exhausted by then and cannot be read a second time.
        seen_lines: List[str] = []
        for raw_line in sys.stdin:
            seen_lines.append(raw_line)
            # str.rstrip() returns a new string; keep it so that trailing
            # whitespace (e.g. "\r") does not end up in the arguments.
            line = raw_line.rstrip()
            m = re.search(r"/([a-z-]+)\s(.+)", line)
            if not m:
                continue
            command = m.group(1)
            args = m.group(2)

            if command == "cherry-pick":
                commits = [extract_commit_hash(a) for a in args.split()]
                return self.create_branch(commits)

            if command == "branch":
                m = re.match(r"([^/]+)/([^/]+)/(.+)", args)
                if m:
                    owner = m.group(1)
                    repo = m.group(2)
                    branch = m.group(3)
                    return self.create_pull_request(owner, repo, branch)

        print("Do not understand input:")
        print(seen_lines)
        return False
626
627
# ---------------------------------------------------------------------------
# Command line interface. The parser is built and the selected command is run
# at module level, i.e. as soon as this file is executed.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument(
    "--token", type=str, required=True, help="GitHub authentication token"
)
parser.add_argument(
    "--repo",
    type=str,
    default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
    help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
)
subparsers = parser.add_subparsers(dest="command")

issue_subscriber_parser = subparsers.add_parser("issue-subscriber")
issue_subscriber_parser.add_argument("--label-name", type=str, required=True)
issue_subscriber_parser.add_argument("--issue-number", type=int, required=True)

# Pull requests are addressed through --issue-number as well.
pr_subscriber_parser = subparsers.add_parser("pr-subscriber")
pr_subscriber_parser.add_argument("--label-name", type=str, required=True)
pr_subscriber_parser.add_argument("--issue-number", type=int, required=True)

release_workflow_parser = subparsers.add_parser("release-workflow")
release_workflow_parser.add_argument(
    "--llvm-project-dir",
    type=str,
    default=".",
    help="directory containing the llvm-project checkout",
)
release_workflow_parser.add_argument(
    "--issue-number", type=int, required=True, help="The issue number to update"
)
release_workflow_parser.add_argument(
    "--phab-token",
    type=str,
    help="Phabricator conduit API token. See https://reviews.llvm.org/settings/user/<USER>/page/apitokens/",
)
release_workflow_parser.add_argument(
    "--branch-repo-token",
    type=str,
    help="GitHub authentication token to use for the repository where new branches will be pushed. Defaults to TOKEN.",
)
release_workflow_parser.add_argument(
    "--branch-repo",
    type=str,
    default="llvm/llvm-project-release-prs",
    help="The name of the repo where new branches will be pushed (e.g. llvm/llvm-project)",
)
release_workflow_parser.add_argument(
    "sub_command",
    type=str,
    choices=["print-release-branch", "auto"],
    help="print-release-branch: print to stdout the name of the release branch ISSUE_NUMBER should be backported to; auto: read a /cherry-pick or /branch command from stdin and execute it",
)

llvmbot_git_config_parser = subparsers.add_parser(
    "setup-llvmbot-git",
    help="Set the default user and email for the git repo in LLVM_PROJECT_DIR to llvmbot",
)

args = parser.parse_args()

if args.command == "issue-subscriber":
    issue_subscriber = IssueSubscriber(
        args.token, args.repo, args.issue_number, args.label_name
    )
    issue_subscriber.run()
elif args.command == "pr-subscriber":
    pr_subscriber = PRSubscriber(
        args.token, args.repo, args.issue_number, args.label_name
    )
    pr_subscriber.run()
elif args.command == "release-workflow":
    release_workflow = ReleaseWorkflow(
        args.token,
        args.repo,
        args.issue_number,
        args.branch_repo,
        args.branch_repo_token,
        args.llvm_project_dir,
        args.phab_token,
    )
    # Every sub-command needs the release branch, which is derived from the
    # issue's milestone; without it the stdin text is quoted back on the issue
    # with an error and the script exits with a failure status.
    if not release_workflow.release_branch_for_issue:
        release_workflow.issue_notify_no_milestone(sys.stdin.readlines())
        sys.exit(1)
    if args.sub_command == "print-release-branch":
        release_workflow.print_release_branch()
    else:
        if not release_workflow.execute_command():
            sys.exit(1)
elif args.command == "setup-llvmbot-git":
    setup_llvmbot_git()
718