xref: /llvm-project/llvm/utils/git/github-automation.py (revision 3ce8eda5926b3ab7b35915b351e3d03ca0b0960c)
1#!/usr/bin/env python3
2#
3# ======- github-automation - LLVM GitHub Automation Routines--*- python -*--==#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==-------------------------------------------------------------------------==#
10
11import argparse
12from git import Repo  # type: ignore
13import html
14import github
15import os
16import re
17import requests
18import sys
19import time
20from typing import List, Optional
21
# Markdown text with first-contribution instructions for newcomers.
# IssueSubscriber.run selects it when the subscriber team is
# "issue-subscribers-good-first-issue".
beginner_comment = """
Hi!

This issue may be a good introductory issue for people new to working on LLVM. If you would like to work on this issue, your first steps are:

  1) Assign the issue to you.
  2) Fix the issue locally.
  3) [Run the test suite](https://llvm.org/docs/TestingGuide.html#unit-and-regression-tests) locally.
    3.1) Remember that the subdirectories under `test/` create fine-grained testing targets, so you can
         e.g. use `make check-clang-ast` to only run Clang's AST tests.
  4) Create a `git` commit
  5) Run [`git clang-format HEAD~1`](https://clang.llvm.org/docs/ClangFormat.html#git-integration) to format your changes.
  6) Submit the patch to [Phabricator](https://reviews.llvm.org/).
    6.1) Detailed instructions can be found [here](https://llvm.org/docs/Phabricator.html#requesting-a-review-via-the-web-interface)

For more instructions on how to submit a patch to LLVM, see our [documentation](https://llvm.org/docs/Contributing.html).

If you have any further questions about this issue, don't hesitate to ask via a comment on this Github issue.
"""
41
42
def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
    """
    Return the first entry of `teams` whose lower-cased name equals `team_name`.

    `team_name` is compared as given, so callers are expected to pass it
    already lower-cased.  Returns None when no team matches.
    """
    matching = (
        candidate for candidate in teams if team_name == candidate.name.lower()
    )
    return next(matching, None)
48
49
def escape_description(str):
    """
    Neutralise markup in user-written text before quoting it in a comment.

    The HTML special characters ``&``, ``<`` and ``>`` are escaped (quotes are
    left untouched).  An empty HTML comment is inserted after every ``@`` that
    is followed by a word character and after every ``#`` that is followed by
    a digit, so the text no longer contains ``@name`` / ``#123`` sequences.

    :param str: The raw text.  ``None`` and the empty string both yield ``""``.
    :return: The escaped text.
    """
    # The description of an issue or pull request may be missing entirely;
    # html.escape() would raise on None.
    if not str:
        return ""
    # https://github.com/github/markup/issues/1168#issuecomment-494946168
    escaped = html.escape(str, False)
    # '@' followed by alphanum is a user name
    escaped = re.sub(r"@(?=\w)", "@<!-- -->", escaped)
    # '#' followed by digits is considered an issue number
    escaped = re.sub(r"#(?=\d)", "#<!-- -->", escaped)
    return escaped
58
59
class IssueSubscriber:
    """
    Posts a comment on an issue that mentions the ``issue-subscribers-<label>``
    team and quotes the (escaped) issue description.
    """

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team to notify."""
        return self._team_name

    def __init__(self, token: str, repo: str, issue_number: int, label_name: str):
        """
        :param str token: GitHub authentication token.
        :param str repo: Repository in the form ``<owner>/<repo>``.
        :param int issue_number: Number of the issue to comment on.
        :param str label_name: Label that selects the subscriber team.
        """
        # One client is enough for both the repository and organization lookups.
        client = github.Github(token)
        self.repo = client.get_repo(repo)
        self.org = client.get_organization(self.repo.organization.login)
        self.issue = self.repo.get_issue(issue_number)
        self._team_name = "issue-subscribers-{}".format(label_name).lower()

    def run(self) -> bool:
        """
        Create the notification comment.

        :return: False if no team named `self.team_name` exists in the
                 organization, True once the comment has been created.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        comment = ""
        if team.slug == "issue-subscribers-good-first-issue":
            comment = "{}\n".format(beginner_comment)

        # The issue body may be None when no description was written.
        body = escape_description(self.issue.body or "")

        # Append (rather than assign) so the beginner instructions selected
        # above are part of the comment that gets posted.
        comment += f"""
@llvm/{team.slug}

<details>
{body}
</details>
"""

        self.issue.create_comment(comment)
        return True
92
93
def human_readable_size(size, decimal_places=2):
    """
    Format `size` (a byte count) using binary units from B up to PiB.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PiB) is reached, then rendered with `decimal_places` decimals.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB")
    index = 0
    largest = len(units) - 1
    while index < largest and not size < 1024.0:
        size /= 1024.0
        index += 1
    return f"{size:.{decimal_places}f} {units[index]}"
100
101
class PRSubscriber:
    """
    Maintains a summary comment on a pull request that mentions the
    ``pr-subscribers-<label>`` team and quotes the PR description, the list of
    changed files and (a possibly truncated copy of) the diff.
    """

    # Marker embedded in the summary comment so that later runs can find it.
    COMMENT_TAG = "<!--LLVM PR SUMMARY COMMENT-->\n"

    @property
    def team_name(self) -> str:
        """Lower-cased name of the team to notify."""
        return self._team_name

    def __init__(self, token: str, repo: str, pr_number: int, label_name: str):
        """
        :param str token: GitHub authentication token.
        :param str repo: Repository in the form ``<owner>/<repo>``.
        :param int pr_number: Number of the pull request to comment on.
        :param str label_name: Label that selects the subscriber team; any
                               ``+`` in it is replaced by ``x``.
        """
        # One client is enough for both the repository and organization lookups.
        client = github.Github(token)
        self.repo = client.get_repo(repo)
        self.org = client.get_organization(self.repo.organization.login)
        self.pr = self.repo.get_issue(pr_number).as_pull_request()
        self._team_name = "pr-subscribers-{}".format(
            label_name.replace("+", "x")
        ).lower()

    def get_summary_comment(self) -> Optional[github.IssueComment.IssueComment]:
        """
        Return the first comment on the PR that contains `COMMENT_TAG`, or
        None if there is no such comment yet.
        """
        for comment in self.pr.as_issue().get_comments():
            if self.COMMENT_TAG in comment.body:
                return comment
        return None

    def run(self) -> bool:
        """
        Create the summary comment, or add the team mention to an existing one.

        :return: False if no team named `self.team_name` exists in the
                 organization, True otherwise.
        """
        team = _get_curent_team(self.team_name, self.org.get_teams())
        if not team:
            print(f"couldn't find team named {self.team_name}")
            return False

        # GitHub limits comments to 65,536 characters, let's limit the diff
        # and the file list to 20kB each.
        STAT_LIMIT = 20 * 1024
        DIFF_LIMIT = 20 * 1024

        # Get statistics for each file
        diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
        for file in self.pr.get_files():
            diff_stats += f"- ({file.status}) {file.filename} ("
            if file.additions:
                diff_stats += f"+{file.additions}"
            if file.deletions:
                diff_stats += f"-{file.deletions}"
            diff_stats += ") "
            if file.status == "renamed":
                # Record the old name in the comment itself, not on stdout.
                diff_stats += f"(from {file.previous_filename})"
            diff_stats += "\n"
            if len(diff_stats) > STAT_LIMIT:
                break

        # Get the diff.  This is best effort: if the download fails the
        # comment is still posted, just without the inline patch.
        try:
            response = requests.get(self.pr.diff_url, timeout=60)
            response.raise_for_status()
            patch = response.text
        except Exception as e:
            print(f"warning: failed to download {self.pr.diff_url}: {e}")
            patch = ""

        patch_link = f"Full diff: {self.pr.diff_url}\n"
        if len(patch) > DIFF_LIMIT:
            patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
            patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
        team_mention = "@llvm/{}".format(team.slug)

        # The PR body may be None when no description was written.
        body = escape_description(self.pr.body or "")
        # Note: the comment is in markdown and the code below
        # is sensitive to line breaks
        comment = f"""
{self.COMMENT_TAG}
{team_mention}

<details>
<summary>Changes</summary>

{body}

---
{patch_link}

{diff_stats}

``````````diff
{patch}
``````````

</details>
"""

        summary_comment = self.get_summary_comment()
        if not summary_comment:
            self.pr.as_issue().create_comment(comment)
        elif team_mention + "\n" in summary_comment.body:
            print("Team {} already mentioned.".format(team.slug))
        else:
            # Insert the new mention right after the tag of the existing
            # summary instead of posting a second summary.
            summary_comment.edit(
                summary_comment.body.replace(
                    self.COMMENT_TAG, self.COMMENT_TAG + team_mention + "\n"
                )
            )
        return True

    def _get_curent_team(self) -> Optional[github.Team.Team]:
        """
        Return the organization team named `self.team_name`, or None.

        Inside this method the bare name resolves to the module-level helper
        of the same name, which performs the actual search.
        """
        return _get_curent_team(self.team_name, self.org.get_teams())
203
204
def setup_llvmbot_git(git_dir="."):
    """
    Write the llvmbot identity (user.name and user.email) into the
    configuration of the git repo in `git_dir`, so that commits are
    attributed to llvmbot.
    """
    identity = (("name", "llvmbot"), ("email", "llvmbot@llvm.org"))
    repo = Repo(git_dir)
    with repo.config_writer() as writer:
        for option, value in identity:
            writer.set_value("user", option, value)
214
215
def phab_api_call(phab_token: str, url: str, args: dict) -> dict:
    """
    POST `args` together with the API token to the Phabricator endpoint
    `url` and return the decoded json response as a dictionary.
    """
    # Entries of `args` come last, exactly as with dict.update().
    payload = {"api.token": phab_token, **args}
    return requests.post(url, data=payload).json()
225
226
def phab_login_to_github_login(
    phab_token: str, repo: github.Repository.Repository, phab_login: str
) -> Optional[str]:
    """
    Map a Phabricator login to a github login.

    One commit authored by `phab_login` is looked up through the Phabricator
    API; its SHA1 is then looked up in `repo` and the login of that commit's
    committer is returned.  None is returned when Phabricator reports no
    commit for the user or when the commit has no committer in `repo`.

    :param str phab_token: The Conduit API token to use for communication with Phabricator
    :param github.Repository.Repository repo: The github repo to use when looking for the SHA1 found in Differential
    :param str phab_login: The Phabricator login to be translated.
    """

    # API documentation: https://reviews.llvm.org/conduit/method/diffusion.commit.search/
    response = phab_api_call(
        phab_token,
        "https://reviews.llvm.org/api/diffusion.commit.search",
        {
            "constraints[authors][0]": phab_login,
            # PHID for "LLVM Github Monorepo" repository
            "constraints[repositories][0]": "PHID-REPO-f4scjekhnkmh7qilxlcy",
            "limit": 1,
        },
    )
    commits = response["result"]["data"]
    if not len(commits):
        # Can't find any commits associated with this user
        return None

    sha = commits[0]["fields"]["identifier"]
    committer = repo.get_commit(sha).committer
    if committer:
        return committer.login

    # This committer had an email address GitHub could not recognize, so
    # it can't link the user to a GitHub account.
    print(f"Warning: Can't find github account for {phab_login}")
    return None
264
265
def phab_get_commit_approvers(phab_token: str, commit: github.Commit.Commit) -> list:
    """
    Return the Phabricator usernames of the reviewers whose status on the
    revision referenced by `commit`'s message is "accepted".

    An empty list is returned when the commit message references no
    Phabricator revision.
    """
    # API documentation: https://reviews.llvm.org/conduit/method/differential.parsecommitmessage/
    parsed = phab_api_call(
        phab_token,
        "https://reviews.llvm.org/api/differential.parsecommitmessage",
        {"corpus": commit.commit.message},
    )
    review_id = parsed["result"]["revisionIDFieldInfo"]["value"]
    if not review_id:
        # No Phabricator revision for this commit
        return []

    # API documentation: https://reviews.llvm.org/conduit/method/differential.revision.search/
    revision = phab_api_call(
        phab_token,
        "https://reviews.llvm.org/api/differential.revision.search",
        {"constraints[ids][0]": review_id, "attachments[reviewers]": True},
    )
    reviewers = revision["result"]["data"][0]["attachments"]["reviewers"]["reviewers"]

    def username_for(phid):
        # API documentation: https://reviews.llvm.org/conduit/method/user.search/
        user = phab_api_call(
            phab_token,
            "https://reviews.llvm.org/api/user.search",
            {"constraints[phids][0]": phid},
        )
        return user["result"]["data"][0]["fields"]["username"]

    return [
        username_for(reviewer["reviewerPHID"])
        for reviewer in reviewers
        if reviewer["status"] == "accepted"
    ]
292    return accepted
293
294
295def extract_commit_hash(arg: str):
296    """
297    Extract the commit hash from the argument passed to /action github
298    comment actions. We currently only support passing the commit hash
299    directly or use the github URL, such as
300    https://github.com/llvm/llvm-project/commit/2832d7941f4207f1fcf813b27cf08cecc3086959
301    """
302    github_prefix = "https://github.com/llvm/llvm-project/commit/"
303    if arg.startswith(github_prefix):
304        return arg[len(github_prefix) :]
305    return arg
306
307
class ReleaseWorkflow:
    """
    This class implements the sub-commands for the release-workflow command.
    The current sub-commands are:
        * create-branch
        * create-pull-request

    The execute_command method will automatically choose the correct sub-command
    based on the text in stdin.
    """

    # Label added to the issue when a cherry-pick fails and removed again once
    # a branch or pull request has been created successfully.
    CHERRY_PICK_FAILED_LABEL = "release:cherry-pick-failed"

    def __init__(
        self,
        token: str,
        repo: str,
        issue_number: int,
        branch_repo_name: str,
        branch_repo_token: str,
        llvm_project_dir: str,
        phab_token: str,
    ) -> None:
        """
        :param str token: GitHub authentication token.
        :param str repo: Repository (``<owner>/<repo>``) that holds the issue.
        :param int issue_number: The issue that requested the backport.
        :param str branch_repo_name: Repository that branches are pushed to.
        :param str branch_repo_token: Token used for pushing; `token` is used
                                      when this is empty or None.
        :param str llvm_project_dir: Path of the local llvm-project checkout.
        :param str phab_token: Phabricator Conduit API token (may be empty).
        """
        self._token = token
        self._repo_name = repo
        self._issue_number = issue_number
        self._branch_repo_name = branch_repo_name
        self._branch_repo_token = branch_repo_token or token
        self._llvm_project_dir = llvm_project_dir
        self._phab_token = phab_token

    @property
    def token(self) -> str:
        return self._token

    @property
    def repo_name(self) -> str:
        return self._repo_name

    @property
    def issue_number(self) -> int:
        return self._issue_number

    @property
    def branch_repo_name(self) -> str:
        return self._branch_repo_name

    @property
    def branch_repo_token(self) -> str:
        return self._branch_repo_token

    @property
    def llvm_project_dir(self) -> str:
        return self._llvm_project_dir

    @property
    def phab_token(self) -> str:
        return self._phab_token

    @property
    def repo(self) -> github.Repository.Repository:
        """The repository named `repo_name`, fetched anew on every access."""
        return github.Github(self.token).get_repo(self.repo_name)

    @property
    def issue(self) -> github.Issue.Issue:
        """The issue `issue_number`, fetched anew on every access."""
        return self.repo.get_issue(self.issue_number)

    @property
    def push_url(self) -> str:
        """
        URL used for pushing to `branch_repo_name`.  It embeds
        `branch_repo_token`, so it must not be printed or put into messages.
        """
        return "https://{}@github.com/{}".format(
            self.branch_repo_token, self.branch_repo_name
        )

    @property
    def branch_name(self) -> str:
        return "issue{}".format(self.issue_number)

    @property
    def release_branch_for_issue(self) -> Optional[str]:
        """
        The release branch named by a ``branch: <name>`` line in the
        description of the issue's milestone, or None if the issue has no
        milestone or the description names no branch.
        """
        milestone = self.issue.milestone
        if milestone is None:
            return None
        # The description may be missing (None) on a milestone.
        m = re.search("branch: (.+)", milestone.description or "")
        if not m:
            return None
        # `.` also matches '\r', so drop surrounding whitespace such as the
        # remainder of a CRLF line ending.
        return m.group(1).strip() or None

    def print_release_branch(self) -> None:
        print(self.release_branch_for_issue)

    def issue_notify_branch(self) -> None:
        """Comment ``/branch <repo>/<branch>`` on the issue."""
        self.issue.create_comment(
            "/branch {}/{}".format(self.branch_repo_name, self.branch_name)
        )

    def issue_notify_pull_request(self, pull: github.PullRequest.PullRequest) -> None:
        """Comment ``/pull-request <repo>#<number>`` on the issue."""
        self.issue.create_comment(
            "/pull-request {}#{}".format(self.branch_repo_name, pull.number)
        )

    def make_ignore_comment(self, comment: str) -> str:
        """
        Returns the comment string with a prefix that will cause
        a Github workflow to skip parsing this comment.

        :param str comment: The comment to ignore
        """
        return "<!--IGNORE-->\n" + comment

    def issue_notify_no_milestone(self, comment: List[str]) -> None:
        """
        Quote the lines of `comment` back on the issue together with an error
        saying that the command failed because the milestone is missing.

        :param list comment: Lines of the comment that contained the command.
        """
        message = "{}\n\nError: Command failed due to missing milestone.".format(
            "".join(">" + line for line in comment)
        )
        self.issue.create_comment(self.make_ignore_comment(message))

    @property
    def action_url(self) -> str:
        """
        URL of the current GitHub Actions run, built from the
        GITHUB_REPOSITORY and GITHUB_RUN_ID environment variables, or "" when
        the CI environment variable is not set.
        """
        if os.getenv("CI"):
            return "https://github.com/{}/actions/runs/{}".format(
                os.getenv("GITHUB_REPOSITORY"), os.getenv("GITHUB_RUN_ID")
            )
        return ""

    def issue_notify_cherry_pick_failure(
        self, commit: str
    ) -> github.IssueComment.IssueComment:
        """
        Report on the issue that `commit` could not be cherry-picked and add
        the cherry-pick-failed label.

        :return: The comment that was created.
        """
        message = self.make_ignore_comment(
            "Failed to cherry-pick: {}\n\n".format(commit)
        )
        action_url = self.action_url
        if action_url:
            message += action_url + "\n\n"
        message += "Please manually backport the fix and push it to your github fork.  Once this is done, please add a comment like this:\n\n`/branch <user>/<repo>/<branch>`"
        issue = self.issue
        comment = issue.create_comment(message)
        issue.add_to_labels(self.CHERRY_PICK_FAILED_LABEL)
        return comment

    def issue_notify_pull_request_failure(
        self, branch: str
    ) -> github.IssueComment.IssueComment:
        """
        Report on the issue that no pull request could be created for `branch`.

        :return: The comment that was created.
        """
        message = "Failed to create pull request for {} ".format(branch)
        message += self.action_url
        return self.issue.create_comment(message)

    def issue_remove_cherry_pick_failed_label(self):
        """Remove the cherry-pick-failed label from the issue if it is set."""
        # Fetch the issue once; the `issue` property queries GitHub each time.
        issue = self.issue
        if any(l.name == self.CHERRY_PICK_FAILED_LABEL for l in issue.labels):
            issue.remove_from_labels(self.CHERRY_PICK_FAILED_LABEL)

    def pr_request_review(self, pr: github.PullRequest.PullRequest):
        """
        This function will try to find the best reviewers for the commits of
        `pr` and then add a comment requesting review of the backport and
        assign the pull request to the selected reviewers.

        The reviewers selected are those users who approved the patch in
        Phabricator.
        """
        reviewers = []
        for commit in pr.get_commits():
            for approver in phab_get_commit_approvers(self.phab_token, commit):
                login = phab_login_to_github_login(
                    self.phab_token, self.repo, approver
                )
                # Skip unknown users, and mention/assign everybody only once
                # even if they approved several of the commits.
                if login and login not in reviewers:
                    reviewers.append(login)
        if reviewers:
            message = "{} What do you think about merging this PR to the release branch?".format(
                " ".join(["@" + r for r in reviewers])
            )
            pr.create_issue_comment(message)
            pr.add_to_assignees(*reviewers)

    def create_branch(self, commits: List[str]) -> bool:
        """
        This function attempts to backport `commits` into the branch associated
        with `self.issue_number`.

        If this is successful, then the branch is pushed to `self.branch_repo_name`, if not,
        a comment is added to the issue saying that the cherry-pick failed.

        :param list commits: List of commits to cherry-pick.
        :return: True on success; a failed cherry-pick or push raises.
        """
        print("cherry-picking", commits)
        branch_name = self.branch_name
        local_repo = Repo(self.llvm_project_dir)
        local_repo.git.checkout(self.release_branch_for_issue)

        for c in commits:
            try:
                local_repo.git.cherry_pick("-x", c)
            except Exception:
                self.issue_notify_cherry_pick_failure(c)
                raise

        # push_url embeds the access token, so only the repo name is logged.
        print("Pushing to {} {}".format(self.branch_repo_name, branch_name))
        local_repo.git.push(self.push_url, "HEAD:{}".format(branch_name), force=True)

        self.issue_notify_branch()
        self.issue_remove_cherry_pick_failed_label()
        return True

    def check_if_pull_request_exists(
        self, repo: github.Repository.Repository, head: str
    ) -> bool:
        """Return True if `repo` has at least one pull request for `head`."""
        pulls = repo.get_pulls(head=head)
        return pulls.totalCount != 0

    def create_pull_request(self, owner: str, repo_name: str, branch: str) -> bool:
        """
        Create a pull request in `self.branch_repo_name`.  The base branch of the
        pull request will be chosen based on the milestone attached to
        the issue represented by `self.issue_number`.  For example if the milestone
        is Release 13.0.1, then the base branch will be release/13.x. `branch`
        will be used as the compare branch.
        https://docs.github.com/en/get-started/quickstart/github-glossary#base-branch
        https://docs.github.com/en/get-started/quickstart/github-glossary#compare-branch

        :param str owner: Owner of the repository that contains `branch`.
        :param str repo_name: Name of the repository that contains `branch`.
        :param str branch: The branch to create the pull request from.
        :return: False if the issue has no release branch, True if the pull
                 request exists already or was created.  Failure to mirror
                 the branch or to create the pull request raises.
        """
        repo = github.Github(self.token).get_repo(self.branch_repo_name)
        issue_ref = "{}#{}".format(self.repo_name, self.issue_number)
        release_branch_for_issue = self.release_branch_for_issue
        if release_branch_for_issue is None:
            return False
        head_branch = branch
        if not repo.fork:
            # If the target repo is not a fork of llvm-project, we need to copy
            # the branch into the target repo.  GitHub only supports cross-repo pull
            # requests on forked repos.
            head_branch = f"{owner}-{branch}"
            local_repo = Repo(self.llvm_project_dir)
            push_done = False
            # Up to 5 attempts, 30 seconds apart.
            for _ in range(0, 5):
                try:
                    local_repo.git.fetch(
                        f"https://github.com/{owner}/{repo_name}", f"{branch}:{branch}"
                    )
                    local_repo.git.push(
                        self.push_url, f"{branch}:{head_branch}", force=True
                    )
                    push_done = True
                    break
                except Exception as e:
                    print(e)
                    time.sleep(30)
                    continue
            if not push_done:
                # Name the repo rather than push_url, which embeds the token.
                raise Exception(
                    "Failed to mirror branch into {}".format(self.branch_repo_name)
                )
            owner = repo.owner.login

        head = f"{owner}:{head_branch}"
        if self.check_if_pull_request_exists(repo, head):
            print("PR already exists...")
            return True
        try:
            pull = repo.create_pull(
                title=f"PR for {issue_ref}",
                body="resolves {}".format(issue_ref),
                base=release_branch_for_issue,
                head=head,
                maintainer_can_modify=False,
            )
        except Exception:
            self.issue_notify_pull_request_failure(branch)
            raise

        # Requesting reviews is best effort and must not fail the command.
        try:
            if self.phab_token:
                self.pr_request_review(pull)
        except Exception as e:
            print("error: Failed while searching for reviewers", e)

        if pull is None:
            return False

        self.issue_notify_pull_request(pull)
        self.issue_remove_cherry_pick_failed_label()

        # TODO(tstellar): Do you really want to always return True?
        return True

    def execute_command(self) -> bool:
        """
        This function reads lines from STDIN and executes the first command
        that it finds.  The 2 supported commands are:
        /cherry-pick commit0 <commit1> <commit2> <...>
        /branch <owner>/<repo>/<branch>

        :return: The result of the executed command, or False if STDIN
                 contained no command that could be understood.
        """
        # stdin can only be read once, so remember the lines for the error
        # message below.
        seen_lines = []
        for line in sys.stdin:
            seen_lines.append(line)
            # Drop the line ending (including the '\r' of CRLF input) and
            # trailing blanks so they cannot end up in the last argument.
            line = line.rstrip()
            m = re.search(r"/([a-z-]+)\s(.+)", line)
            if not m:
                continue
            command = m.group(1)
            args = m.group(2)

            if command == "cherry-pick":
                commits = [extract_commit_hash(a) for a in args.split()]
                return self.create_branch(commits)

            if command == "branch":
                m = re.match("([^/]+)/([^/]+)/(.+)", args)
                if m:
                    owner = m.group(1)
                    repo = m.group(2)
                    branch = m.group(3)
                    return self.create_pull_request(owner, repo, branch)

        print("Do not understand input:")
        print(seen_lines)
        return False
627
628
# Command line interface: global options first, then one sub-command per
# automation task.  The chosen sub-command name is stored in `args.command`.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--token", type=str, required=True, help="GitHub authentiation token"
)
parser.add_argument(
    "--repo",
    type=str,
    default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
    help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
)
subparsers = parser.add_subparsers(dest="command")

# issue-subscriber: handled by IssueSubscriber.
issue_subscriber_parser = subparsers.add_parser("issue-subscriber")
issue_subscriber_parser.add_argument("--label-name", type=str, required=True)
issue_subscriber_parser.add_argument("--issue-number", type=int, required=True)

# pr-subscriber: handled by PRSubscriber; the pull request number is passed
# through --issue-number.
pr_subscriber_parser = subparsers.add_parser("pr-subscriber")
pr_subscriber_parser.add_argument("--label-name", type=str, required=True)
pr_subscriber_parser.add_argument("--issue-number", type=int, required=True)

# release-workflow: handled by ReleaseWorkflow.
release_workflow_parser = subparsers.add_parser("release-workflow")
release_workflow_parser.add_argument(
    "--llvm-project-dir",
    type=str,
    default=".",
    help="directory containing the llvm-project checout",
)
release_workflow_parser.add_argument(
    "--issue-number", type=int, required=True, help="The issue number to update"
)
release_workflow_parser.add_argument(
    "--phab-token",
    type=str,
    help="Phabricator conduit API token. See https://reviews.llvm.org/settings/user/<USER>/page/apitokens/",
)
release_workflow_parser.add_argument(
    "--branch-repo-token",
    type=str,
    help="GitHub authentication token to use for the repository where new branches will be pushed. Defaults to TOKEN.",
)
release_workflow_parser.add_argument(
    "--branch-repo",
    type=str,
    default="llvm/llvm-project-release-prs",
    help="The name of the repo where new branches will be pushed (e.g. llvm/llvm-project)",
)
# Positional mode selector.  The help text below describes
# "print-release-branch"; with "auto" the command to run is read from stdin
# (see ReleaseWorkflow.execute_command).
release_workflow_parser.add_argument(
    "sub_command",
    type=str,
    choices=["print-release-branch", "auto"],
    help="Print to stdout the name of the release branch ISSUE_NUMBER should be backported to",
)

# setup-llvmbot-git: takes no options of its own; handled by setup_llvmbot_git.
llvmbot_git_config_parser = subparsers.add_parser(
    "setup-llvmbot-git",
    help="Set the default user and email for the git repo in LLVM_PROJECT_DIR to llvmbot",
)

# NOTE: arguments are parsed at module level, i.e. whenever this file is
# executed or imported.
args = parser.parse_args()
688
# Dispatch on the selected sub-command.  If no sub-command was given, none of
# the branches below matches and nothing is done.
if args.command == "issue-subscriber":
    issue_subscriber = IssueSubscriber(
        args.token, args.repo, args.issue_number, args.label_name
    )
    # The boolean result of run() is not used for the exit status.
    issue_subscriber.run()
elif args.command == "pr-subscriber":
    pr_subscriber = PRSubscriber(
        args.token, args.repo, args.issue_number, args.label_name
    )
    # The boolean result of run() is not used for the exit status.
    pr_subscriber.run()
elif args.command == "release-workflow":
    release_workflow = ReleaseWorkflow(
        args.token,
        args.repo,
        args.issue_number,
        args.branch_repo,
        args.branch_repo_token,
        args.llvm_project_dir,
        args.phab_token,
    )
    # Both modes need a release branch; without one, quote stdin back on the
    # issue with an error message and fail.
    if not release_workflow.release_branch_for_issue:
        release_workflow.issue_notify_no_milestone(sys.stdin.readlines())
        sys.exit(1)
    if args.sub_command == "print-release-branch":
        release_workflow.print_release_branch()
    else:
        # "auto": run the first command found on stdin and exit with status 1
        # if it was not understood or reported failure.
        if not release_workflow.execute_command():
            sys.exit(1)
elif args.command == "setup-llvmbot-git":
    setup_llvmbot_git()
719