xref: /llvm-project/.github/workflows/commit-access-review.py (revision 2359635457b1f2c6f2c5d33ca84d0fda7729a19d)
184efc8edSTom Stellard#!/usr/bin/env python3
284efc8edSTom Stellard# ===-- commit-access-review.py  --------------------------------------------===#
384efc8edSTom Stellard#
484efc8edSTom Stellard# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
584efc8edSTom Stellard# See https://llvm.org/LICENSE.txt for license information.
684efc8edSTom Stellard# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
784efc8edSTom Stellard#
884efc8edSTom Stellard# ===------------------------------------------------------------------------===#
984efc8edSTom Stellard#
1084efc8edSTom Stellard# ===------------------------------------------------------------------------===#
1184efc8edSTom Stellard
1284efc8edSTom Stellardimport datetime
1384efc8edSTom Stellardimport github
1484efc8edSTom Stellardimport re
1584efc8edSTom Stellardimport requests
1684efc8edSTom Stellardimport time
1784efc8edSTom Stellardimport sys
1884efc8edSTom Stellardimport re
1984efc8edSTom Stellard
2084efc8edSTom Stellard
class User:
    """
    Contribution counters for one account on the triage list.

    Each instance keeps a reference to the shared ``triage_list`` mapping
    (login -> ``User``) and removes its own entry from that mapping as soon as
    the combined number of authored, merged and reviewed contributions reaches
    ``THRESHOLD``.
    """

    # Combined number of contributions at which a user leaves the triage list.
    THRESHOLD = 5

    def __init__(self, name, triage_list):
        self.name = name
        self.authored = 0
        self.merged = 0
        self.reviewed = 0
        # Shared dict of login -> User; mutated by the add_* methods below.
        self.triage_list = triage_list

    def _drop_if_threshold_met(self, kind):
        """Remove this user from the triage list once the threshold is met.

        ``kind`` is only used in the progress message.  Nothing happens (and
        nothing is printed) if the user has already been removed, so the
        counters can keep being updated safely afterwards.
        """
        if not self.meets_threshold():
            return
        if self.name in self.triage_list:
            print(self.name, f"meets the threshold with {kind} commits")
            del self.triage_list[self.name]

    def add_authored(self, val=1):
        """Add ``val`` authored contributions."""
        self.authored += val
        self._drop_if_threshold_met("authored")

    def set_authored(self, val):
        """Replace (rather than increment) the authored count with ``val``."""
        self.authored = 0
        self.add_authored(val)

    def add_merged(self, val=1):
        """Add ``val`` contributions merged on behalf of someone else."""
        self.merged += val
        self._drop_if_threshold_met("merged")

    def add_reviewed(self, val=1):
        """Add ``val`` reviewed contributions."""
        self.reviewed += val
        self._drop_if_threshold_met("reviewed")

    def get_total(self):
        """Return the sum of the authored, merged and reviewed counters."""
        return self.authored + self.merged + self.reviewed

    def meets_threshold(self):
        """Return True if the total is at least ``THRESHOLD``."""
        return self.get_total() >= self.THRESHOLD

    def __repr__(self):
        return f"{self.name} : a: {self.authored} m: {self.merged} r: {self.reviewed}"
6384efc8edSTom Stellard
6484efc8edSTom Stellard
def check_manual_requests(
    gh: github.Github, start_date: datetime.datetime
) -> list[str]:
    """
    Return a list of users who have been asked since ``start_date`` if they
    want to keep their commit access or if they have applied for commit
    access since ``start_date``.

    The list contains every login @-mentioned in the body of a matching issue
    followed by the issue's author (when the author still exists).  Logins may
    appear more than once.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            nodes {
              ... on Issue {
                author {
                  login
                }
                body
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
        """
    formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
    variables = {
        "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infra:commit-access,infra:commit-access-request"
    }

    # Capture only the login itself so that trailing punctuation or a carriage
    # return ("@user.", "@user)", "@user\r\n") does not become part of the
    # name.  The look-behind skips the domain part of e-mail addresses.
    mention_re = re.compile(r"(?<![\w.-])@([A-Za-z0-9][A-Za-z0-9_-]*)")

    has_next_page = True
    users = []
    while has_next_page:
        res_header, res_data = gh._Github__requester.graphql_query(
            query=query, variables=variables
        )
        data = res_data["data"]
        for issue in data["search"]["nodes"]:
            # A node can be empty when it is not an Issue, and the body can be
            # null; treat both as "no mentions".
            body = issue.get("body") or ""
            users.extend(mention_re.findall(body))
            # The author is None if the account has been deleted.
            if issue.get("author"):
                users.append(issue["author"]["login"])
        has_next_page = data["search"]["pageInfo"]["hasNextPage"]
        if has_next_page:
            variables["after"] = data["search"]["pageInfo"]["endCursor"]
    return users
11284efc8edSTom Stellard
11384efc8edSTom Stellard
def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
    """
    Get the number of commits that ``user`` has made since ``start_date``.

    The login is first resolved to a node id, then the commit history of the
    ``main`` branch is counted for every repository of the first team matching
    "llvm-committers" in the llvm organization.  Counting stops as soon as
    ``User.THRESHOLD`` is reached, so the returned value may be lower than the
    real total once it is at or above the threshold.
    """
    variables = {
        "owner": "llvm",
        "user": user,
        "start_date": start_date.strftime("%Y-%m-%dT%H:%M:%S"),
    }

    user_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """

    res_header, res_data = gh._Github__requester.graphql_query(
        query=user_query, variables=variables
    )
    data = res_data["data"]
    variables["user_id"] = data["user"]["id"]

    query = """
        query ($owner: String!, $user_id: ID!, $start_date: GitTimestamp!){
          organization(login: $owner) {
            teams(query: "llvm-committers" first:1) {
              nodes {
                repositories {
                  nodes {
                    ref(qualifiedName: "main") {
                      target {
                        ... on Commit {
                          history(since: $start_date, author: {id: $user_id }) {
                            totalCount
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
     """
    count = 0
    res_header, res_data = gh._Github__requester.graphql_query(
        query=query, variables=variables
    )
    data = res_data["data"]
    for repo in data["organization"]["teams"]["nodes"][0]["repositories"]["nodes"]:
        # ``ref`` is null for a repository without a ``main`` branch, and
        # ``history`` is absent when the target is not a commit; such
        # repositories contribute nothing instead of raising.
        ref = repo.get("ref") if repo else None
        target = ref.get("target") if ref else None
        history = target.get("history") if target else None
        if not history:
            continue
        count += int(history["totalCount"])
        if count >= User.THRESHOLD:
            break
    return count
17184efc8edSTom Stellard
17284efc8edSTom Stellard
def is_new_committer_query_repo(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Determine if ``user`` is a new committer.  A new committer can keep their
    commit access even if they don't meet the criteria.

    Up to five commits authored by ``user`` on ``main`` of llvm/llvm-project
    are fetched.  The user counts as new when there are none, or when the last
    commit in that list was committed on or after ``start_date``.
    ``start_date`` may be naive or timezone-aware.
    """
    variables = {
        "user": user,
    }

    user_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """

    res_header, res_data = gh._Github__requester.graphql_query(
        query=user_query, variables=variables
    )
    data = res_data["data"]
    # Only the variables the history query declares are sent.
    variables["owner"] = "llvm"
    variables["user_id"] = data["user"]["id"]

    query = """
        query ($owner: String!, $user_id: ID!){
          organization(login: $owner) {
            repository(name: "llvm-project") {
              ref(qualifiedName: "main") {
                target {
                  ... on Commit {
                    history(author: {id: $user_id }, first: 5) {
                      nodes {
                        committedDate
                      }
                    }
                  }
                }
              }
            }
          }
        }
     """

    res_header, res_data = gh._Github__requester.graphql_query(
        query=query, variables=variables
    )
    data = res_data["data"]
    repo = data["organization"]["repository"]
    commits = repo["ref"]["target"]["history"]["nodes"]
    if not commits:
        return True

    # NOTE(review): history is presumably returned newest first, making the
    # last node the oldest of the (up to) five commits -- confirm.
    committed_date = datetime.datetime.strptime(
        commits[-1]["committedDate"], "%Y-%m-%dT%H:%M:%SZ"
    )
    if start_date.tzinfo is not None:
        # The trailing "Z" marks the timestamp as UTC; attach that so it can
        # be compared with an aware ``start_date`` without a TypeError.
        committed_date = committed_date.replace(tzinfo=datetime.timezone.utc)
    return committed_date >= start_date
23284efc8edSTom Stellard
23384efc8edSTom Stellard
def is_new_committer(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Wrapper around is_new_committer_query_repo to handle exceptions.

    If the lookup fails for any reason the user is reported as a new
    committer (``True``); the failure is written to stderr rather than being
    silently discarded.  ``KeyboardInterrupt`` and ``SystemExit`` are not
    intercepted.
    """
    try:
        return is_new_committer_query_repo(gh, user, start_date)
    except Exception as err:
        print(
            f"Could not check whether {user} is a new committer: {err!r}",
            file=sys.stderr,
        )
    return True
24584efc8edSTom Stellard
24684efc8edSTom Stellard
def get_review_count(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> int:
    """
    Count the reviews ``user`` has done since ``start_date``.

    The count is the ``issueCount`` of a search for pull requests in the llvm
    organization that were merged after ``start_date``, were commented on by
    ``user`` and were not authored by ``user``.
    """
    since = start_date.strftime("%Y-%m-%dT%H:%M:%S")
    search_terms = (
        f"type:pr commenter:{user} -author:{user} merged:>{since} org:llvm"
    )
    graphql = """
        query ($query: String!) {
          search(query: $query, type: ISSUE, first: 5) {
            issueCount
          }
        }
        """
    params = dict(owner="llvm", repo="llvm-project", user=user, query=search_terms)

    # Only the payload is needed; the response headers are ignored.
    _, payload = gh._Github__requester.graphql_query(query=graphql, variables=params)
    return int(payload["data"]["search"]["issueCount"])
27384efc8edSTom Stellard
27484efc8edSTom Stellard
def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime):
    """
    Fetch all the merged PRs for the project since ``start_date`` and update
    ``triage_list`` with the number of PRs authored and merged for each user.

    ``triage_list`` maps logins to objects providing ``add_authored()`` and
    ``add_merged()``.  A PR is credited to its author, and additionally to the
    user who merged it when that is a different account.  The merger is
    credited even when the author's account no longer exists.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            issueCount,
            nodes {
              ... on PullRequest {
                 author {
                   login
                 }
                 mergedBy {
                   login
                 }
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
    """
    # The period is searched in 7-day windows, each paginated separately.
    # NOTE(review): presumably done to stay below the search API's cap on
    # results per query -- confirm.
    # NOTE(review): if both ends of a ``begin..end`` range are inclusive, a PR
    # merged exactly on a window boundary is counted twice -- confirm.
    date_begin = start_date
    date_end = None
    while date_begin < datetime.datetime.now():
        date_end = date_begin + datetime.timedelta(days=7)
        formatted_date_begin = date_begin.strftime("%Y-%m-%dT%H:%M:%S")
        formatted_date_end = date_end.strftime("%Y-%m-%dT%H:%M:%S")
        variables = {
            "query": f"type:pr is:merged merged:{formatted_date_begin}..{formatted_date_end} org:llvm",
        }
        has_next_page = True
        while has_next_page:
            print(variables)
            res_header, res_data = gh._Github__requester.graphql_query(
                query=query, variables=variables
            )
            data = res_data["data"]
            for pr in data["search"]["nodes"]:
                # Users can be None if the user has been deleted.  A missing
                # author must not prevent the merger from being credited.
                author = pr["author"]["login"] if pr.get("author") else None
                if author is not None and author in triage_list:
                    triage_list[author].add_authored()

                if not pr.get("mergedBy"):
                    continue
                merger = pr["mergedBy"]["login"]
                # Merging your own PR is already counted as authoring it.
                if merger != author and merger in triage_list:
                    triage_list[merger].add_merged()

            has_next_page = data["search"]["pageInfo"]["hasNextPage"]
            if has_next_page:
                variables["after"] = data["search"]["pageInfo"]["endCursor"]
        date_begin = date_end
33984efc8edSTom Stellard
34084efc8edSTom Stellard
def main():
    """
    Build the list of users with push access who show too little activity.

    The GitHub token is read from the first command line argument.  Every
    collaborator with push permission on llvm/llvm-project starts on the
    triage list and is removed by the steps below once there is evidence of
    recent activity.  The logins still on the list at the end are written,
    one per line, to ``triagers.log``.  If the list becomes empty after the
    PR or review step the script exits with status 0 without writing the file.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <github-token>")
    token = sys.argv[1]
    gh = github.Github(login_or_token=token)
    org = gh.get_organization("llvm")
    repo = org.get_repo("llvm-project")
    one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
    triage_list = {}
    for collaborator in repo.get_collaborators(permission="push"):
        triage_list[collaborator.login] = User(collaborator.login, triage_list)

    print("Start:", len(triage_list), "triagers")
    # Step 0 Check if users have requested commit access in the last year.
    for user in check_manual_requests(gh, one_year_ago):
        if user in triage_list:
            print(user, "requested commit access in the last year.")
            del triage_list[user]
    print("After Request Check:", len(triage_list), "triagers")

    # Step 1 count all PRs authored or merged
    count_prs(gh, triage_list, one_year_ago)

    print("After PRs:", len(triage_list), "triagers")

    if not triage_list:
        sys.exit(0)

    # Step 2 check for reviews
    # Iterate over a snapshot: add_reviewed() may remove the user from
    # triage_list while we loop.
    for user in list(triage_list):
        review_count = get_review_count(gh, user, one_year_ago)
        triage_list[user].add_reviewed(review_count)

    print("After Reviews:", len(triage_list), "triagers")

    if not triage_list:
        sys.exit(0)

    # Step 3 check for number of commits
    for user in list(triage_list):
        num_commits = get_num_commits(gh, user, one_year_ago)
        # Override the total number of commits to not double count commits and
        # authored PRs.
        triage_list[user].set_authored(num_commits)

    print("After Commits:", len(triage_list), "triagers")

    # Step 4 check for new committers
    for user in list(triage_list):
        print("Checking", user)
        if is_new_committer(gh, user, one_year_ago):
            print("Removing new committer: ", user)
            del triage_list[user]

    print("Complete:", len(triage_list), "triagers")

    with open("triagers.log", "w") as triagers_log:
        for user in triage_list:
            print(repr(triage_list[user]))
            triagers_log.write(user + "\n")


if __name__ == "__main__":
    main()