xref: /llvm-project/.github/workflows/commit-access-review.py (revision 2359635457b1f2c6f2c5d33ca84d0fda7729a19d)
1#!/usr/bin/env python3
2# ===-- commit-access-review.py  --------------------------------------------===#
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7#
8# ===------------------------------------------------------------------------===#
9#
10# ===------------------------------------------------------------------------===#
11
12import datetime
13import github
14import re
15import requests
16import time
17import sys
18import re
19
20
class User:
    """
    Activity counters for a single user in the triage list.

    Every instance keeps a reference to the shared ``triage_list`` dictionary
    (keyed by user name) and removes its own entry from that dictionary as
    soon as the sum of its authored, merged and reviewed counters reaches
    ``THRESHOLD``.
    """

    # Total number of contributions at which a user is dropped from the
    # triage list.
    THRESHOLD = 5

    def __init__(self, name, triage_list):
        self.name = name
        self.authored = 0
        self.merged = 0
        self.reviewed = 0
        self.triage_list = triage_list

    def _check_threshold(self, kind):
        """
        Report and remove this user from the triage list if the threshold is
        met.  ``kind`` names the counter that was just updated.
        """
        if self.meets_threshold():
            print(self.name, f"meets the threshold with {kind} commits")
            # pop() instead of del: the entry may already be gone if a counter
            # is updated again after the user was removed.
            self.triage_list.pop(self.name, None)

    def add_authored(self, val=1):
        """Add ``val`` to the authored counter and re-check the threshold."""
        self.authored += val
        self._check_threshold("authored")

    def set_authored(self, val):
        """Replace the authored counter with ``val`` and re-check the threshold."""
        self.authored = 0
        self.add_authored(val)

    def add_merged(self, val=1):
        """Add ``val`` to the merged counter and re-check the threshold."""
        self.merged += val
        self._check_threshold("merged")

    def add_reviewed(self, val=1):
        """Add ``val`` to the reviewed counter and re-check the threshold."""
        self.reviewed += val
        self._check_threshold("reviewed")

    def get_total(self):
        """Return the sum of the authored, merged and reviewed counters."""
        return self.authored + self.merged + self.reviewed

    def meets_threshold(self):
        """Return True if the total has reached ``THRESHOLD``."""
        return self.get_total() >= self.THRESHOLD

    def __repr__(self):
        return f"{self.name} : a: {self.authored} m: {self.merged} r: {self.reviewed}"
63
64
def check_manual_requests(
    gh: github.Github, start_date: datetime.datetime
) -> list[str]:
    """
    Return the logins found in commit-access issues created after
    ``start_date``.

    The search covers issues in llvm/llvm-project labelled
    ``infra:commit-access`` or ``infra:commit-access-request``.  For every
    matching issue the result contains each ``@login`` mentioned in the issue
    body plus the login of the issue author (when the author still exists).
    The list may contain duplicates.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            nodes {
              ... on Issue {
                author {
                  login
                }
                body
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
        """
    formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
    variables = {
        "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infra:commit-access,infra:commit-access-request"
    }

    # Capture only characters that can be part of a login so that trailing
    # punctuation or a carriage return ("@user.", "@user)", "@user\r\n") does
    # not end up in the name.  The look-behind skips e-mail addresses.
    mention = re.compile(r"(?<!\w)@([A-Za-z0-9_-]+)")

    has_next_page = True
    users = []
    while has_next_page:
        _, res_data = gh._Github__requester.graphql_query(
            query=query, variables=variables
        )
        search = res_data["data"]["search"]
        for issue in search["nodes"]:
            # Be tolerant of nodes that carry no body or author.
            users.extend(mention.findall(issue.get("body") or ""))
            author = issue.get("author")
            if author:
                users.append(author["login"])
        page_info = search["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        if has_next_page:
            # Continue the same search from where this page ended.
            variables["after"] = page_info["endCursor"]
    return users
112
113
def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
    """
    Get the number of commits that ``user`` has made since ``start_date``.

    The user's login is first resolved to a node id, then the ``main`` branch
    history of every repository returned for the ``llvm-committers`` team is
    queried for commits authored by that id.  Counting stops as soon as
    ``User.THRESHOLD`` is reached, so the result is only exact below the
    threshold.
    """
    variables = {
        "owner": "llvm",
        "user": user,
        "start_date": start_date.strftime("%Y-%m-%dT%H:%M:%S"),
    }

    user_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """

    _, res_data = gh._Github__requester.graphql_query(
        query=user_query, variables=variables
    )
    variables["user_id"] = res_data["data"]["user"]["id"]

    query = """
        query ($owner: String!, $user_id: ID!, $start_date: GitTimestamp!){
          organization(login: $owner) {
            teams(query: "llvm-committers" first:1) {
              nodes {
                repositories {
                  nodes {
                    ref(qualifiedName: "main") {
                      target {
                        ... on Commit {
                          history(since: $start_date, author: {id: $user_id }) {
                            totalCount
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
     """
    count = 0
    _, res_data = gh._Github__requester.graphql_query(
        query=query, variables=variables
    )
    teams = res_data["data"]["organization"]["teams"]["nodes"]
    for repo in teams[0]["repositories"]["nodes"]:
        ref = repo["ref"]
        if not ref:
            # The repository has no ``main`` branch, so there is nothing to
            # count here.
            continue
        count += int(ref["target"]["history"]["totalCount"])
        if count >= User.THRESHOLD:
            # Enough commits found; no need to look at further repositories.
            break
    return count
171
172
def is_new_committer_query_repo(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Decide whether ``user`` counts as a new committer.

    Up to five entries of the user's commit history on the ``main`` branch of
    llvm/llvm-project are fetched.  The user is considered new when that
    history is empty or when the last returned commit is not older than
    ``start_date``.  New committers keep their commit access even when they
    do not meet the activity criteria.
    """
    lookup_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """

    history_query = """
        query ($owner: String!, $user_id: ID!){
          organization(login: $owner) {
            repository(name: "llvm-project") {
              ref(qualifiedName: "main") {
                target {
                  ... on Commit {
                    history(author: {id: $user_id }, first: 5) {
                      nodes {
                        committedDate
                      }
                    }
                  }
                }
              }
            }
          }
        }
     """

    # Resolve the login to the node id needed by the history query.
    variables = {"user": user}
    _, lookup = gh._Github__requester.graphql_query(
        query=lookup_query, variables=variables
    )
    variables.update(
        owner="llvm",
        user_id=lookup["data"]["user"]["id"],
        start_date=start_date.strftime("%Y-%m-%dT%H:%M:%S"),
    )

    _, result = gh._Github__requester.graphql_query(
        query=history_query, variables=variables
    )
    repository = result["data"]["organization"]["repository"]
    history = repository["ref"]["target"]["history"]["nodes"]
    if len(history) == 0:
        return True

    last_returned = datetime.datetime.strptime(
        history[-1]["committedDate"], "%Y-%m-%dT%H:%M:%SZ"
    )
    return not (last_returned < start_date)
232
233
def is_new_committer(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Wrapper around is_new_committer_query_repo to handle exceptions.

    Returns the result of the query, or True (treat ``user`` as a new
    committer) when the query fails for any reason.
    """
    try:
        return is_new_committer_query_repo(gh, user, start_date)
    except Exception as e:
        # Deliberately best effort, but report the failure instead of hiding
        # it.  Only Exception is caught so that Ctrl-C and sys.exit() still
        # work.
        print("Failed to check whether", user, "is a new committer:", e)
    return True
245
246
def get_review_count(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> int:
    """
    Count the pull requests in the llvm organization, merged after
    ``start_date``, that ``user`` commented on but did not author.
    """
    search_query = """
        query ($query: String!) {
          search(query: $query, type: ISSUE, first: 5) {
            issueCount
          }
        }
        """
    formatted_start_date = start_date.strftime("%Y-%m-%dT%H:%M:%S")
    search_string = f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm"

    # Only the total count is needed, not the matching pull requests.
    _, response = gh._Github__requester.graphql_query(
        query=search_query,
        variables={
            "owner": "llvm",
            "repo": "llvm-project",
            "user": user,
            "query": search_string,
        },
    )
    return int(response["data"]["search"]["issueCount"])
273
274
def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime):
    """
    Fetch all the merged PRs for the llvm organization since ``start_date``
    and update ``triage_list`` in place.

    For every merged PR the author (if present in ``triage_list``) is credited
    with one authored PR, and the user who merged it (if present and different
    from the author) with one merged PR.  The search is issued in 7-day
    windows, each of which is paginated 100 results at a time.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            issueCount,
            nodes {
              ... on PullRequest {
                 author {
                   login
                 }
                 mergedBy {
                   login
                 }
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
    """
    date_begin = start_date
    while date_begin < datetime.datetime.now():
        date_end = date_begin + datetime.timedelta(days=7)
        formatted_date_begin = date_begin.strftime("%Y-%m-%dT%H:%M:%S")
        formatted_date_end = date_end.strftime("%Y-%m-%dT%H:%M:%S")
        # A fresh dict per window so that the pagination cursor of the
        # previous window is not carried over.
        variables = {
            "query": f"type:pr is:merged merged:{formatted_date_begin}..{formatted_date_end} org:llvm",
        }
        has_next_page = True
        while has_next_page:
            print(variables)
            _, res_data = gh._Github__requester.graphql_query(
                query=query, variables=variables
            )
            search = res_data["data"]["search"]
            for pr in search["nodes"]:
                # Users can be None if the user has been deleted.  A deleted
                # author must not prevent the merger from being credited.
                author = pr["author"]["login"] if pr["author"] else None
                merger = pr["mergedBy"]["login"] if pr["mergedBy"] else None

                if author in triage_list:
                    triage_list[author].add_authored()

                # Merging one's own PR is already counted as authoring it.
                if merger != author and merger in triage_list:
                    triage_list[merger].add_merged()

            page_info = search["pageInfo"]
            has_next_page = page_info["hasNextPage"]
            if has_next_page:
                variables["after"] = page_info["endCursor"]
        date_begin = date_end
339
340
def main():
    """
    Build the list of users with push access to llvm/llvm-project and remove
    everyone who is exempt or sufficiently active.

    The GitHub token is read from the first command line argument.  Users are
    removed from the list when they appear in a commit-access issue from the
    last year, reach the activity threshold through PRs, reviews or commits,
    or are new committers.  The names that remain are written to
    ``triagers.log``, one per line.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <github-token>")
    token = sys.argv[1]
    gh = github.Github(login_or_token=token)
    org = gh.get_organization("llvm")
    repo = org.get_repo("llvm-project")
    one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
    triage_list = {}
    for collaborator in repo.get_collaborators(permission="push"):
        triage_list[collaborator.login] = User(collaborator.login, triage_list)

    print("Start:", len(triage_list), "triagers")
    # Step 0 Check if users have requested commit access in the last year.
    for user in check_manual_requests(gh, one_year_ago):
        if user in triage_list:
            print(user, "requested commit access in the last year.")
            del triage_list[user]
    print("After Request Check:", len(triage_list), "triagers")

    # Step 1 count all PRs authored or merged
    count_prs(gh, triage_list, one_year_ago)

    print("After PRs:", len(triage_list), "triagers")

    if not triage_list:
        sys.exit(0)

    # Step 2 check for reviews
    # Iterate over a snapshot of the keys: users remove themselves from
    # triage_list once they meet the threshold.
    for user in list(triage_list):
        review_count = get_review_count(gh, user, one_year_ago)
        triage_list[user].add_reviewed(review_count)

    print("After Reviews:", len(triage_list), "triagers")

    if not triage_list:
        sys.exit(0)

    # Step 3 check for number of commits
    for user in list(triage_list):
        num_commits = get_num_commits(gh, user, one_year_ago)
        # Override the total number of commits to not double count commits and
        # authored PRs.
        triage_list[user].set_authored(num_commits)

    print("After Commits:", len(triage_list), "triagers")

    # Step 4 check for new committers
    for user in list(triage_list):
        print("Checking", user)
        if is_new_committer(gh, user, one_year_ago):
            print("Removing new committer: ", user)
            del triage_list[user]

    print("Complete:", len(triage_list), "triagers")

    with open("triagers.log", "w") as triagers_log:
        for user in triage_list:
            print(repr(triage_list[user]))
            triagers_log.write(user + "\n")


# Run the review only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
403