#!/usr/bin/env python3
# ===-- commit-access-review.py --------------------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===------------------------------------------------------------------------===#
#
# ===------------------------------------------------------------------------===#

import datetime
import github
import re
import requests
import time
import sys


# Timestamp layout used in every search query and GraphQL variable below.
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S"

# An "@" followed by everything up to the next whitespace or comma.  Any
# whitespace ends the mention so that a "\r" (CRLF bodies) or a tab does not
# end up glued to the login.
_MENTION_RE = re.compile(r"@[^\s,]+")

# Punctuation that may directly follow a mention in running text.
_MENTION_TRAILING_PUNCTUATION = ".:;!?)"


class User:
    """
    Activity counters for one user with push access.

    ``triage_list`` is the shared ``{login: User}`` dict of users still under
    review.  As soon as the combined counters reach ``THRESHOLD`` the user
    removes their own entry from it.
    """

    THRESHOLD = 5

    def __init__(self, name, triage_list):
        self.name = name
        self.authored = 0
        self.merged = 0
        self.reviewed = 0
        self.triage_list = triage_list

    def _drop_if_threshold_met(self, kind):
        """Remove this user from the triage list once the threshold is met."""
        if not self.meets_threshold():
            return
        print(self.name, f"meets the threshold with {kind} commits")
        # pop() rather than del: stay quiet if the entry was already removed.
        self.triage_list.pop(self.name, None)

    def add_authored(self, val=1):
        """Add ``val`` to the authored counter."""
        self.authored += val
        self._drop_if_threshold_met("authored")

    def set_authored(self, val):
        """Replace (rather than increment) the authored counter with ``val``."""
        self.authored = 0
        self.add_authored(val)

    def add_merged(self, val=1):
        """Add ``val`` to the merged counter."""
        self.merged += val
        self._drop_if_threshold_met("merged")

    def add_reviewed(self, val=1):
        """Add ``val`` to the reviewed counter."""
        self.reviewed += val
        self._drop_if_threshold_met("reviewed")

    def get_total(self):
        """Return the sum of the authored, merged and reviewed counters."""
        return self.authored + self.merged + self.reviewed

    def meets_threshold(self):
        """Return True when the total activity is at least ``THRESHOLD``."""
        return self.get_total() >= self.THRESHOLD

    def __repr__(self):
        return "{} : a: {} m: {} r: {}".format(
            self.name, self.authored, self.merged, self.reviewed
        )


def _format_timestamp(date: datetime.datetime) -> str:
    """Format ``date`` the way the GitHub queries in this script expect."""
    return date.strftime(_TIMESTAMP_FORMAT)


def _run_graphql(gh: github.Github, query: str, variables: dict) -> dict:
    """
    Run a GraphQL ``query`` and return the ``data`` member of the response.

    The request goes through PyGithub's private (name-mangled) requester
    attribute of ``gh``.
    """
    _, res_data = gh._Github__requester.graphql_query(
        query=query, variables=variables
    )
    return res_data["data"]


def _get_user_id(gh: github.Github, user: str) -> str:
    """Return the GraphQL node id of the account whose login is ``user``."""
    user_query = """
        query ($user: String!) {
          user(login: $user) {
            id
          }
        }
    """
    data = _run_graphql(gh, user_query, {"user": user})
    return data["user"]["id"]


def check_manual_requests(
    gh: github.Github, start_date: datetime.datetime
) -> list[str]:
    """
    Return a list of users who have been asked since ``start_date`` if they
    want to keep their commit access or if they have applied for commit
    access since ``start_date``

    The list contains every login mentioned with ``@`` in the body of a
    matching issue plus the author of each issue.  It may contain duplicates.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            nodes {
              ... on Issue {
                author {
                  login
                }
                body
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
    """
    formatted_start_date = _format_timestamp(start_date)
    variables = {
        "query": f"type:issue created:>{formatted_start_date} org:llvm repo:llvm-project label:infra:commit-access,infra:commit-access-request"
    }

    has_next_page = True
    users = []
    while has_next_page:
        search = _run_graphql(gh, query, variables)["search"]
        for issue in search["nodes"]:
            for mention in _MENTION_RE.findall(issue["body"]):
                # Drop the leading "@" and any sentence punctuation that
                # directly follows the login.
                login = mention[1:].rstrip(_MENTION_TRAILING_PUNCTUATION)
                if login:
                    users.append(login)
            # The author is null when the account no longer exists.
            if issue["author"]:
                users.append(issue["author"]["login"])
        page_info = search["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        if has_next_page:
            variables["after"] = page_info["endCursor"]
    return users


def get_num_commits(gh: github.Github, user: str, start_date: datetime.datetime) -> int:
    """
    Get the number of commits that ``user`` has made since ``start_date``.

    Commits are counted on the ``main`` branch of the repositories of the
    ``llvm-committers`` team.  Counting stops as soon as ``User.THRESHOLD`` is
    reached, so the result is only exact while it is below the threshold.
    """
    variables = {
        "owner": "llvm",
        "user_id": _get_user_id(gh, user),
        "start_date": _format_timestamp(start_date),
    }

    query = """
        query ($owner: String!, $user_id: ID!, $start_date: GitTimestamp!){
          organization(login: $owner) {
            teams(query: "llvm-committers" first:1) {
              nodes {
                repositories {
                  nodes {
                    ref(qualifiedName: "main") {
                      target {
                        ... on Commit {
                          history(since: $start_date, author: {id: $user_id }) {
                            totalCount
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
    """
    data = _run_graphql(gh, query, variables)
    team = data["organization"]["teams"]["nodes"][0]

    count = 0
    for repo in team["repositories"]["nodes"]:
        # Skip entries without a usable "main" ref instead of crashing on a
        # null value (presumably repositories that use another branch name).
        if not repo or not repo["ref"]:
            continue
        count += int(repo["ref"]["target"]["history"]["totalCount"])
        if count >= User.THRESHOLD:
            break
    return count


def is_new_committer_query_repo(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Determine if ``user`` is a new committer.  A new committer can keep their
    commit access even if they don't meet the criteria.

    The query returns up to five commits by ``user`` from the history of
    ``main`` in llvm-project.  The user counts as new when there are none, or
    when the last returned commit is not older than ``start_date``.
    """
    variables = {
        "owner": "llvm",
        "user_id": _get_user_id(gh, user),
    }

    query = """
        query ($owner: String!, $user_id: ID!){
          organization(login: $owner) {
            repository(name: "llvm-project") {
              ref(qualifiedName: "main") {
                target {
                  ... on Commit {
                    history(author: {id: $user_id }, first: 5) {
                      nodes {
                        committedDate
                      }
                    }
                  }
                }
              }
            }
          }
        }
    """

    data = _run_graphql(gh, query, variables)
    repo = data["organization"]["repository"]
    commits = repo["ref"]["target"]["history"]["nodes"]
    if not commits:
        return True
    committed_date = datetime.datetime.strptime(
        commits[-1]["committedDate"], "%Y-%m-%dT%H:%M:%SZ"
    )
    return committed_date >= start_date


def is_new_committer(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> bool:
    """
    Wrapper around is_new_committer_query_repo to handle exceptions.

    Any failure of the lookup is reported on stderr and the user is treated
    as a new committer, i.e. the function returns True.
    """
    try:
        return is_new_committer_query_repo(gh, user, start_date)
    except Exception as error:
        # Best effort: when in doubt the user keeps their access.  Only
        # Exception is caught so that Ctrl-C and sys.exit() still work.
        print(
            f"Could not check the commit history of {user}: {error!r}",
            file=sys.stderr,
        )
    return True


def get_review_count(
    gh: github.Github, user: str, start_date: datetime.datetime
) -> int:
    """
    Return the number of reviews that ``user`` has done since ``start_date``.

    A review is counted as a pull request in the llvm organization merged
    after ``start_date`` that ``user`` commented on but did not author.
    """
    query = """
        query ($query: String!) {
          search(query: $query, type: ISSUE, first: 5) {
            issueCount
          }
        }
    """
    formatted_start_date = _format_timestamp(start_date)
    variables = {
        "query": f"type:pr commenter:{user} -author:{user} merged:>{formatted_start_date} org:llvm",
    }

    data = _run_graphql(gh, query, variables)
    return int(data["search"]["issueCount"])


def count_prs(gh: github.Github, triage_list: dict, start_date: datetime.datetime):
    """
    Fetch all the merged PRs for the project since ``start_date`` and update
    ``triage_list`` with the number of PRs authored and merged for each user.

    The time span is walked in seven-day windows, one search per window, and
    each search is paged through completely.  Users that reach the threshold
    remove themselves from ``triage_list`` while this runs.
    """

    query = """
        query ($query: String!, $after: String) {
          search(query: $query, type: ISSUE, first: 100, after: $after) {
            issueCount,
            nodes {
              ... on PullRequest {
                author {
                  login
                }
                mergedBy {
                  login
                }
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
    """
    date_begin = start_date
    while date_begin < datetime.datetime.now():
        date_end = date_begin + datetime.timedelta(days=7)
        formatted_date_begin = _format_timestamp(date_begin)
        formatted_date_end = _format_timestamp(date_end)
        # A fresh dict per window, so the paging cursor of the previous
        # window is not carried over.
        variables = {
            "query": f"type:pr is:merged merged:{formatted_date_begin}..{formatted_date_end} org:llvm",
        }
        has_next_page = True
        while has_next_page:
            print(variables)
            search = _run_graphql(gh, query, variables)["search"]
            for pr in search["nodes"]:
                # Users can be None if the user has been deleted.
                if not pr["author"]:
                    continue
                author = pr["author"]["login"]
                if author in triage_list:
                    triage_list[author].add_authored()

                if not pr["mergedBy"]:
                    continue
                merger = pr["mergedBy"]["login"]
                # Merging one's own PR is already counted as authored.
                if merger != author and merger in triage_list:
                    triage_list[merger].add_merged()

            page_info = search["pageInfo"]
            has_next_page = page_info["hasNextPage"]
            if has_next_page:
                variables["after"] = page_info["endCursor"]
        date_begin = date_end


def main():
    """
    Write the logins of inactive committers to ``triagers.log``.

    Usage: commit-access-review.py <github-token>

    Starts from everyone with push permission on llvm/llvm-project and removes
    users step by step: manual requests, PR activity, reviews, commits, and
    finally new committers.  Exits early with status 0 when nobody is left
    after the PR or review step.
    """
    if len(sys.argv) < 2:
        print(f"usage: {sys.argv[0]} <github-token>", file=sys.stderr)
        sys.exit(1)
    token = sys.argv[1]
    gh = github.Github(login_or_token=token)
    org = gh.get_organization("llvm")
    repo = org.get_repo("llvm-project")
    # NOTE(review): this is naive local time, while commit dates are parsed
    # from a "Z"-suffixed timestamp in is_new_committer_query_repo.  The skew
    # is presumably negligible for a one-year window - confirm.
    one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
    triage_list = {}
    for collaborator in repo.get_collaborators(permission="push"):
        triage_list[collaborator.login] = User(collaborator.login, triage_list)

    print("Start:", len(triage_list), "triagers")
    # Step 0 Check if users have requested commit access in the last year.
    for user in check_manual_requests(gh, one_year_ago):
        if user in triage_list:
            print(user, "requested commit access in the last year.")
            del triage_list[user]
    print("After Request Check:", len(triage_list), "triagers")

    # Step 1 count all PRs authored or merged
    count_prs(gh, triage_list, one_year_ago)

    print("After PRs:", len(triage_list), "triagers")

    if not triage_list:
        sys.exit(0)

    # Step 2 check for reviews
    # Iterate over a snapshot: users delete themselves from triage_list.
    for user in list(triage_list):
        review_count = get_review_count(gh, user, one_year_ago)
        triage_list[user].add_reviewed(review_count)

    print("After Reviews:", len(triage_list), "triagers")

    if not triage_list:
        sys.exit(0)

    # Step 3 check for number of commits
    for user in list(triage_list):
        num_commits = get_num_commits(gh, user, one_year_ago)
        # Override the total number of commits to not double count commits and
        # authored PRs.
        triage_list[user].set_authored(num_commits)

    print("After Commits:", len(triage_list), "triagers")

    # Step 4 check for new committers
    for user in list(triage_list):
        print("Checking", user)
        if is_new_committer(gh, user, one_year_ago):
            print("Removing new committer: ", user)
            del triage_list[user]

    print("Complete:", len(triage_list), "triagers")

    with open("triagers.log", "w") as triagers_log:
        for user in triage_list:
            print(repr(triage_list[user]))
            triagers_log.write(user + "\n")


if __name__ == "__main__":
    main()