#!/usr/bin/env python3
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##

from typing import List, Dict, Tuple, Optional
import copy
import csv
import itertools
import json
import os
import pathlib
import re
import subprocess

# Number of the 'Libc++ Standards Conformance' project on Github
LIBCXX_CONFORMANCE_PROJECT = '31'

def extract_between_markers(text: str, begin_marker: str, end_marker: str) -> Optional[str]:
    """
    Given a string containing special markers, extract everything located between these markers.

    If the beginning marker is not found, None is returned. If the beginning marker is found but
    there is no end marker, it is an error (this is done to avoid silently accepting inputs that
    are erroneous by mistake) and a ValueError is raised.
    """
    start = text.find(begin_marker)
    if start == -1:
        return None

    start += len(begin_marker) # skip the marker itself
    end = text.find(end_marker, start)
    if end == -1:
        # The input is malformed: report it as a bad value rather than silently
        # returning a truncated (or empty) result.
        raise ValueError(f"Could not find end marker {end_marker} in: {text[start:]}")

    return text[start:end]

class PaperStatus:
    """
    Implementation status of a paper or LWG issue.

    A status can be created from a CSV entry or from a Github issue, compared against another
    status, and formatted back into a CSV entry.
    """
    TODO = 1
    IN_PROGRESS = 2
    PARTIAL = 3
    DONE = 4
    NOTHING_TO_DO = 5

    _status: int

    _original: Optional[str]
    """
    Optional string from which the paper status was created. This is used to carry additional
    information from CSV rows, like any notes associated to the status.
    """

    def __init__(self, status: int, original: Optional[str] = None):
        self._status = status
        self._original = original

    def __eq__(self, other) -> bool:
        # Let Python fall back to its default handling when compared against a
        # foreign type instead of failing with an AttributeError.
        if not isinstance(other, PaperStatus):
            return NotImplemented
        return self._status == other._status

    def __lt__(self, other) -> bool:
        if not isinstance(other, PaperStatus):
            return NotImplemented
        # DONE and NOTHING_TO_DO share the same rank: neither is "less advanced"
        # than the other, even though they do not compare equal.
        relative_order = {
            PaperStatus.TODO: 0,
            PaperStatus.IN_PROGRESS: 1,
            PaperStatus.PARTIAL: 2,
            PaperStatus.DONE: 3,
            PaperStatus.NOTHING_TO_DO: 3,
        }
        return relative_order[self._status] < relative_order[other._status]

    @staticmethod
    def from_csv_entry(entry: str) -> 'PaperStatus':
        """
        Parse a paper status out of a CSV row entry. Entries can look like:
        - '' (an empty string, which means the paper is not done yet)
        - '|In Progress|'
        - '|Partial|'
        - '|Complete|'
        - '|Nothing To Do|'

        Any other entry raises a RuntimeError.
        """
        statuses = {
            '': PaperStatus.TODO,
            '|In Progress|': PaperStatus.IN_PROGRESS,
            '|Partial|': PaperStatus.PARTIAL,
            '|Complete|': PaperStatus.DONE,
            '|Nothing To Do|': PaperStatus.NOTHING_TO_DO,
        }
        if entry not in statuses:
            raise RuntimeError(f'Unexpected CSV entry for status: {entry}')
        # Remember the original entry so that it is written back unchanged.
        return PaperStatus(statuses[entry], entry)

    @staticmethod
    def from_github_issue(issue: Dict) -> 'PaperStatus':
        """
        Parse a paper status out of a Github issue obtained from querying a Github project.

        An issue without a 'status' key is treated as not done yet. An unknown status raises
        a RuntimeError.
        """
        if 'status' not in issue:
            return PaperStatus(PaperStatus.TODO)

        statuses = {
            'Todo': PaperStatus.TODO,
            'In Progress': PaperStatus.IN_PROGRESS,
            'Partial': PaperStatus.PARTIAL,
            'Done': PaperStatus.DONE,
            'Nothing To Do': PaperStatus.NOTHING_TO_DO,
        }
        if issue['status'] not in statuses:
            raise RuntimeError(f"Received unrecognizable Github issue status: {issue['status']}")
        return PaperStatus(statuses[issue['status']])

    def to_csv_entry(self) -> str:
        """
        Return the issue state formatted for a CSV entry. The status is formatted as '|Complete|',
        '|In Progress|', etc.

        If the status was created from a CSV entry, that original entry is returned verbatim.
        """
        mapping = {
            PaperStatus.TODO: '',
            PaperStatus.IN_PROGRESS: '|In Progress|',
            PaperStatus.PARTIAL: '|Partial|',
            PaperStatus.DONE: '|Complete|',
            PaperStatus.NOTHING_TO_DO: '|Nothing To Do|',
        }
        return self._original if self._original is not None else mapping[self._status]

class PaperInfo:
    """
    Tracking information about a single paper or LWG issue, created either from a row of a
    status CSV file or from a Github issue.
    """

    paper_number: str
    """
    Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
    """

    paper_name: str
    """
    Plain text string representing the name of the paper.
    """

    status: PaperStatus
    """
    Status of the paper/issue. This can be to do, in progress, partial, done, or nothing to do.
    """

    meeting: Optional[str]
    """
    Plain text string representing the meeting at which the paper/issue was voted.
    """

    first_released_version: Optional[str]
    """
    First version of LLVM in which this paper/issue was resolved.
    """

    notes: Optional[str]
    """
    Optional plain text string representing notes to associate to the paper.
    This is used to populate the "Notes" column in the CSV status pages.
    """

    original: Optional[object]
    """
    Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
    was used to generate this PaperInfo and is useful for error reporting purposes.
    """

    def __init__(self, paper_number: str, paper_name: str,
                       status: PaperStatus,
                       meeting: Optional[str] = None,
                       first_released_version: Optional[str] = None,
                       notes: Optional[str] = None,
                       original: Optional[object] = None):
        self.paper_number = paper_number
        self.paper_name = paper_name
        self.status = status
        self.meeting = meeting
        self.first_released_version = first_released_version
        self.notes = notes
        self.original = original

    def for_printing(self) -> Tuple[str, str, str, str, str, str]:
        """
        Return this paper formatted as the six columns of a status CSV row. Missing optional
        fields are rendered as empty strings.
        """
        return (
            f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
            self.paper_name,
            self.meeting if self.meeting is not None else '',
            self.status.to_csv_entry(),
            self.first_released_version if self.first_released_version is not None else '',
            self.notes if self.notes is not None else '',
        )

    def __repr__(self) -> str:
        # Prefer showing where the information came from, which is more useful in error messages.
        return repr(self.original) if self.original is not None else repr(self.for_printing())

    @staticmethod
    def from_csv_row(row: Tuple[str, str, str, str, str, str]) -> 'PaperInfo':
        """
        Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.

        A RuntimeError is raised if no paper/issue number can be found in the first column.
        """
        # Extract the paper number from the first column
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
        if match is None:
            raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")

        # Empty columns are stored as None rather than as empty strings.
        return PaperInfo(
            paper_number=match.group(1),
            paper_name=row[1],
            status=PaperStatus.from_csv_entry(row[3]),
            meeting=row[2] or None,
            first_released_version=row[4] or None,
            notes=row[5] or None,
            original=row,
        )

    @staticmethod
    def from_github_issue(issue: Dict) -> 'PaperInfo':
        """
        Create a PaperInfo object from the Github issue information obtained from querying a Github Project.

        A RuntimeError is raised if no paper/issue number can be found in the issue title.
        """
        # Extract the paper number from the issue title
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title'])
        if match is None:
            raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
        paper = match.group(1)

        # Extract any notes from the Github issue and populate the RST notes with them
        issue_description = issue['content']['body']
        notes = extract_between_markers(issue_description, 'BEGIN-RST-NOTES', 'END-RST-NOTES')
        notes = notes.strip() if notes is not None else notes

        return PaperInfo(
            paper_number=paper,
            paper_name=issue['title'],
            status=PaperStatus.from_github_issue(issue),
            meeting=issue.get('meeting Voted', None),
            first_released_version=None, # TODO
            notes=notes,
            original=issue,
        )

def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo:
    """
    Merge a paper coming from a CSV row with a corresponding Github-tracked paper.

    If the CSV row has a status that is "less advanced" than the Github issue, simply update the CSV
    row with the newer status. Otherwise, report an error if they have a different status because
    something must be wrong.

    We don't update issues from 'To Do' to 'In Progress', since that only creates churn and the
    status files aim to document user-facing functionality in releases, for which 'In Progress'
    is not useful.

    In case we don't update the CSV row's status, we still take any updated notes coming
    from the Github issue.
    """
    if paper.status == PaperStatus(PaperStatus.TODO) and gh.status == PaperStatus(PaperStatus.IN_PROGRESS):
        # Keep the row as 'To Do', only refresh the notes.
        result = copy.deepcopy(paper)
        result.notes = gh.notes
    elif paper.status < gh.status:
        # Github is ahead of the CSV row: take everything from Github.
        result = copy.deepcopy(gh)
    elif paper.status == gh.status:
        result = copy.deepcopy(paper)
        result.notes = gh.notes
    else:
        # The CSV row is ahead of Github (or the statuses are unordered): report it and keep the row.
        print(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}")
        result = copy.deepcopy(paper)
    return result

def load_csv(file: pathlib.Path) -> List[Tuple]:
    """
    Load all the rows of the given CSV file, including the header row.
    """
    with open(file, newline='') as f:
        return list(csv.reader(f, delimiter=','))

def write_csv(output: pathlib.Path, rows: List[Tuple]):
    """
    Write the given rows to the given CSV file, quoting every field.
    """
    with open(output, 'w', newline='') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
        writer.writerows(rows)

def create_github_issue(paper: PaperInfo, labels: List[str]) -> None:
    """
    Create a new Github issue representing the given PaperInfo.

    The user is prompted for confirmation before anything is created. The issue is created with
    the `gh` command-line tool, added to the conformance project, and its 'Meeting Voted' and
    'Status' project fields are then filled in.
    """
    paper_name = paper.paper_name.replace('``', '`').replace('\\', '')

    create_cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project',
                    '--title', f'{paper.paper_number}: {paper_name}',
                    '--body', f'**Link:** https://wg21.link/{paper.paper_number}',
                    '--project', 'libc++ Standards Conformance',
                    '--label', 'libc++']

    for label in labels:
        create_cli += ['--label', label]

    print("Do you want to create the following issue?")
    print(create_cli)
    answer = input("y/n: ")
    if answer == 'n':
        print("Not creating issue")
        return
    elif answer != 'y':
        print(f"Invalid answer {answer}, skipping")
        return

    print("Creating issue")
    issue_link = subprocess.check_output(create_cli).decode().strip()
    print(f"Created tracking issue for {paper.paper_number}: {issue_link}")

    # Retrieve the "Github project item ID" by re-adding the issue to the project again,
    # even though we created it inside the project in the first place.
    item_add_cli = ['gh', 'project', 'item-add', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--url', issue_link, '--format', 'json']
    item = json.loads(subprocess.check_output(item_add_cli).decode().strip())

    # Then, adjust the 'Meeting Voted' field of that item. A paper without a meeting would put
    # None in the command line and abort after the issue was already created, so skip the
    # field in that case and still set the status below.
    if paper.meeting is not None:
        meeting_voted_cli = ['gh', 'project', 'item-edit',
                                '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                                '--field-id', 'PVTF_lADOAQWwKc4AlOgtzgdUEXI', '--text', paper.meeting,
                                '--id', item['id']]
        subprocess.check_call(meeting_voted_cli)
    else:
        print(f"No meeting recorded for {paper.paper_number}, leaving 'Meeting Voted' unset")

    # And also adjust the 'Status' field of the item to 'To Do'.
    status_cli = ['gh', 'project', 'item-edit',
                    '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                    '--field-id', 'PVTSSF_lADOAQWwKc4AlOgtzgdUBak', '--single-select-option-id', 'f75ad846',
                    '--id', item['id']]
    subprocess.check_call(status_cli)

def sync_csv(rows: List[Tuple], from_github: List[PaperInfo], create_new: bool, labels: Optional[List[str]] = None) -> List[Tuple]:
    """
    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
    new CSV rows synchronized with the up-to-date information.

    If `create_new` is True and a paper from the CSV file is not tracked on Github yet, this also prompts
    to create a new issue on Github for tracking it. In that case the created issue is tagged with the
    provided labels.

    Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
    PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
    it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).

    An empty list of rows (a file without even a header) yields an empty list.
    """
    if not rows:
        return []

    # Group the Github issues by paper number once, instead of scanning the whole list for every row.
    issues_by_number: Dict[str, List[PaperInfo]] = {}
    for gh in from_github:
        issues_by_number.setdefault(gh.paper_number, []).append(gh)

    results = [rows[0]] # Start with the header
    for row in rows[1:]: # Skip the header
        # If the row contains empty entries, this is a "separator row" between meetings.
        # Preserve it as-is. Blank lines are read as empty rows and are preserved too.
        if not row or row[0] == "":
            results.append(row)
            continue

        paper = PaperInfo.from_csv_row(row)

        # Find any Github issues tracking this paper. Each row must have one and exactly one Github
        # issue tracking it, which we validate below.
        tracking = issues_by_number.get(paper.paper_number, [])

        # If there's more than one tracking issue, something is weird.
        if len(tracking) > 1:
            print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")
            results.append(row)
            continue

        # If there is no tracking issue for that row and we are creating new issues, do that.
        # Otherwise just log that we're missing an issue.
        if len(tracking) == 0:
            if create_new:
                assert labels is not None, "Missing labels when creating new Github issues"
                create_github_issue(paper, labels=labels)
            else:
                print(f"Can't find any Github issue for CSV row: {row}")
            results.append(row)
            continue

        results.append(merge(paper, tracking[0]).for_printing())

    return results

CSV_FILES_TO_SYNC = {
    'Cxx17Issues.csv': ['c++17', 'lwg-issue'],
    'Cxx17Papers.csv': ['c++17', 'wg21 paper'],
    'Cxx20Issues.csv': ['c++20', 'lwg-issue'],
    'Cxx20Papers.csv': ['c++20', 'wg21 paper'],
    'Cxx23Issues.csv': ['c++23', 'lwg-issue'],
    'Cxx23Papers.csv': ['c++23', 'wg21 paper'],
    'Cxx2cIssues.csv': ['c++26', 'lwg-issue'],
    'Cxx2cPapers.csv': ['c++26', 'wg21 paper'],
}

def main(argv):
    """
    Validate the status CSV files and, unless --validate-only is passed, synchronize them with
    the Github issues of the conformance project.

    `argv` is the list of command-line arguments, without the program name.
    """
    import argparse
    parser = argparse.ArgumentParser(prog='synchronize-status-files',
        description='Synchronize the libc++ conformance status files with Github issues')
    parser.add_argument('--validate-only', action='store_true',
        help="Only perform the data validation of CSV files.")
    parser.add_argument('--create-new', action='store_true',
        help="Create new Github issues for CSV rows that do not correspond to any existing Github issue.")
    parser.add_argument('--load-github-from', type=str,
        help="A json file to load the Github project information from instead of querying the API. This is useful for testing to avoid rate limiting.")
    args = parser.parse_args(argv)

    libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    # Perform data validation for all the CSV files.
    print("Performing data validation of the CSV files")
    for filename in CSV_FILES_TO_SYNC:
        rows = load_csv(libcxx_root / 'docs' / 'Status' / filename)
        for row in rows[1:]: # Skip the header
            if row and row[0] != "": # Skip separator rows
                PaperInfo.from_csv_row(row)

    if args.validate_only:
        return

    # Load all the Github issues tracking papers from Github.
    if args.load_github_from:
        print(f"Loading all issues from {args.load_github_from}")
        with open(args.load_github_from, 'r') as f:
            project_info = json.load(f)
    else:
        print("Loading all issues from Github")
        gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
        project_info = json.loads(subprocess.check_output(gh_command_line))
    from_github = [PaperInfo.from_github_issue(i) for i in project_info['items']]

    # Synchronize CSV files with the Github issues.
    for (filename, labels) in CSV_FILES_TO_SYNC.items():
        print(f"Synchronizing {filename} with Github issues")
        file = libcxx_root / 'docs' / 'Status' / filename
        rows = load_csv(file)
        synced = sync_csv(rows, from_github, create_new=args.create_new, labels=labels)
        write_csv(file, synced)

if __name__ == '__main__':
    import sys
    main(sys.argv[1:])