xref: /llvm-project/libcxx/utils/synchronize_csv_status_files.py (revision 1b03747ed85cd4a6573b728674e88f4bd3fa844d)
1#!/usr/bin/env python3
2# ===----------------------------------------------------------------------===##
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7#
8# ===----------------------------------------------------------------------===##
9
10from typing import List, Dict, Tuple, Optional
11import copy
12import csv
13import itertools
14import json
15import os
16import pathlib
17import re
18import subprocess
19
# Number of the 'Libc++ Standards Conformance' project on Github. It is kept as a string
# because it is passed verbatim as a command-line argument to the `gh` tool.
LIBCXX_CONFORMANCE_PROJECT = "31"
22
def extract_between_markers(text: str, begin_marker: str, end_marker: str) -> Optional[str]:
    """
    Given a string containing special markers, extract everything located between these markers.

    Only the first occurrence of the beginning marker is considered, and the extracted text stops
    at the first end marker found after it. The markers themselves are not part of the result.

    If the beginning marker is not found, None is returned. If the beginning marker is found but
    there is no end marker, a ValueError is raised (this is done to avoid silently accepting inputs
    that are erroneous by mistake).
    """
    start = text.find(begin_marker)
    if start == -1:
        return None

    start += len(begin_marker) # skip the marker itself
    end = text.find(end_marker, start)
    if end == -1:
        # Use a builtin exception type: an undefined exception name here would turn this
        # diagnostic into an unrelated NameError.
        raise ValueError(f"Could not find end marker {end_marker} in: {text[start:]}")

    return text[start:end]
41
class PaperStatus:
    """
    Implementation status of a paper or LWG issue.

    A status can be parsed from a CSV entry or from a Github project item, compared against
    another status to find out which one is "more advanced", and formatted back as a CSV entry.
    """

    TODO = 1
    IN_PROGRESS = 2
    PARTIAL = 3
    DONE = 4
    NOTHING_TO_DO = 5

    # Rank of each status, used for ordering. DONE and NOTHING_TO_DO share the same rank: neither
    # one is "less advanced" than the other, even though they do not compare equal.
    _RELATIVE_ORDER = {
        TODO: 0,
        IN_PROGRESS: 1,
        PARTIAL: 2,
        DONE: 3,
        NOTHING_TO_DO: 3,
    }

    # Textual representation of each status in the CSV status files.
    _TO_CSV_ENTRY = {
        TODO: '',
        IN_PROGRESS: '|In Progress|',
        PARTIAL: '|Partial|',
        DONE: '|Complete|',
        NOTHING_TO_DO: '|Nothing To Do|',
    }

    # Reverse mapping, from the CSV representation to the status.
    _FROM_CSV_ENTRY = {
        '': TODO,
        '|In Progress|': IN_PROGRESS,
        '|Partial|': PARTIAL,
        '|Complete|': DONE,
        '|Nothing To Do|': NOTHING_TO_DO,
    }

    # Mapping from the 'status' value of a Github project item to the status.
    _FROM_GITHUB_STATUS = {
        'Todo': TODO,
        'In Progress': IN_PROGRESS,
        'Partial': PARTIAL,
        'Done': DONE,
        'Nothing To Do': NOTHING_TO_DO,
    }

    _status: int

    _original: Optional[str]
    """
    Optional string from which the paper status was created. This is used to carry additional
    information from CSV rows, like any notes associated to the status.
    """

    def __init__(self, status: int, original: Optional[str] = None):
        self._status = status
        self._original = original

    def __eq__(self, other) -> bool:
        """
        Two statuses are equal when they represent the same state; the original string is ignored.
        """
        if not isinstance(other, PaperStatus):
            # Let Python fall back to its default handling instead of failing with an
            # AttributeError when comparing against an unrelated object (e.g. None).
            return NotImplemented
        return self._status == other._status

    def __lt__(self, other) -> bool:
        """
        Return whether this status is strictly "less advanced" than the other one.
        """
        if not isinstance(other, PaperStatus):
            return NotImplemented
        order = PaperStatus._RELATIVE_ORDER
        return order[self._status] < order[other._status]

    @staticmethod
    def from_csv_entry(entry: str):
        """
        Parse a paper status out of a CSV row entry. Entries can look like:
        - '' (an empty string, which means the paper is not done yet)
        - '|In Progress|'
        - '|Partial|'
        - '|Complete|'
        - '|Nothing To Do|'

        Any other entry results in a RuntimeError. The entry is remembered as the original string
        of the returned status.
        """
        status = PaperStatus._FROM_CSV_ENTRY.get(entry)
        if status is None:
            raise RuntimeError(f'Unexpected CSV entry for status: {entry}')
        return PaperStatus(status, entry)

    @staticmethod
    def from_github_issue(issue: Dict):
        """
        Parse a paper status out of a Github issue obtained from querying a Github project.

        An issue without any 'status' key is considered to be in the TODO state. An issue with a
        'status' that is not recognized results in a RuntimeError.
        """
        if 'status' not in issue:
            return PaperStatus(PaperStatus.TODO)

        status = PaperStatus._FROM_GITHUB_STATUS.get(issue['status'])
        if status is None:
            raise RuntimeError(f"Received unrecognizable Github issue status: {issue['status']}")
        return PaperStatus(status)

    def to_csv_entry(self) -> str:
        """
        Return the issue state formatted for a CSV entry. The status is formatted as '|Complete|',
        '|In Progress|', etc.

        If the status was created from an original string, that string is returned verbatim.
        """
        if self._original is not None:
            return self._original
        return PaperStatus._TO_CSV_ENTRY[self._status]
130
class PaperInfo:
    """
    Information about a single paper or LWG issue, coming either from a row of one of the
    status-tracking CSV files or from an issue tracked in a Github project.
    """

    paper_number: str
    """
    Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
    """

    paper_name: str
    """
    Plain text string representing the name of the paper.
    """

    status: PaperStatus
    """
    Status of the paper/issue. This can be to do, in progress, partial, done, or nothing to do.
    """

    meeting: Optional[str]
    """
    Plain text string representing the meeting at which the paper/issue was voted.
    """

    first_released_version: Optional[str]
    """
    First version of LLVM in which this paper/issue was resolved.
    """

    notes: Optional[str]
    """
    Optional plain text string representing notes to associate to the paper.
    This is used to populate the "Notes" column in the CSV status pages.
    """

    original: Optional[object]
    """
    Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
    was used to generate this PaperInfo and is useful for error reporting purposes.
    """

    def __init__(self, paper_number: str, paper_name: str,
                       status: PaperStatus,
                       meeting: Optional[str] = None,
                       first_released_version: Optional[str] = None,
                       notes: Optional[str] = None,
                       original: Optional[object] = None):
        self.paper_number = paper_number
        self.paper_name = paper_name
        self.status = status
        self.meeting = meeting
        self.first_released_version = first_released_version
        self.notes = notes
        self.original = original

    def for_printing(self) -> Tuple[str, str, str, str, str, str]:
        """
        Return the paper formatted as the 6 columns of a CSV row: a RST link to the paper, its name,
        the meeting, the status, the first released version and the notes. Missing optional values
        are represented by empty strings.
        """
        return (
            f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
            self.paper_name,
            self.meeting if self.meeting is not None else '',
            self.status.to_csv_entry(),
            self.first_released_version if self.first_released_version is not None else '',
            self.notes if self.notes is not None else '',
        )

    def __repr__(self) -> str:
        # Prefer showing the object this PaperInfo was created from, since that is what the user
        # can actually look up (a CSV row or a Github issue).
        return repr(self.original) if self.original is not None else repr(self.for_printing())

    @staticmethod
    def from_csv_row(row: Tuple[str, str, str, str, str, str]) -> 'PaperInfo':
        """
        Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.

        A RuntimeError is raised if no paper/issue number can be found in the first column. Empty
        meeting, version and notes columns are stored as None.
        """
        # Extract the paper number from the first column
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
        if match is None:
            raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")

        return PaperInfo(
            paper_number=match.group(1),
            paper_name=row[1],
            status=PaperStatus.from_csv_entry(row[3]),
            meeting=row[2] or None,
            first_released_version=row[4] or None,
            notes=row[5] or None,
            original=row,
        )

    @staticmethod
    def from_github_issue(issue: Dict) -> 'PaperInfo':
        """
        Create a PaperInfo object from the Github issue information obtained from querying a Github Project.

        A RuntimeError is raised if no paper/issue number followed by a colon can be found in the
        issue title.
        """
        # Extract the paper number from the issue title
        title = issue['title']
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", title)
        if match is None:
            raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
        paper = match.group(1)

        # Titles of the form '<paper number>: <paper name>' repeat the paper number. Strip that
        # prefix so that the name does not duplicate the number, which is already printed in its
        # own column by for_printing().
        prefix = f'{paper}: '
        paper_name = title[len(prefix):] if title.startswith(prefix) else title

        # Extract any notes from the Github issue and populate the RST notes with them
        issue_description = issue['content']['body']
        notes = extract_between_markers(issue_description, 'BEGIN-RST-NOTES', 'END-RST-NOTES')
        notes = notes.strip() if notes is not None else notes

        return PaperInfo(
            paper_number=paper,
            paper_name=paper_name,
            status=PaperStatus.from_github_issue(issue),
            meeting=issue.get('meeting Voted', None),
            first_released_version=None, # TODO
            notes=notes,
            original=issue,
        )
241
def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo:
    """
    Combine the PaperInfo of a CSV row with the PaperInfo of the Github issue tracking it.

    The outcome depends on how the two statuses relate to each other:
    - CSV is 'To Do' and Github is 'In Progress': the CSV row is kept and only its notes are
      replaced by the ones from Github. Recording 'In Progress' would only create churn, since the
      status files document user-facing functionality in releases.
    - CSV is "less advanced" than Github: the Github information replaces the CSV row.
    - Both statuses are equal: the CSV row is kept and only its notes are replaced by the ones
      from Github.
    - Anything else: the mismatch is reported on standard output and the CSV row is kept untouched.

    A new PaperInfo is always returned; neither argument is modified.
    """
    todo_to_in_progress = (paper.status == PaperStatus(PaperStatus.TODO)
                           and gh.status == PaperStatus(PaperStatus.IN_PROGRESS))

    # Github is ahead of the CSV row (and not merely 'In Progress'): take the Github information.
    if not todo_to_in_progress and paper.status < gh.status:
        return copy.deepcopy(gh)

    # In every other case the CSV row is the basis of the result.
    merged = copy.deepcopy(paper)
    if todo_to_in_progress or paper.status == gh.status:
        merged.notes = gh.notes
    else:
        print(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}")
    return merged
269
def load_csv(file: pathlib.Path) -> List[Tuple]:
    """
    Read the given comma-separated file and return all of its rows, in order.
    """
    with open(file, newline='') as handle:
        return list(csv.reader(handle, delimiter=','))
277
def write_csv(output: pathlib.Path, rows: List[Tuple]):
    """
    Write the given rows to the output file, overwriting it. Every field is quoted and rows are
    terminated by a single newline character.
    """
    with open(output, 'w', newline='') as handle:
        csv.writer(handle, quoting=csv.QUOTE_ALL, lineterminator='\n').writerows(rows)
283
def create_github_issue(paper: PaperInfo, labels: List[str]) -> None:
    """
    Create a new Github issue representing the given PaperInfo.

    The user is shown the `gh` command line and asked for confirmation first; nothing is created
    unless the answer is exactly 'y'. Once created, the issue is added to the conformance project,
    its 'Meeting Voted' field is set to the meeting of the paper (when the paper has one) and its
    'Status' field is set to 'To Do'. The issue is tagged with 'libc++' and the provided labels.

    Any failure of the `gh` tool surfaces as the exception raised by the subprocess module.
    """
    # Convert the RST-flavored name used in the CSV files into something suitable for a title.
    paper_name = paper.paper_name.replace('``', '`').replace('\\', '')

    create_cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project',
                    '--title', f'{paper.paper_number}: {paper_name}',
                    '--body', f'**Link:** https://wg21.link/{paper.paper_number}',
                    '--project', 'libc++ Standards Conformance',
                    '--label', 'libc++']

    for label in labels:
        create_cli += ['--label', label]

    print("Do you want to create the following issue?")
    print(create_cli)
    answer = input("y/n: ")
    if answer == 'n':
        print("Not creating issue")
        return
    elif answer != 'y':
        print(f"Invalid answer {answer}, skipping")
        return

    print("Creating issue")
    issue_link = subprocess.check_output(create_cli).decode().strip()
    print(f"Created tracking issue for {paper.paper_number}: {issue_link}")

    # Retrieve the "Github project item ID" by re-adding the issue to the project again,
    # even though we created it inside the project in the first place.
    item_add_cli = ['gh', 'project', 'item-add', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--url', issue_link, '--format', 'json']
    item = json.loads(subprocess.check_output(item_add_cli).decode().strip())

    # Then, adjust the 'Meeting Voted' field of that item. A paper coming from a CSV row with an
    # empty meeting column has no meeting: skip the update in that case, since passing None as a
    # command-line argument would raise after the issue has already been created.
    if paper.meeting is not None:
        meeting_voted_cli = ['gh', 'project', 'item-edit',
                                    '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                                    '--field-id', 'PVTF_lADOAQWwKc4AlOgtzgdUEXI', '--text', paper.meeting,
                                    '--id', item['id']]
        subprocess.check_call(meeting_voted_cli)

    # And also adjust the 'Status' field of the item to 'To Do'.
    status_cli = ['gh', 'project', 'item-edit',
                                '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                                '--field-id', 'PVTSSF_lADOAQWwKc4AlOgtzgdUBak', '--single-select-option-id', 'f75ad846',
                                '--id', item['id']]
    subprocess.check_call(status_cli)
331
def sync_csv(rows: List[Tuple], from_github: List[PaperInfo], create_new: bool, labels: Optional[List[str]] = None) -> List[Tuple]:
    """
    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
    new CSV rows synchronized with the up-to-date information.

    The first row is treated as the header and is always preserved as-is, as are separator rows
    (rows that are blank or whose first entry is empty).

    If `create_new` is True and a paper from the CSV file is not tracked on Github yet, this also prompts
    to create a new issue on Github for tracking it. In that case the created issue is tagged with the
    provided labels, which must not be None.

    Rows that are tracked by no Github issue or by more than one are reported on standard output and
    preserved as-is.

    Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
    PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
    it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).
    """
    # Index the Github issues by paper number once, instead of scanning the whole list for every
    # row. Issues sharing a paper number keep their relative order.
    issues_by_paper: Dict[str, List[PaperInfo]] = {}
    for gh in from_github:
        issues_by_paper.setdefault(gh.paper_number, []).append(gh)

    results = [rows[0]] # Start with the header
    for row in rows[1:]: # Skip the header
        # If the row contains empty entries, this is a "separator row" between meetings.
        # Preserve it as-is. Completely blank lines are parsed as empty rows and are treated
        # the same way.
        if not row or row[0] == "":
            results.append(row)
            continue

        paper = PaperInfo.from_csv_row(row)

        # Find any Github issues tracking this paper. Each row must have one and exactly one Github
        # issue tracking it, which we validate below.
        tracking = issues_by_paper.get(paper.paper_number, [])

        # If there's more than one tracking issue, something is weird.
        if len(tracking) > 1:
            print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")
            results.append(row)
            continue

        # If there is no tracking issue for that row and we are creating new issues, do that.
        # Otherwise just log that we're missing an issue.
        if len(tracking) == 0:
            if create_new:
                assert labels is not None, "Missing labels when creating new Github issues"
                create_github_issue(paper, labels=labels)
            else:
                print(f"Can't find any Github issue for CSV row: {row}")
            results.append(row)
            continue

        results.append(merge(paper, tracking[0]).for_printing())

    return results
380
# Status files to synchronize, mapped to the labels that are applied to the Github issues
# created for rows of that file.
CSV_FILES_TO_SYNC = {
    "Cxx17Issues.csv": ["c++17", "lwg-issue"],
    "Cxx17Papers.csv": ["c++17", "wg21 paper"],
    "Cxx20Issues.csv": ["c++20", "lwg-issue"],
    "Cxx20Papers.csv": ["c++20", "wg21 paper"],
    "Cxx23Issues.csv": ["c++23", "lwg-issue"],
    "Cxx23Papers.csv": ["c++23", "wg21 paper"],
    "Cxx2cIssues.csv": ["c++26", "lwg-issue"],
    "Cxx2cPapers.csv": ["c++26", "wg21 paper"],
}
391
def main(argv):
    """
    Entry point of the script. `argv` is the list of command-line arguments without the program name.

    All the status files are first validated by parsing each of their non-separator rows. Unless
    --validate-only was passed, the Github project items are then loaded (from a json file when
    --load-github-from was passed, through the `gh` tool otherwise) and every status file is
    synchronized with them and rewritten in place.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog='synchronize-status-files',
        description='Synchronize the libc++ conformance status files with Github issues',
    )
    parser.add_argument(
        '--validate-only', action='store_true',
        help="Only perform the data validation of CSV files.",
    )
    parser.add_argument(
        '--create-new', action='store_true',
        help="Create new Github issues for CSV rows that do not correspond to any existing Github issue.",
    )
    parser.add_argument(
        '--load-github-from', type=str,
        help="A json file to load the Github project information from instead of querying the API. This is useful for testing to avoid rate limiting.",
    )
    options = parser.parse_args(argv)

    # The status files live in docs/Status under the directory two levels above this script.
    script_path = os.path.abspath(__file__)
    libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(script_path)))
    status_dir = libcxx_root / 'docs' / 'Status'

    # Perform data validation for all the CSV files.
    print("Performing data validation of the CSV files")
    for filename in CSV_FILES_TO_SYNC:
        data_rows = load_csv(status_dir / filename)[1:] # Skip the header
        for row in data_rows:
            if row[0] == "": # Skip separator rows
                continue
            PaperInfo.from_csv_row(row)

    if options.validate_only:
        return

    # Load all the Github issues tracking papers from Github.
    if options.load_github_from:
        print(f"Loading all issues from {options.load_github_from}")
        with open(options.load_github_from, 'r') as f:
            project_info = json.load(f)
    else:
        print("Loading all issues from Github")
        gh_command_line = [
            'gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT,
            '--owner', 'llvm',
            '--format', 'json',
            '--limit', '9999999',
        ]
        project_info = json.loads(subprocess.check_output(gh_command_line))
    from_github = [PaperInfo.from_github_issue(item) for item in project_info['items']]

    # Synchronize CSV files with the Github issues.
    for filename, labels in CSV_FILES_TO_SYNC.items():
        print(f"Synchronizing {filename} with Github issues")
        target = status_dir / filename
        current_rows = load_csv(target)
        updated_rows = sync_csv(current_rows, from_github, create_new=options.create_new, labels=labels)
        write_csv(target, updated_rows)
435
# When run as a script, forward the command-line arguments (without the program name) to main().
if __name__ == "__main__":
    import sys
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
439