xref: /llvm-project/libcxx/utils/synchronize_csv_status_files.py (revision f0a3f8a370e3c85ee00cbc5e5d1c29e8ad3c51da)
1#!/usr/bin/env python3
2# ===----------------------------------------------------------------------===##
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7#
8# ===----------------------------------------------------------------------===##
9
10from typing import List, Dict, Tuple, Optional
11import csv
12import itertools
13import json
14import os
15import pathlib
16import re
17import subprocess
18
# Number of the 'Libc++ Standards Conformance' project on Github.
#
# This is kept as a string because main() passes it verbatim as a command-line
# argument to `gh project item-list`.
LIBCXX_CONFORMANCE_PROJECT = '31'
21
class PaperInfo:
    """
    Status-tracking information about a single paper or LWG issue.

    A PaperInfo can be created from a row of one of the status-tracking CSV files (see from_csv_row)
    or from a Github issue obtained by querying a Github Project (see from_github_issue), and can be
    turned back into a CSV row with for_printing.
    """

    paper_number: str
    """
    Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
    """

    paper_name: str
    """
    Plain text string representing the name of the paper.
    """

    meeting: Optional[str]
    """
    Plain text string representing the meeting at which the paper/issue was voted.
    """

    status: Optional[str]
    """
    Status of the paper/issue. This must be '|Complete|', '|Nothing To Do|', '|In Progress|',
    '|Partial|' or 'Resolved by <something>'.
    """

    first_released_version: Optional[str]
    """
    First version of LLVM in which this paper/issue was resolved.
    """

    labels: Optional[List[str]]
    """
    List of labels to associate to the issue in the status-tracking table. Supported labels are
    'format', 'ranges', 'spaceship', 'flat_containers', 'concurrency TS' and 'DR'.
    """

    original: Optional[object]
    """
    Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
    was used to generate this PaperInfo and is useful for error reporting purposes.
    """

    def __init__(self, paper_number: str, paper_name: str,
                       meeting: Optional[str] = None,
                       status: Optional[str] = None,
                       first_released_version: Optional[str] = None,
                       labels: Optional[List[str]] = None,
                       original: Optional[object] = None):
        self.paper_number = paper_number
        self.paper_name = paper_name
        self.meeting = meeting
        self.status = status
        self.first_released_version = first_released_version
        self.labels = labels
        self.original = original

    def for_printing(self) -> Tuple[str, str, str, str, str, str]:
        """
        Return the six columns of the CSV row representing this paper.

        The paper number is rendered as a reStructuredText link to wg21.link, each label is wrapped
        in '|' characters, and any field that is None is rendered as an empty string.
        """
        return (
            f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
            self.paper_name,
            self.meeting if self.meeting is not None else '',
            self.status if self.status is not None else '',
            self.first_released_version if self.first_released_version is not None else '',
            ' '.join(f'|{label}|' for label in self.labels) if self.labels is not None else '',
        )

    def __repr__(self) -> str:
        # Prefer showing the object this PaperInfo was created from, since that is what the user
        # has to go fix when an error is reported.
        return repr(self.original) if self.original is not None else repr(self.for_printing())

    def is_implemented(self) -> bool:
        """
        Return whether this paper is considered implemented.

        A paper without a status, or whose status mentions 'in progress' or 'partial' (in any case),
        is not implemented. Any other status counts as implemented.
        """
        if self.status is None:
            return False
        if re.search(r'(in progress|partial)', self.status.lower()):
            return False
        return True

    @staticmethod
    def from_csv_row(row: Tuple[str, str, str, str, str, str]) -> "PaperInfo":
        """
        Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.

        Empty columns are stored as None. Raises RuntimeError if no paper/issue number can be found in
        the first column.
        """
        # Extract the paper number from the first column
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
        if match is None:
            raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")

        # Labels are written as '|label|' separated by spaces, and a label may itself contain a
        # space (e.g. '|concurrency TS|'), so splitting the cell on spaces would tear such a label
        # apart. Match whole '|...|' groups first and fall back to bare space-separated words for
        # anything that isn't wrapped.
        labels = []
        for wrapped, bare in re.findall(r"\|([^|]+)\||([^\s|]+)", row[5]):
            label = (wrapped or bare).strip()
            if label:
                labels.append(label)

        return PaperInfo(
            paper_number=match.group(1),
            paper_name=row[1],
            meeting=row[2] or None,
            status=row[3] or None,
            first_released_version=row[4] or None,
            labels=labels or None,
            original=row,
        )

    @staticmethod
    def from_github_issue(issue: Dict) -> "PaperInfo":
        """
        Create a PaperInfo object from the Github issue information obtained from querying a Github Project.

        Only the 'title' key is required. 'status', 'labels' and 'meeting Voted' are treated as
        optional. Raises RuntimeError if no paper/issue number can be found in the title.
        """
        # Extract the paper number from the issue title
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title'])
        if match is None:
            raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
        paper = match.group(1)

        # Figure out the status of the paper according to the Github project information.
        #
        # Sadly, we can't make a finer-grained distinction about *how* the issue
        # was closed (such as Nothing To Do or similar).
        status = '|Complete|' if issue.get('status') == 'Done' else None

        # Handle labels. An issue may not carry a 'labels' entry at all, so treat a missing (or
        # empty) entry as "no labels", like the other optional fields.
        valid_labels = ('format', 'ranges', 'spaceship', 'flat_containers', 'concurrency TS', 'DR')
        labels = [label for label in (issue.get('labels') or []) if label in valid_labels]

        return PaperInfo(
            paper_number=paper,
            paper_name=issue['title'],
            meeting=issue.get('meeting Voted', None),
            status=status,
            first_released_version=None, # TODO
            labels=labels if labels else None,
            original=issue,
        )
145
def load_csv(file: pathlib.Path) -> List[Tuple]:
    """
    Read the comma-separated file at the given path and return all of its rows, in order.

    Every row (including the header, if any) is returned as a list of strings.
    """
    # newline='' lets the csv module handle line endings itself, including newlines embedded
    # inside quoted fields.
    with open(file, newline='') as stream:
        return list(csv.reader(stream, delimiter=','))
153
def write_csv(output: pathlib.Path, rows: List[Tuple]):
    """
    Write the given rows to the file at the given path, replacing any previous content.

    Every field is quoted and every row is terminated by a single '\\n'.
    """
    with open(output, 'w', newline='') as stream:
        csv.writer(stream, quoting=csv.QUOTE_ALL, lineterminator='\n').writerows(rows)
159
def sync_csv(rows: List[Tuple], from_github: List["PaperInfo"]) -> List[Tuple]:
    """
    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
    new CSV rows synchronized with the up-to-date information.

    The first row is treated as the header and is preserved as-is, and so are "separator rows" (blank
    rows, or rows whose first column is empty). An empty list of rows yields an empty result.

    Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
    PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
    a warning is printed and the row stays implemented (i.e. the result is not a CSV row where the paper
    is *not* implemented).

    Raises RuntimeError if a row that is not implemented yet is tracked by no Github issue, or by more
    than one.
    """
    # An empty status file has no header and nothing to synchronize.
    if not rows:
        return []

    # Index the Github information by paper number once, so that we don't have to scan the whole
    # list again for every single row. The relative order of the issues is preserved.
    github_by_number = {}
    for gh in from_github:
        github_by_number.setdefault(gh.paper_number, []).append(gh)

    results = [rows[0]] # Start with the header
    for row in rows[1:]: # Skip the header
        # If the row contains empty entries, this is a "separator row" between meetings.
        # Preserve it as-is. A blank line in the file is read as a row without any column at all,
        # so handle that too.
        if not row or row[0] == "":
            results.append(row)
            continue

        paper = PaperInfo.from_csv_row(row)

        # Find any Github issues tracking this paper
        tracking = github_by_number.get(paper.paper_number, [])

        # If the row is already implemented, basically keep it unchanged but also validate that we're not
        # out-of-sync with any still-open Github issue tracking the same paper.
        if paper.is_implemented():
            dangling = [gh for gh in tracking if not gh.is_implemented()]
            if dangling:
                print(f"We found the following open tracking issues for a row which is already marked as implemented:\nrow: {row}\ntracking issues: {dangling}")
                print("The Github issue should be closed if the work has indeed been done.")
            results.append(paper.for_printing())
            continue

        # If there is no tracking issue for that row in the CSV, this is an error since we're
        # missing a Github issue.
        if not tracking:
            raise RuntimeError(f"Can't find any Github issue for CSV row which isn't marked as done yet: {row}")

        # If there's more than one tracking issue, something is weird too.
        if len(tracking) > 1:
            raise RuntimeError(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")

        # If the issue is closed, synchronize the row based on the Github issue. Otherwise, use the
        # existing CSV row as-is.
        results.append(tracking[0].for_printing() if tracking[0].is_implemented() else row)

    return results
206
# Names of the status-tracking CSV files that get synchronized with Github.
#
# main() looks up each of these names under 'docs/Status' in the libc++ root directory
# and rewrites the file in place.
CSV_FILES_TO_SYNC = [
    'Cxx17Issues.csv',
    'Cxx17Papers.csv',
    'Cxx20Issues.csv',
    'Cxx20Papers.csv',
    'Cxx23Issues.csv',
    'Cxx23Papers.csv',
    'Cxx2cIssues.csv',
    'Cxx2cPapers.csv',
]
217
def main():
    """
    Synchronize every file listed in CSV_FILES_TO_SYNC with the issues tracked on Github.

    The issues are obtained by running the `gh` command-line tool, and each CSV file is rewritten
    in place. The libc++ root is taken to be the parent of the directory containing this script.
    """
    script_path = os.path.abspath(__file__)
    libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(script_path)))
    status_dir = libcxx_root / 'docs' / 'Status'

    # Extract the list of PaperInfos from issues we're tracking on Github.
    print("Loading all issues from Github")
    gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
    raw_output = subprocess.check_output(gh_command_line)
    project_items = json.loads(raw_output)['items']
    from_github = list(map(PaperInfo.from_github_issue, project_items))

    for filename in CSV_FILES_TO_SYNC:
        print(f"Synchronizing {filename} with Github issues")
        status_file = status_dir / filename
        existing_rows = load_csv(status_file)
        write_csv(status_file, sync_csv(existing_rows, from_github))

if __name__ == '__main__':
    main()
236