#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
# xref: /llvm-project/llvm/utils/revert_checker.py
# (revision 0a53f43c0c7e33cde07b24169e8f45db7eba2fea)
"""Checks for reverts of commits across a given git commit.

To clarify the meaning of 'across' with an example, if we had the following
commit history (where `a -> b` notes that `b` is a direct child of `a`):

123abc -> 223abc -> 323abc -> 423abc -> 523abc

And where 423abc is a revert of 223abc, this revert is considered to be 'across'
323abc. More generally, a revert A of a parent commit B is considered to be
'across' a commit C if C is a parent of A and B is a parent of C.

Please note that revert detection in general is really difficult, since merge
conflicts/etc always introduce _some_ amount of fuzziness. This script just
uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
though.

This is designed to be used in one of two ways: an import in Python, or run
directly from a shell. If you want to import this, the `find_reverts`
function is the thing to look at. If you'd rather use this from a shell, have a
usage example:

```
./revert_checker.py c47f97169 origin/main origin/release/12.x
```

This checks for all reverts from the tip of origin/main to c47f97169, which are
across the latter. It then does the same for origin/release/12.x to c47f97169.
Duplicate reverts discovered when walking both roots (origin/main and
origin/release/12.x) are deduplicated in output.
"""

import argparse
import collections
import logging
import re
import subprocess
import sys
from typing import Dict, Generator, Iterable, List, NamedTuple, Optional, Tuple

# Fail early with a readable message when run on an unsupported interpreter.
assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported."

# People are creative with their reverts, and heuristics are a bit difficult.
# At a glance, most reverts have "This reverts commit ${full_sha}". Many others
# have `Reverts llvm/llvm-project#${PR_NUMBER}`.
#
# By their powers combined, we should be able to automatically catch something
# like 80% of reverts with reasonable confidence. At some point, human
# intervention will always be required (e.g., I saw
# ```
# This reverts commit ${commit_sha_1} and
# also ${commit_sha_2_shorthand}
# ```
# during my sample)

# Revert targets parsed out of a single commit message. Both fields are only
# *candidates*: nothing in this tuple has been checked against the repository.
_CommitMessageReverts = NamedTuple(
    "_CommitMessageReverts",
    [
        # SHAs (full or abbreviated) that the message claims to revert.
        ("potential_shas", List[str]),
        # llvm/llvm-project PR numbers that the message claims to revert.
        ("potential_pr_numbers", List[int]),
    ],
)


def _try_parse_reverts_from_commit_message(
    commit_message: str,
) -> _CommitMessageReverts:
    """Tries to parse revert SHAs and LLVM PR numbers from the commit message.

    Three textual conventions are recognized:
      - `This reverts commit <40 hex chars>` anywhere in the message,
      - a first line starting with `Revert <6+ hex chars> "`,
      - `Reverts llvm/llvm-project#<number>` anywhere in the message.

    Args:
        commit_message: the full commit message. May be empty.

    Returns:
        A namedtuple containing:
        - A list of potentially reverted SHAs
        - A list of potentially reverted LLVM PR numbers
    """
    if not commit_message:
        return _CommitMessageReverts([], [])

    sha_reverts = re.findall(
        r"This reverts commit ([a-f0-9]{40})\b",
        commit_message,
    )

    # The subject-line form may carry an abbreviated SHA; callers are expected
    # to expand it before use.
    first_line = commit_message.splitlines()[0]
    initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
    if initial_revert:
        sha_reverts.append(initial_revert.group(1))

    pr_numbers = [
        int(x)
        for x in re.findall(
            r"Reverts llvm/llvm-project#(\d+)",
            commit_message,
        )
    ]

    return _CommitMessageReverts(
        potential_shas=sha_reverts,
        potential_pr_numbers=pr_numbers,
    )


def _stream_stdout(
    command: List[str], cwd: Optional[str] = None
) -> Generator[str, None, None]:
    """Runs `command` and lazily yields its stdout, one line at a time.

    Lines are decoded as UTF-8 (undecodable bytes are replaced) and keep their
    trailing newline. The command's exit status is not inspected, so a failing
    command simply yields whatever it printed to stdout, possibly nothing.

    Args:
        command: argv of the command to run.
        cwd: directory to run the command in; None means the current one.
    """
    with subprocess.Popen(
        command,
        cwd=cwd,
        stdout=subprocess.PIPE,
        encoding="utf-8",
        errors="replace",
    ) as p:
        assert p.stdout is not None  # for mypy's happiness.
        yield from p.stdout


def _resolve_sha(git_dir: str, sha: str) -> str:
    """Expands a possibly-abbreviated `sha` with `git rev-parse`.

    A 40-character input is assumed to already be a full SHA and is returned
    untouched, without running git.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` exits nonzero. Git's
            stderr is discarded.
    """
    if len(sha) == 40:
        return sha

    return subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", sha],
        encoding="utf-8",
        stderr=subprocess.DEVNULL,
    ).strip()


# One commit as read back from `git log`.
_LogEntry = NamedTuple(
    "_LogEntry",
    [
        # Full SHA of the commit.
        ("sha", str),
        # Commit message with trailing whitespace removed.
        ("commit_message", str),
    ],
)


def _log_stream(git_dir: str, root_sha: str, end_at_sha: str) -> Iterable[_LogEntry]:
    """Yields a `_LogEntry` per commit that `git log ^end_at_sha root_sha` prints.

    Entries are yielded in the order git prints them. Each commit is emitted
    by git as a separator line, then the SHA, then the raw message body; this
    function splits that stream back into individual entries.

    Args:
        git_dir: git directory to run `git log` in.
        root_sha: the commit to start logging from.
        end_at_sha: the commit whose history is excluded from the log.
    """
    # A line that's very unlikely to appear in a real commit message.
    sep = 50 * "<>"
    log_command = [
        "git",
        "-C",
        git_dir,
        "log",
        "^" + end_at_sha,
        root_sha,
        "--format=" + sep + "%n%H%n%B%n",
    ]

    stdout_stream = iter(_stream_stdout(log_command))

    # Find the next separator line. If there's nothing to log, it may not exist.
    # It might not be the first line if git feels complainy.
    found_commit_header = False
    for line in stdout_stream:
        if line.rstrip() == sep:
            found_commit_header = True
            break

    while found_commit_header:
        # The line right after a separator is always the commit's SHA.
        sha = next(stdout_stream, None)
        assert sha is not None, "git died?"
        sha = sha.rstrip()

        commit_message = []

        # Everything up to the next separator (or EOF) is this commit's message.
        found_commit_header = False
        for line in stdout_stream:
            line = line.rstrip()
            if line == sep:
                found_commit_header = True
                break
            commit_message.append(line)

        yield _LogEntry(sha, "\n".join(commit_message).rstrip())


def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Lazily yields the SHAs printed by `git rev-list --first-parent base..head`.

    Args:
        git_dir: git directory to run `git rev-list` in.
        base_ref: left-hand side of the `..` range.
        head_ref: right-hand side of the `..` range.
    """
    rev_list = [
        "git",
        "-C",
        git_dir,
        "rev-list",
        "--first-parent",
        f"{base_ref}..{head_ref}",
    ]
    return (x.strip() for x in _stream_stdout(rev_list))


def _rev_parse(git_dir: str, ref: str) -> str:
    """Returns the stripped output of `git rev-parse ref`, run in `git_dir`.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` exits nonzero.
    """
    return subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", ref],
        encoding="utf-8",
    ).strip()


# A detected revert, as returned by `find_reverts`.
Revert = NamedTuple(
    "Revert",
    [
        # SHA of the commit that performs the revert.
        ("sha", str),
        # SHA of the commit being reverted.
        ("reverted_sha", str),
    ],
)


def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
    """Finds the closest common parent commit between `ref_a` and `ref_b`.

    This is the stripped output of `git merge-base ref_a ref_b`.

    Raises:
        subprocess.CalledProcessError: if `git merge-base` exits nonzero.
    """
    return subprocess.check_output(
        ["git", "-C", git_dir, "merge-base", ref_a, ref_b],
        encoding="utf-8",
    ).strip()


def _load_pr_commit_mappings(
    git_dir: str, root: str, min_ref: str
) -> Dict[int, List[str]]:
    """Maps PR numbers to the SHAs of commits in `min_ref..root` that cite them.

    A commit is associated with PR N when its subject line ends with ` (#N)`.
    Commits whose subject has no such suffix are ignored.

    Args:
        git_dir: git directory to run `git log` in.
        root: the newest ref to consider.
        min_ref: the ref at which to stop; it is the excluded side of the range.

    Returns:
        A dict of PR number -> SHAs, with each list in `git log` order.
    """
    git_log = ["git", "log", "--format=%H %s", f"{min_ref}..{root}"]
    results: Dict[int, List[str]] = collections.defaultdict(list)
    pr_regex = re.compile(r"\s\(#(\d+)\)$")
    for line in _stream_stdout(git_log, cwd=git_dir):
        m = pr_regex.search(line)
        if not m:
            continue

        pr_number = int(m.group(1))
        # Each line is "<sha> <subject>", so the SHA is the first token.
        sha = line.split(None, 1)[0]
        # N.B., these are kept in log (read: reverse chronological) order,
        # which is what's expected by `find_reverts`.
        results[pr_number].append(sha)
    return results


# N.B., max_pr_lookback's default of 20K commits is arbitrary, but should be
# enough for the 99% case of reverts: rarely should someone land a cleanish
# revert of a >6 month old change...
def find_reverts(
    git_dir: str, across_ref: str, root: str, max_pr_lookback: int = 20000
) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    These reverts are returned in order of oldest reverts first.

    Args:
        git_dir: git directory to find reverts in.
        across_ref: the ref to find reverts across.
        root: the 'main' ref to look for reverts on.
        max_pr_lookback: this function uses heuristics to map PR numbers to
            SHAs. These heuristics require that commit history from `root` to
            `some_parent_of_root` is loaded in memory. `max_pr_lookback` is how
            many commits behind `across_ref` should be loaded in memory.

    Returns:
        One `Revert` per (reverting commit, reverted commit) pair where the
        reverting commit is logged between `across_ref` and `root` and the
        reverted commit is not one of the first-parent commits in that range.

    Raises:
        ValueError: if `across_ref` isn't an ancestor of `root`.
        subprocess.CalledProcessError: if git fails to resolve `across_ref`
            or `root`, or to compute their merge base.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both halves need the `f` prefix; implicit concatenation doesn't
        # carry it over from the first literal to the second.
        raise ValueError(
            f"{across_sha} isn't an ancestor of {root_sha} "
            f"(common ancestor: {common_ancestor})"
        )

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug(
        "%d commits appear between %s and %s",
        len(intermediate_commits),
        across_sha,
        root_sha,
    )

    all_reverts = []
    # Lazily load PR <-> commit mappings, since it can be expensive.
    pr_commit_mappings = None
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts, pr_reverts = _try_parse_reverts_from_commit_message(
            commit_message,
        )
        if pr_reverts:
            if pr_commit_mappings is None:
                logging.info(
                    "Loading PR <-> commit mappings. This may take a moment..."
                )
                # NOTE(review): if `across_sha` has fewer than
                # `max_pr_lookback` ancestors, `<sha>~N` presumably fails to
                # resolve. The loader doesn't check git's exit status, so the
                # mapping would then come back empty -- confirm.
                pr_commit_mappings = _load_pr_commit_mappings(
                    git_dir, root_sha, f"{across_sha}~{max_pr_lookback}"
                )
                logging.info(
                    "Loaded %d PR <-> commit mappings", len(pr_commit_mappings)
                )

            for reverted_pr_number in pr_reverts:
                reverted_shas = pr_commit_mappings.get(reverted_pr_number)
                if not reverted_shas:
                    logging.warning(
                        "No SHAs for reverted PR %d (commit %s)",
                        reverted_pr_number,
                        sha,
                    )
                    continue
                logging.debug(
                    "Inferred SHAs %s for reverted PR %d (commit %s)",
                    reverted_shas,
                    reverted_pr_number,
                    sha,
                )
                reverts.extend(reverted_shas)

        if not reverts:
            continue

        # Expand abbreviated SHAs and drop duplicates. A commit message can
        # name something git can't resolve (a typo, an ambiguous prefix, a
        # commit from another repository); that's a bad heuristic match, not a
        # reason to abort the whole scan.
        resolved_reverts = set()
        for candidate in reverts:
            try:
                resolved_reverts.add(_resolve_sha(git_dir, candidate))
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    candidate,
                    sha,
                )

        for reverted_sha in sorted(resolved_reverts):
            if reverted_sha in intermediate_commits:
                logging.debug(
                    "Commit %s reverts %s, which happened after %s",
                    sha,
                    reverted_sha,
                    across_sha,
                )
                continue

            # Full SHAs were never run through git above, so check here that
            # the object exists and really is a commit.
            try:
                object_type = subprocess.check_output(
                    ["git", "-C", git_dir, "cat-file", "-t", reverted_sha],
                    encoding="utf-8",
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    reverted_sha,
                    sha,
                )
                continue

            if object_type == "commit":
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            logging.error(
                "%s claims to revert %s -- which isn't a commit -- %s",
                sha,
                object_type,
                reverted_sha,
            )

    # Since `all_reverts` contains reverts in log order (e.g., newer comes before
    # older), we need to reverse this to keep with our guarantee of older =
    # earlier in the result.
    all_reverts.reverse()
    return all_reverts


def _main() -> None:
    """Command-line entry point: prints the reverts found for each given root.

    Parses arguments, configures logging, runs `find_reverts` once per root,
    and prints one "<sha> claims to revert <sha>" line per unique revert.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("base_ref", help="Git ref or sha to check for reverts around.")
    parser.add_argument("-C", "--git_dir", default=".", help="Git directory to use.")
    parser.add_argument("root", nargs="+", help="Root(s) to search for commits from.")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument(
        "-u",
        "--review_url",
        action="store_true",
        help="Format SHAs as llvm review URLs",
    )
    opts = parser.parse_args()

    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's way
    # easier to reason about output that comes in an order that's meaningful to
    # git.
    seen_reverts = set()
    all_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert not in seen_reverts:
                seen_reverts.add(revert)
                all_reverts.append(revert)

    # With --review_url, every SHA is printed as a link to the commit page.
    sha_prefix = (
        "https://github.com/llvm/llvm-project/commit/" if opts.review_url else ""
    )
    for revert in all_reverts:
        sha_fmt = f"{sha_prefix}{revert.sha}"
        reverted_sha_fmt = f"{sha_prefix}{revert.reverted_sha}"
        print(f"{sha_fmt} claims to revert {reverted_sha_fmt}")


# Only run the CLI when executed as a script, so the module stays importable.
if __name__ == "__main__":
    _main()