#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
"""Checks for reverts of commits across a given git commit.

To clarify the meaning of 'across' with an example, if we had the following
commit history (where `a -> b` notes that `b` is a direct child of `a`):

123abc -> 223abc -> 323abc -> 423abc -> 523abc

And where 423abc is a revert of 223abc, this revert is considered to be 'across'
323abc. More generally, a revert A of a parent commit B is considered to be
'across' a commit C if C is a parent of A and B is a parent of C.

Please note that revert detection in general is really difficult, since merge
conflicts/etc always introduce _some_ amount of fuzziness. This script just
uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
though.

This is designed to be used in one of two ways: an import in Python, or run
directly from a shell. If you want to import this, the `find_reverts`
function is the thing to look at. If you'd rather use this from a shell, have a
usage example:

```
./revert_checker.py c47f97169 origin/main origin/release/12.x
```

This checks for all reverts from the tip of origin/main to c47f97169, which are
across the latter. It then does the same for origin/release/12.x to c47f97169.
Duplicate reverts discovered when walking both roots (origin/main and
origin/release/12.x) are deduplicated in output.
"""

import argparse
import collections
import logging
import re
import subprocess
import sys
from typing import Dict, Generator, Iterable, List, NamedTuple, Optional, Tuple

assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported."

# People are creative with their reverts, and heuristics are a bit difficult.
# At a glance, most reverts have "This reverts commit ${full_sha}". Many others
# have `Reverts llvm/llvm-project#${PR_NUMBER}`.
#
# By their powers combined, we should be able to automatically catch something
# like 80% of reverts with reasonable confidence. At some point, human
# intervention will always be required (e.g., I saw
# ```
# This reverts commit ${commit_sha_1} and
# also ${commit_sha_2_shorthand}
# ```
# during my sample)

_CommitMessageReverts = NamedTuple(
    "_CommitMessageReverts",
    [
        ("potential_shas", List[str]),
        ("potential_pr_numbers", List[int]),
    ],
)


def _try_parse_reverts_from_commit_message(
    commit_message: str,
) -> _CommitMessageReverts:
    """Tries to parse revert SHAs and LLVM PR numbers from the commit message.

    Args:
        commit_message: the full commit message (subject and body).

    Returns:
        A namedtuple containing:
        - A list of potentially reverted SHAs
        - A list of potentially reverted LLVM PR numbers
    """
    if not commit_message:
        return _CommitMessageReverts([], [])

    # Full 40-character SHAs mentioned anywhere in the message.
    sha_reverts = re.findall(
        r"This reverts commit ([a-f0-9]{40})\b",
        commit_message,
    )

    # Subject lines of the form `Revert <sha> "..."`; the SHA here may be
    # abbreviated, so callers are expected to resolve it.
    first_line = commit_message.splitlines()[0]
    initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
    if initial_revert:
        sha_reverts.append(initial_revert.group(1))

    pr_numbers = [
        int(x)
        for x in re.findall(
            r"Reverts llvm/llvm-project#(\d+)",
            commit_message,
        )
    ]

    return _CommitMessageReverts(
        potential_shas=sha_reverts,
        potential_pr_numbers=pr_numbers,
    )


def _stream_stdout(
    command: List[str], cwd: Optional[str] = None
) -> Generator[str, None, None]:
    """Runs `command` and yields its stdout, one line at a time.

    Output is decoded as UTF-8, with undecodable bytes replaced. The exit
    status of `command` is not checked.

    Args:
        command: the argv to execute.
        cwd: directory to run `command` in; None means the current directory.
    """
    with subprocess.Popen(
        command,
        cwd=cwd,
        stdout=subprocess.PIPE,
        encoding="utf-8",
        errors="replace",
    ) as p:
        assert p.stdout is not None  # for mypy's happiness.
        yield from p.stdout


def _resolve_sha(git_dir: str, sha: str) -> str:
    """Expands `sha` to a full SHA using `git rev-parse` in `git_dir`.

    Strings that are already 40 characters long are returned untouched, without
    consulting git.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` fails.
    """
    if len(sha) == 40:
        return sha

    return subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", sha],
        encoding="utf-8",
        stderr=subprocess.DEVNULL,
    ).strip()


_LogEntry = NamedTuple(
    "_LogEntry",
    [
        ("sha", str),
        ("commit_message", str),
    ],
)


def _log_stream(git_dir: str, root_sha: str, end_at_sha: str) -> Iterable[_LogEntry]:
    """Yields a `_LogEntry` for each commit `git log ^end_at_sha root_sha` lists.

    Entries are yielded in the order git prints them.
    """
    # A line that is vanishingly unlikely to appear in a commit message, used
    # to delimit commits in the log output.
    sep = 50 * "<>"
    log_command = [
        "git",
        "-C",
        git_dir,
        "log",
        "^" + end_at_sha,
        root_sha,
        "--format=" + sep + "%n%H%n%B%n",
    ]

    stdout_stream = iter(_stream_stdout(log_command))

    # Find the next separator line. If there's nothing to log, it may not exist.
    # It might not be the first line if git feels complainy.
    found_commit_header = False
    for line in stdout_stream:
        if line.rstrip() == sep:
            found_commit_header = True
            break

    while found_commit_header:
        # The line right after a separator is always the commit's SHA (%H).
        sha = next(stdout_stream, None)
        assert sha is not None, "git died?"
        sha = sha.rstrip()

        commit_message = []

        # Everything up to the next separator (or EOF) is the message body.
        found_commit_header = False
        for line in stdout_stream:
            line = line.rstrip()
            if line.rstrip() == sep:
                found_commit_header = True
                break
            commit_message.append(line)

        yield _LogEntry(sha, "\n".join(commit_message).rstrip())


def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Yields SHAs from `git rev-list --first-parent base_ref..head_ref`."""
    rev_list = [
        "git",
        "-C",
        git_dir,
        "rev-list",
        "--first-parent",
        f"{base_ref}..{head_ref}",
    ]
    return (x.strip() for x in _stream_stdout(rev_list))


def _rev_parse(git_dir: str, ref: str) -> str:
    """Returns the output of `git rev-parse ref`, run in `git_dir`.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` fails.
    """
    return subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", ref],
        encoding="utf-8",
    ).strip()


Revert = NamedTuple(
    "Revert",
    [
        ("sha", str),
        ("reverted_sha", str),
    ],
)


def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
    """Finds the closest common parent commit between `ref_a` and `ref_b`."""
    return subprocess.check_output(
        ["git", "-C", git_dir, "merge-base", ref_a, ref_b],
        encoding="utf-8",
    ).strip()


def _load_pr_commit_mappings(
    git_dir: str, root: str, min_ref: str
) -> Dict[int, List[str]]:
    """Maps PR numbers to the SHAs of commits whose subject ends in ` (#N)`.

    Only commits listed by `git log min_ref..root` are considered.

    NOTE: `_stream_stdout` does not check git's exit status, so a failing
    `git log` simply results in fewer (possibly zero) mappings.
    """
    git_log = ["git", "log", "--format=%H %s", f"{min_ref}..{root}"]
    results = collections.defaultdict(list)
    pr_regex = re.compile(r"\s\(#(\d+)\)$")
    for line in _stream_stdout(git_log, cwd=git_dir):
        m = pr_regex.search(line)
        if not m:
            continue

        pr_number = int(m.group(1))
        sha = line.split(None, 1)[0]
        # N.B., these are kept in log (read: reverse chronological) order,
        # which is what's expected by `find_reverts`.
        results[pr_number].append(sha)
    return results


# N.B., max_pr_lookback's default of 20K commits is arbitrary, but should be
# enough for the 99% case of reverts: rarely should someone land a cleanish
# revert of a >6 month old change...
def find_reverts(
    git_dir: str, across_ref: str, root: str, max_pr_lookback: int = 20000
) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    These reverts are returned in order of oldest reverts first.

    Args:
        git_dir: git directory to find reverts in.
        across_ref: the ref to find reverts across.
        root: the 'main' ref to look for reverts on.
        max_pr_lookback: this function uses heuristics to map PR numbers to
            SHAs. These heuristics require that commit history from `root` to
            `some_parent_of_root` is loaded in memory. `max_pr_lookback` is how
            many commits behind `across_ref` should be loaded in memory.

    Returns:
        A list of `Revert`s, oldest first.

    Raises:
        ValueError: if the merge-base of `across_ref` and `root` is not
            `across_ref` itself.
        subprocess.CalledProcessError: if `across_ref` or `root` can't be
            resolved by git, or their merge-base can't be computed.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both halves must be f-strings; implicit concatenation does not carry
        # the `f` prefix over to the second literal.
        raise ValueError(
            f"{across_sha} isn't an ancestor of {root_sha} "
            f"(common ancestor: {common_ancestor})"
        )

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug(
        "%d commits appear between %s and %s",
        len(intermediate_commits),
        across_sha,
        root_sha,
    )

    all_reverts = []
    # Lazily load PR <-> commit mappings, since it can be expensive.
    pr_commit_mappings = None
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts, pr_reverts = _try_parse_reverts_from_commit_message(
            commit_message,
        )
        if pr_reverts:
            if pr_commit_mappings is None:
                logging.info(
                    "Loading PR <-> commit mappings. This may take a moment..."
                )
                pr_commit_mappings = _load_pr_commit_mappings(
                    git_dir, root_sha, f"{across_sha}~{max_pr_lookback}"
                )
                logging.info(
                    "Loaded %d PR <-> commit mappings", len(pr_commit_mappings)
                )

            for reverted_pr_number in pr_reverts:
                reverted_shas = pr_commit_mappings.get(reverted_pr_number)
                if not reverted_shas:
                    logging.warning(
                        "No SHAs for reverted PR %d (commit %s)",
                        reverted_pr_number,
                        sha,
                    )
                    continue
                logging.debug(
                    "Inferred SHAs %s for reverted PR %d (commit %s)",
                    reverted_shas,
                    reverted_pr_number,
                    sha,
                )
                reverts.extend(reverted_shas)

        if not reverts:
            continue

        # Abbreviated SHAs pulled out of commit subjects may be unknown to git
        # or ambiguous. Skip those with a warning rather than aborting the
        # whole scan, mirroring how unresolvable objects are handled below.
        resolved_reverts = set()
        for candidate in reverts:
            try:
                resolved_reverts.add(_resolve_sha(git_dir, candidate))
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    candidate,
                    sha,
                )

        for reverted_sha in sorted(resolved_reverts):
            if reverted_sha in intermediate_commits:
                logging.debug(
                    "Commit %s reverts %s, which happened after %s",
                    sha,
                    reverted_sha,
                    across_sha,
                )
                continue

            try:
                object_type = subprocess.check_output(
                    ["git", "-C", git_dir, "cat-file", "-t", reverted_sha],
                    encoding="utf-8",
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    reverted_sha,
                    sha,
                )
                continue

            if object_type == "commit":
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            logging.error(
                "%s claims to revert %s -- which isn't a commit -- %s",
                sha,
                object_type,
                reverted_sha,
            )

    # Since `all_reverts` contains reverts in log order (e.g., newer comes before
    # older), we need to reverse this to keep with our guarantee of older =
    # earlier in the result.
    all_reverts.reverse()
    return all_reverts


def _main() -> None:
    """Parses command-line arguments and prints every revert that was found."""
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("base_ref", help="Git ref or sha to check for reverts around.")
    parser.add_argument("-C", "--git_dir", default=".", help="Git directory to use.")
    parser.add_argument("root", nargs="+", help="Root(s) to search for commits from.")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument(
        "-u",
        "--review_url",
        action="store_true",
        help="Format SHAs as llvm review URLs",
    )
    opts = parser.parse_args()

    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's way
    # easier to reason about output that comes in an order that's meaningful to
    # git.
    seen_reverts = set()
    all_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert not in seen_reverts:
                seen_reverts.add(revert)
                all_reverts.append(revert)

    sha_prefix = (
        "https://github.com/llvm/llvm-project/commit/" if opts.review_url else ""
    )
    for revert in all_reverts:
        sha_fmt = f"{sha_prefix}{revert.sha}"
        reverted_sha_fmt = f"{sha_prefix}{revert.reverted_sha}"
        print(f"{sha_fmt} claims to revert {reverted_sha_fmt}")


if __name__ == "__main__":
    _main()