#!/usr/bin/env python3
#
# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==--------------------------------------------------------------------------------------==#

"""
This script is run by GitHub actions to ensure that the code in PRs conforms to
the coding style of LLVM. It can also be installed as a pre-commit git hook to
check the coding style before submitting it. The canonical source of this script
is in the LLVM source tree under llvm/utils/git.

For C/C++ code it uses clang-format and for Python code it uses darker (which
in turn invokes black).

You can learn more about the LLVM coding style on llvm.org:
https://llvm.org/docs/CodingStandards.html

You can install this script as a git hook by symlinking it to the .git/hooks
directory:

ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit

You can control the exact path to clang-format or darker with the following
environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
"""

import argparse
import json
import os
import re
import shlex
import subprocess
import sys
from typing import Any, List, Optional


class FormatArgs:
    """Options shared by every formatter for one invocation of the script.

    The class-level values are the defaults used when no parsed command line
    is supplied (the git-hook code path).
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # Assigned per instance in __init__; a class-level list would be shared
    # (and mutated) across every FormatArgs object.
    changed_files: List[str]
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: bool = False

    def __init__(self, args: Optional[argparse.Namespace] = None) -> None:
        """Copy the relevant options out of *args*, if given.

        Note that when *args* comes from the command line, ``changed_files``
        holds whatever argparse produced for ``--changed-files`` (a
        comma-separated string or None); the caller splits it.
        """
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file


class FormatHelper:
    """Base class for a single formatter/checker.

    Subclasses set ``name`` and ``friendly_name`` and implement
    ``instructions``, ``has_tool`` and ``format_run``.
    """

    COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Filled in by update_pr() when comments are written to a file instead of
    # being posted.
    comment: Optional[dict] = None

    @property
    def comment_tag(self) -> str:
        """Hidden marker used to recognise this formatter's PR comment."""
        # Only the bare text "fmt" is substituted, so the braces remain in the
        # tag. This is kept as-is because find_comment() matches comments that
        # were posted with exactly this text.
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Command line a user can run to reproduce the result locally."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Return True when the underlying tool can be executed."""
        raise NotImplementedError()

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run the formatter.

        Returns None when there is nothing to report, otherwise the text
        (usually a diff) describing the problems. An empty string means the
        tool failed without producing output.
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Build the markdown body of the PR comment reporting *diff*."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: Any) -> Any:
        """Return the first comment on *pr* carrying our tag, or None."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(self, comment_text: str, args: FormatArgs, create_new: bool) -> None:
        """Post, edit or record the comment for this formatter.

        An existing tagged comment is always updated. A new comment is only
        created when *create_new* is true. With ``args.write_comment_to_file``
        nothing is posted; the comment is stored in ``self.comment`` instead.
        """
        # Imported lazily so the script works as a git hook without PyGithub.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            if create_new or existing_comment:
                self.comment = {"body": comment_text}
            if existing_comment:
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: FormatArgs) -> bool:
        """Run the formatter and report the outcome.

        Returns True when the formatter found nothing to complain about and
        False otherwise. GitHub is only contacted when both a token and a
        repository are configured.
        """
        changed_files = [arg for arg in changed_files if "third-party" not in arg]
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True

        if len(diff) > 0:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False

        # The formatter failed but didn't output a diff (e.g. some sort of
        # infrastructure failure).
        if should_update_gh:
            comment_text = (
                f":warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output. :warning:"
            )
            self.update_pr(comment_text, args, create_new=False)
        else:
            # Without a token/repo (e.g. when running as a git hook) there is
            # no PR to update, so only report locally.
            print(
                f"Warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output."
            )
        return False


class ClangFormatHelper(FormatHelper):
    """Checks C/C++ sources with git-clang-format."""

    name = "clang-format"
    friendly_name = "C/C++ code formatter"

    @property
    def instructions(self) -> str:
        """The last git-clang-format command line built by format_run()."""
        return " ".join(self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Return True for extensionless files that should still be checked."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the paths this helper checks, in their original order."""
        filtered_files = []
        for path in changed_files:
            _, ext = os.path.splitext(path)
            if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm"):
                filtered_files.append(path)
            elif ext == "" and self.should_include_extensionless_file(path):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Executable to run; overridable through $CLANG_FORMAT_PATH."""
        return os.environ.get("CLANG_FORMAT_PATH", "git-clang-format")

    def has_tool(self) -> bool:
        """Return True when ``<tool> -h`` can be run and exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except Exception:
            # Typically the executable is missing or not runnable. Unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run git-clang-format in diff mode over the changed C/C++ files.

        Returns None when there are no files to check or the tool exits with
        0; otherwise returns the tool's stdout.
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [self.clang_fmt_path, "--diff"]

        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        # Gather the extension of all modified files and pass them explicitly to git-clang-format.
        # This prevents git-clang-format from applying its own filtering rules on top of ours.
        # Periods are excluded since git-clang-format takes extensions without them.
        extensions = {os.path.splitext(path)[1].strip(".") for path in cpp_files}
        cf_cmd.append("--extensions")
        # Sorted so that the logged/reported command line is deterministic.
        cf_cmd.append(",".join(sorted(extensions)))

        cf_cmd.append("--")
        cf_cmd += cpp_files

        if args.verbose:
            print(f"Running: {' '.join(cf_cmd)}")
        self.cf_cmd = cf_cmd
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            return None

        # formatting needed, or the command otherwise failed
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff


class DarkerFormatHelper(FormatHelper):
    """Checks Python sources with darker."""

    name = "darker"
    friendly_name = "Python code formatter"

    @property
    def instructions(self) -> str:
        """The last darker command line built by format_run()."""
        return " ".join(self.darker_cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the ``.py`` paths, in their original order."""
        return [path for path in changed_files if os.path.splitext(path)[1] == ".py"]

    @property
    def darker_fmt_path(self) -> str:
        """Executable to run; overridable through $DARKER_FORMAT_PATH."""
        return os.environ.get("DARKER_FORMAT_PATH", "darker")

    def has_tool(self) -> bool:
        """Return True when ``<tool> --version`` can be run and exits with 0."""
        cmd = [self.darker_fmt_path, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except Exception:
            # Typically the executable is missing or not runnable. Unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run darker in check/diff mode over the changed Python files.

        Returns None when there are no files to check or the tool exits with
        0; otherwise returns the tool's stdout.
        """
        py_files = self.filter_changed_files(changed_files)
        if not py_files:
            return None
        darker_cmd = [
            self.darker_fmt_path,
            "--check",
            "--diff",
        ]
        if args.start_rev and args.end_rev:
            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
        darker_cmd += py_files
        if args.verbose:
            print(f"Running: {' '.join(darker_cmd)}")
        self.darker_cmd = darker_cmd
        proc = subprocess.run(
            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if args.verbose:
            sys.stdout.write(proc.stderr.decode("utf-8"))

        output = proc.stdout.decode("utf-8")
        if proc.returncode == 0:
            sys.stdout.write(output)
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(output)
        return output


class UndefGetFormatHelper(FormatHelper):
    """Reports files that add new uses of ``undef`` / ``UndefValue::get``."""

    name = "undef deprecator"
    friendly_name = "undef deprecator"

    # Matches an added diff line (one starting with "+") that mentions undef.
    _ADDED_UNDEF_RE = re.compile(
        r"^[+](?!\s*#\s*).*(\bundef\b|UndefValue::get)", re.MULTILINE
    )

    @property
    def instructions(self) -> str:
        """The last git command line built by format_run(), shell-quoted."""
        return " ".join(shlex.quote(c) for c in self.cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the paths this helper checks, in their original order."""
        filtered_files = []
        for path in changed_files:
            _, ext = os.path.splitext(path)
            if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm", ".ll"):
                filtered_files.append(path)
        return filtered_files

    def has_tool(self) -> bool:
        """Always True; has_tool() does not probe for any executable."""
        return True

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Build the PR comment; *diff* is the report from format_run()."""
        return f"""
:warning: {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

{diff}
"""

    @staticmethod
    def _files_adding_undef(diff_text: str) -> List[str]:
        """Return the paths in *diff_text* whose added lines mention undef.

        *diff_text* is ``git diff`` output; it is split into one chunk per
        file on the ``diff --git `` prefix and the path is read from the
        ``a/<path>`` part of each chunk's first line.
        """
        offenders = []
        # Split the diff so we have one array entry per file.
        # Each file is prefixed like:
        # diff --git a/file b/file
        for chunk in re.split("^diff --git ", diff_text, flags=re.MULTILINE):
            lines = chunk.splitlines()
            if not lines:
                continue
            header = re.match("a/([^ ]+)", lines[0])
            if header is None:
                # Not a header we know how to parse; nothing to attribute.
                continue
            path = header[1]
            # We skip checking in MIR files as undef is a valid token and not
            # going away. The path from the header is tested, not the chunk:
            # the chunk text ends with diff content, never with ".mir".
            if path.endswith(".mir"):
                continue
            # search for additions of undef
            if UndefGetFormatHelper._ADDED_UNDEF_RE.search(chunk):
                offenders.append(path)
        return offenders

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Look for newly added uses of undef in the changed files.

        Returns None when there is nothing to check or nothing was found,
        otherwise a markdown report listing the offending files.
        """
        files = self.filter_changed_files(changed_files)
        if not files:
            # With no paths on its command line git would diff everything,
            # bypassing the extension filter above.
            return None

        # Use git to find files that have had a change in the number of undefs
        regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)"
        cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex]

        if args.start_rev and args.end_rev:
            cmd.append(args.start_rev)
            cmd.append(args.end_rev)

        cmd += files
        self.cmd = cmd

        if args.verbose:
            print(f"Running: {self.instructions}")

        proc = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
        )
        sys.stdout.write(proc.stderr)

        offenders = self._files_adding_undef(proc.stdout)
        if not offenders:
            return None

        files = "\n".join(" - " + f for f in offenders)
        report = f"""
The following files introduce new uses of undef:
{files}

[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:
```llvm
define void @fn() {{
  ...
  br i1 undef, ...
}}
```

Please use the following instead:
```llvm
define void @fn(i1 %cond) {{
  ...
  br i1 %cond, ...
}}
```

Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.
"""
        if args.verbose:
            print(f"error: {self.name} failed")
            print(report)
        return report


ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper())


def hook_main():
    """Entry point when the script is installed as a git pre-commit hook.

    Runs every available formatter over the staged files and exits with 1 if
    any of them fails, 0 otherwise.
    """
    # fill out args
    args = FormatArgs()
    # Any non-empty value of FORMAT_HOOK_VERBOSE enables verbose output.
    args.verbose = bool(os.getenv("FORMAT_HOOK_VERBOSE"))

    # find the changed files
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if fmt.has_tool():
            # No token/repo is configured here, so run() never produces a PR
            # comment and there is nothing to collect.
            if not fmt.run(args.changed_files, args):
                failed_fmts.append(fmt.name)
        else:
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())

    if failed_fmts:
        print(
            "Pre-commit format hook failed, rerun with FORMAT_HOOK_VERBOSE=1 environment for verbose output"
        )
        sys.exit(1)

    sys.exit(0)


def main():
    """Entry point when the script is run from CI with explicit arguments.

    Runs every formatter, optionally writes the collected comments to the
    file ``comments`` as JSON, and exits with 1 if any formatter failed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="GitHub authentication token"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev", type=str, required=True, help="Compute changes to this revision"
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        action="store_true",
        help="Don't post comments on the PR, instead write the comments and metadata to a file called 'comments'",
    )

    args = FormatArgs(parser.parse_args())

    changed_files = []
    if args.changed_files:
        changed_files = args.changed_files.split(",")

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        if fmt.comment:
            comments.append(fmt.comment)

    if comments:
        with open("comments", "w") as f:
            json.dump(comments, f)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)


if __name__ == "__main__":
    script_path = os.path.abspath(__file__)
    if ".git/hooks" in script_path:
        hook_main()
        sys.exit(0)

    main()