1#!/usr/bin/env python 2# 3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===# 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9#===------------------------------------------------------------------------===# 10 11r""" 12clang-format git integration 13============================ 14 15This file provides a clang-format integration for git. Put it somewhere in your 16path and ensure that it is executable. Then, "git clang-format" will invoke 17clang-format on the changes in current files or a specific commit. 18 19For further details, run: 20git clang-format -h 21 22Requires Python 2.7 or Python 3 23""" 24 25from __future__ import absolute_import, division, print_function 26import argparse 27import collections 28import contextlib 29import errno 30import os 31import re 32import subprocess 33import sys 34 35usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]' 36 37desc = ''' 38If zero or one commits are given, run clang-format on all lines that differ 39between the working directory and <commit>, which defaults to HEAD. Changes are 40only applied to the working directory. 41 42If two commits are given (requires --diff), run clang-format on all lines in the 43second <commit> that differ from the first <commit>. 44 45The following git-config settings set the default of the corresponding option: 46 clangFormat.binary 47 clangFormat.commit 48 clangFormat.extensions 49 clangFormat.style 50''' 51 52# Name of the temporary index file in which save the output of clang-format. 53# This file is created within the .git directory. 
# Name of the temporary index file in which to save the output of
# clang-format. This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: 1-based first line and number of lines.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Entry point: format the changed lines and print or apply the result.

    Parses the command line (defaults come from `git config`), computes the
    changed line ranges, runs clang-format on them and then either prints a
    diff (--diff) or applies the result to the working directory.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--',
    # while nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu', 'cuh',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    _check_commit_count(commits, opts.diff)
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    # The computed diff outputs absolute paths, so we must cd before
    # accessing those files.
    cd_to_toplevel()
    filter_symlinks(changed_lines)
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print(
                'Ignoring changes in the following files (wrong extension or '
                'symlink):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        if opts.verbose >= 0:
            print('no modified files to format')
        return
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)


def _check_commit_count(commits, diff):
    """Die unless `commits` is an acceptable number of commits.

    At most two commits are allowed, and two commits are only allowed when
    `diff` (the --diff option) is set.  The upper bound is checked first so
    that it is actually enforced.
    """
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) > 1 and not diff:
        die('--diff is required when two commits are given')


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            if '\n' in entry:
                name, value = entry.split('\n', 1)
            else:
                # A setting with no '=' ('\n' with --null) is implicitly
                # 'true'
                name = entry
                value = 'true'
            if name in non_string_options:
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits.  Otherwise, the arguments are
    checked from left to right if they are commits or files.  If commits are
    not given, a list with `default_commit` is used.

    Note: leading commits are popped off `args` in place."""
    if dash_dash:
        if len(args) == 0:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        # dash_dash[0] is the '--' itself.
        files = dash_dash[1:]
    elif args:
        commits = []
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep git's error message off the terminal.
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the diff command fails."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a
    single one was specified, or the difference between both specified
    commits, filtered on `files` (if non-empty).  Zero context lines are used
    in the patch."""
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of patch lines.  The input must have been
    produced with ``-U0``, meaning unidiff format with zero lines of context.

    The return value is a dict mapping filename to a list of line `Range`s
    (start_line, line_count).  Hunks that add no lines are skipped."""
    matches = {}
    for line in patch_file:
        line = convert_string(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means a single line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  A name without any period is kept only if the
    empty string is one of the allowed extensions."""
    allowed_extensions = frozenset(allowed_extensions)
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def filter_symlinks(dictionary):
    """Delete every key in `dictionary` that is a symlink."""
    for filename in list(dictionary.keys()):
        if os.path.islink(filename):
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the line ranges to format.  If
    `revision` is given, file contents and modes are read from that revision;
    otherwise they come from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def iteritems(container):
        try:
            return container.iteritems()  # Python 2
        except AttributeError:
            return container.items()  # Python 3

    def index_info_generator():
        for filename, line_ranges in iteritems(changed_lines):
            if revision:
                git_metadata_cmd = [
                    'git', 'ls-tree',
                    '%s:%s' % (revision, os.path.dirname(filename)),
                    os.path.basename(filename)]
                git_metadata = subprocess.Popen(git_metadata_cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                # The first field of the ls-tree output is the octal mode.
                mode = oct(int(stdout.split()[0], 8))
            else:
                mode = oct(os.stat(filename).st_mode)
            # Adjust python3 octal format so that it matches what git expects
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID of the tree written from a temporary index."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # '-z' makes update-index expect NUL-terminated records.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['-style='+style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line+line_count-1)
        for start_line, line_count in line_ranges])
    if revision:
        # The content is piped in from git, so clang-format needs to be told
        # the file name separately.
        clang_format_cmd.extend(['-assume-filename='+filename])
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    clang_format_stdin.close()
    # Pipe clang-format's output straight into `git hash-object -w`.
    hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output='+path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        unstaged_files = run('git', 'diff-files', '--name-status',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin: data to send to the command's standard input (default: none).
      verbose: if true (the default), report stderr output and failures on
        our own stderr.
      strip: if true (the default), strip trailing newlines from the output.

    Exits with status 2 if the command returns a non-zero exit status."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so text input must be encoded first (Python 3).
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` as an error to stderr and exit with status 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)


def to_bytes(str_input):
    """Return `str_input` as bytes, encoding text as UTF-8."""
    # Encode to UTF-8 to get binary data.
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')


def to_string(bytes_input):
    """Return `bytes_input` as a native `str`.

    A `str` is returned unchanged; anything else is UTF-8 encoded, which is
    what turns a Python 2 `unicode` object into a `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    return bytes_input.encode('utf-8')


def convert_string(bytes_input):
    """Convert subprocess output to a native `str`.

    The input is decoded as UTF-8 when possible; if it has no `decode` method
    or is not valid UTF-8, `str()` of the input is returned instead."""
    try:
        return to_string(bytes_input.decode('utf-8'))
    except AttributeError:  # 'str' object has no attribute 'decode'.
        return str(bytes_input)
    except UnicodeError:
        return str(bytes_input)


if __name__ == '__main__':
    main()