#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

from __future__ import absolute_import, division, print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

# Usage line shown by argparse in place of its auto-generated one.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description shown in `git clang-format -h`.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
    clangFormat.binary
    clangFormat.commit
    clangFormat.extensions
    clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of consecutive lines: 1-based first line number and number of lines.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Command-line entry point for `git clang-format`.

    Parses the command line (defaults come from the `clangFormat.*` git
    config settings), computes which lines changed, runs clang-format on
    those lines and then either prints the resulting diff (--diff) or applies
    it to the working directory.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Check the upper bound first; it must not hide behind the two-commit
    # branch, otherwise three or more commits would slip through with --diff.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) == 2 and not opts.diff:
        die('--diff is required when two commits are given')

    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files '
                  '(wrong extension):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return

    # The file names extracted from the diff are relative to the top level of
    # the repository, so we must cd there before accessing those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless they appear in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"; such options are re-read through
    `git config` with that type flag.
    """
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            if '\n' in entry:
                name, value = entry.split('\n', 1)
            else:
                # With --null the name/value separator is '\n' rather than
                # '='; a setting without one is implicitly 'true'.
                name = entry
                value = 'true'
            if name in non_string_options:
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits.  Otherwise, the arguments are
    checked from left to right if they are commits or files.  If commits are
    not given, a list with `default_commit` is used.

    Note that `args` is consumed in place when no "--" was given.
    """
    if dash_dash:
        if len(args) == 0:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        commits = []
        # Leading arguments that name revisions are commits; the first one
        # that does not starts the list of files.
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the diff command fails.
    """
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a single
    one was specified, or the difference between both specified commits,
    filtered on `files` (if non-empty).  Zero context lines are used in the
    patch.
    """
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of lines (bytes or str).  The input must have
    been produced with ``-U0``, meaning unidiff format with zero lines of
    context.

    The return value is a dict mapping filename to a list of line `Range`s.
    Hunks that add no lines (count 0) are not recorded.
    """
    header_re = re.compile(r'^\+\+\+ [^/]+/(.*)')
    hunk_re = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    filename = None
    for line in patch_file:
        line = convert_string(line)
        match = header_re.search(line)
        if match:
            # git appends a TAB after a header path that contains spaces, so
            # strip it together with the line terminator.
            filename = match.group(1).rstrip('\r\n\t')
        match = hunk_re.search(line)
        # A hunk before any '+++' header cannot be attributed to a file.
        if match and filename is not None:
            start_line = int(match.group(1))
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  An empty string in the collection allows file names
    that contain no period at all.  `dictionary` is modified in place.
    """
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a copy of the keys because entries are deleted on the way.
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the list of `Range`s to format.  If
    `revision` is given, file contents and modes are taken from that revision
    instead of the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            if revision:
                git_metadata_cmd = [
                    'git', 'ls-tree',
                    '%s:%s' % (revision, os.path.dirname(filename)),
                    os.path.basename(filename)]
                git_metadata = subprocess.Popen(git_metadata_cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                fields = stdout.split()
                if git_metadata.returncode != 0 or not fields:
                    die('`%s` failed' % ' '.join(git_metadata_cmd))
                # First field of the `git ls-tree` output is the octal mode.
                mode = oct(int(fields[0], 8))
            else:
                mode = oct(os.stat(filename).st_mode)
            # Adjust python3 octal format so that it matches what git expects
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID of the written tree."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z: entries are NUL-terminated.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.append('-style=' + style)
    # -lines takes an inclusive first:last pair for every range.
    clang_format_cmd.extend(
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges)
    if revision:
        # The content is piped in from git, so clang-format only gets the
        # file name as a hint.
        clang_format_cmd.append('-assume-filename=' + filename)
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.append(filename)
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of clang-format's input so that it sees end-of-file.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of file names that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps a file name that starts with '-' from being parsed as an
        # option.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin:   data fed to the command's standard input (default: nothing).
      verbose: if true (default), report stderr output and failures on stderr.
      strip:   if true (default), strip trailing line terminators from the
               returned output.

    Exits with status 2 if the command returns a non-zero status.  Raises
    TypeError for any other keyword argument.
    """
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so the input has to be bytes on Python 3.
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` as an error on stderr and exit with status 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)


def to_bytes(str_input):
    """Return `str_input` as binary data, encoding text as UTF-8."""
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')


def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A `str` is returned unchanged.  On Python 3, `bytes` are decoded from
    UTF-8; on Python 2, `unicode` is encoded to UTF-8.
    """
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes and str are distinct types.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')


def convert_string(bytes_input):
    """Convert subprocess output to a native `str`.

    Falls back to `str(bytes_input)` if the input cannot be decoded as
    UTF-8 or has no `decode` method.
    """
    try:
        return to_string(bytes_input.decode('utf-8'))
    except AttributeError:  # 'str' object has no attribute 'decode'.
        return str(bytes_input)
    except UnicodeError:
        return str(bytes_input)


if __name__ == '__main__':
    main()