xref: /netbsd-src/external/apache2/llvm/dist/clang/tools/clang-format/git-clang-format (revision e038c9c4676b0f19b1b7dd08a940c6ed64a6d5ae)
1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
24
25from __future__ import absolute_import, division, print_function
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
35usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
36
37desc = '''
38If zero or one commits are given, run clang-format on all lines that differ
39between the working directory and <commit>, which defaults to HEAD.  Changes are
40only applied to the working directory.
41
42If two commits are given (requires --diff), run clang-format on all lines in the
43second <commit> that differ from the first <commit>.
44
45The following git-config settings set the default of the corresponding option:
46  clangFormat.binary
47  clangFormat.commit
48  clangFormat.extensions
49  clangFormat.style
50'''
51
# Name of the temporary index file in which to save the output of clang-format.
53# This file is created within the .git directory.
54temp_index_basename = 'clang-format-index'
55
56
57Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
  """Entry point: parse the command line and format the changed lines.

  Reads defaults from the git configuration, works out which commits and
  files were requested, collects the changed line ranges, runs clang-format
  on them and then either prints the resulting diff (--diff) or applies it to
  the working directory.  Exits through die() on invalid commit arguments.
  """
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu', 'cuh',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first: it used to live in the 'else' branch of the
  # "more than one commit" test and therefore could never trigger.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every file in the diff so the filtered-out ones can be listed.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  filter_symlinks(changed_lines)
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print(
        'Ignoring changes in the following files (wrong extension or symlink):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    if opts.verbose >= 0:
      print('no modified files to format')
    return
  if len(commits) > 1:
    # Two commits: format the file contents as stored in the second commit.
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    # Zero or one commit: format the files found in the working directory.
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
185
186
def load_git_config(non_string_options=None):
  """Read `git config --list` into a dictionary of option name -> value.

  Every option is treated as a string unless it appears in
  `non_string_options`, a dictionary mapping an option name (in lower case) to
  either "--bool" or "--int"; such options are re-read through `git config`
  with that type flag."""
  typed_options = {} if non_string_options is None else non_string_options
  config = {}
  listing = run('git', 'config', '--list', '--null')
  for entry in listing.split('\0'):
    if not entry:
      continue
    # With --null the name and the value are separated by a newline.
    name, separator, value = entry.partition('\n')
    if not separator:
      # A setting with no '=' ('\n' with --null) is implicitly 'true'
      value = 'true'
    if name in typed_options:
      value = run('git', 'config', typed_options[name], name)
    config[name] = value
  return config
208
209
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller must already have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  When `dash_dash` is non-empty, everything in `args` is taken to be a commit
  and is validated as such.  Otherwise the arguments are examined from left to
  right and the leading ones that name revisions become the commits, the rest
  the files.  When no commit is found, `[default_commit]` is used."""
  if dash_dash:
    commits = args if args else [default_commit]
    for commit in commits:
      kind = get_object_type(commit)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % commit)
      else:
        die("'%s' is a %s, but a commit was expected" % (commit, kind))
    # dash_dash[0] is the '--' marker itself.
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  # Consume leading revisions; whatever is left in `args` are file names.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
246
247
def disambiguate_revision(value):
  """Tell a revision (True) from a file name (False); die on anything else."""
  # `git rev-parse` exits with its own error message when `value` is ambiguous
  # (neither a commit nor a file), and run() then terminates the script.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
260
261
def get_object_type(value):
  """Return the type that `git cat-file -t` reports for `value` as a string,
  or None when git rejects `value` (non-zero exit status)."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  out, _ = proc.communicate()
  if proc.returncode == 0:
    return convert_string(out.strip())
  return None
271
272
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines().

  Exits with status 2 when the diff command fails."""
  proc = compute_diff(commits, files)
  lines_by_file = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return lines_by_file
283
284
def compute_diff(commits, files):
  """Start git and return the subprocess object that produces the diff.

  The returned process's `stdout` file object yields a patch: `git diff-index`
  against the single given commit, or `git diff-tree` between the commits when
  more than one is given.  The patch is restricted to `files` (if non-empty)
  and is generated with zero context lines (-U0)."""
  git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  cmd = ['git', git_tool, '-p', '-U0']
  cmd.extend(commits)
  cmd.append('--')
  cmd.extend(files)
  proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # Nothing is sent to the child; close its stdin right away.
  proc.stdin.close()
  return proc
300
301
def extract_lines(patch_file):
  """Collect the changed line ranges from the patch in `patch_file`.

  The input must have been produced with ``-U0``, meaning unidiff format with
  zero lines of context.  The return value is a dict mapping filename to a
  list of line `Range`s (start line, line count); hunks that add no lines
  are skipped."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = file_header.search(text)
    if header:
      # Remember which file the following hunks belong to.
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(text)
    if not hunk:
      continue
    start_line = int(hunk.group(1))
    # An omitted count in the hunk header means exactly one line.
    line_count = int(hunk.group(3)) if hunk.group(3) else 1
    if line_count > 0:
      ranges_by_file.setdefault(filename, []).append(
          Range(start_line, line_count))
  return ranges_by_file
326
327
def filter_by_extension(dictionary, allowed_extensions):
  """Remove, in place, every key of `dictionary` whose extension is not allowed.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  File names without any period are kept only when the
  empty string is among the allowed extensions."""
  allowed = frozenset(allowed_extensions)
  keep_extensionless = '' in allowed
  # Iterate over a snapshot of the keys because entries are deleted on the fly.
  for name in list(dictionary):
    _, dot, extension = name.rpartition('.')
    if not dot:
      if not keep_extensionless:
        del dictionary[name]
    elif extension.lower() not in allowed:
      del dictionary[name]
340
341
def filter_symlinks(dictionary):
  """Remove, in place, every key of `dictionary` that names a symlink."""
  # Collect first, delete afterwards, so the dict is not mutated while iterated.
  links = [name for name in dictionary if os.path.islink(name)]
  for name in links:
    del dictionary[name]
347
348
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current one."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
353
354
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
360
361
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps a file name to the line ranges to format.  When
  `revision` is given, file modes are read from that revision with
  `git ls-tree`; otherwise they come from os.stat() on the working copy.

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(filename):
    # Octal mode string for `filename`, in the form git expects.
    if revision:
      ls_tree_cmd = ['git', 'ls-tree',
                     '%s:%s' % (revision, os.path.dirname(filename)),
                     os.path.basename(filename)]
      ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      listing = ls_tree.communicate()[0]
      mode = oct(int(listing.split()[0], 8))
    else:
      mode = oct(os.stat(filename).st_mode)
    # Adjust python3 octal format so that it matches what git expects
    if mode.startswith('0o'):
      mode = '0' + mode[2:]
    return mode

  def index_entries():
    # Yield one "<mode> <blob>\t<path>" record per formatted file.
    try:
      pairs = changed_lines.iteritems()  # Python 2
    except AttributeError:
      pairs = changed_lines.items()  # Python 3
    for filename, line_ranges in pairs:
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_entries(), '--index-info')
393
394
def create_tree(input_lines, mode):
  """Feed `input_lines` to `git update-index` and write the result as a tree.

  If mode is '--stdin', `input_lines` must be a list of filenames.  If mode is
  '--index-info', it must be a list of values suitable for "git update-index
  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
  is invalid.

  Returns the output of `git write-tree`, run against a temporary index."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for entry in input_lines:
      # -z makes git expect NUL-terminated records.
      updater.stdin.write(to_bytes('%s\0' % entry))
    updater.stdin.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(cmd))
    return run('git', 'write-tree')
413
414
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Format one file with clang-format and store the output as a git blob.

  The input is the file's content in `revision` if that is not None (piped in
  from `git cat-file`), or the file in the working directory otherwise.  Only
  the lines covered by `line_ranges` are passed to clang-format via -lines.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.append('-style=' + style)
  for start_line, line_count in line_ranges:
    clang_format_cmd.append(
        '-lines=%s:%s' % (start_line, start_line + line_count - 1))

  git_show = None
  source = subprocess.PIPE
  if revision:
    # Stream the blob from git and tell clang-format which name to assume.
    clang_format_cmd.append('-assume-filename=' + filename)
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    source = git_show.stdout
  else:
    clang_format_cmd.append(filename)

  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=source,
                                    stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  if source == subprocess.PIPE:
    source = clang_format.stdin
  # Our copy of clang-format's input end is no longer needed.
  source.close()

  # Pipe the formatted output straight into `git hash-object`.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  blob_output = hash_object.communicate()[0]
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(blob_output).rstrip('\r\n')
463
464
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index file for
  the duration of the block, then restores the variable and removes the file."""
  index_path = create_temporary_index(tree)
  saved_index = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    # Put the environment back exactly as it was found.
    if saved_index is not None:
      os.environ['GIT_INDEX_FILE'] = saved_index
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
480
481
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory; return its path.

  The index is populated from `tree` when it is not None, and is created
  empty otherwise."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+index_path, source)
  return index_path
493
494
def print_diff(old_tree, new_tree):
  """Show the differences between `old_tree` and `new_tree` on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree': the
  # output is meant for the user, and only the former does nice things like
  # color and pagination.
  #
  # Only modified files are printed: `new_tree` contains just the files that
  # were modified, so without the filter every unmodified file would show up
  # as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
506
507
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are unstaged changes in the affected files
  and not `force`.  If `patch_mode`, runs `git checkout --patch` to select
  hunks interactively; otherwise the files are checked out from a temporary
  index built from `new_tree`.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # '--' keeps a path that starts with '-' from being parsed as an option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
538
539
def run(*args, **kwargs):
  """Run the command `args` and return its decoded standard output.

  Keyword arguments:
    stdin:   data passed to the command's standard input (default '').
    verbose: also print a header line naming the command whenever its stderr
             output or failure is reported (default True).
    strip:   strip trailing newline characters from the result (default True).

  Anything the command writes to stderr is echoed to stderr.  A non-zero exit
  status terminates the script with status 2."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" %
                    next(iter(kwargs)))
  proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          stdin=subprocess.PIPE)
  out, err = proc.communicate(input=stdin)

  out = convert_string(out)
  err = convert_string(err)

  if proc.returncode != 0:
    if verbose:
      print('`%s` returned %s' % (' '.join(args), proc.returncode),
            file=sys.stderr)
    if err:
      print(err.rstrip(), file=sys.stderr)
    sys.exit(2)

  if err:
    if verbose:
      print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
    print(err.rstrip(), file=sys.stderr)
  return out.rstrip('\r\n') if strip else out
566
567
def die(message):
  """Print `message` to stderr, prefixed with 'error:', and exit with status 2."""
  print('error: %s' % (message,), file=sys.stderr)
  raise SystemExit(2)
571
572
def to_bytes(str_input):
    """Return `str_input` as binary data: bytes objects are passed through
    unchanged, anything else is encoded as UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
578
579
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A `str` is returned unchanged.  Binary data (Python 3 `bytes`) is decoded
    from UTF-8; previously this case called `.encode()` on it and raised
    AttributeError.  Any other text type (Python 2 `unicode`) is encoded to
    UTF-8, which yields a Python 2 `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    # On Python 2 `bytes` is `str`, so this branch is only reached on Python 3.
    if isinstance(bytes_input, (bytes, bytearray)):
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
584
585
def convert_string(bytes_input):
    """Decode `bytes_input` from UTF-8 into a native string; if it cannot be
    decoded, return `str(bytes_input)` instead."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        # UnicodeError: the data is not valid UTF-8.
        return str(bytes_input)
593
594if __name__ == '__main__':
595  main()
596