xref: /openbsd-src/gnu/llvm/clang/tools/clang-format/git-clang-format (revision d0fc3bb68efd6c434b4053cd7adb29023cbec341)
1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
24
25from __future__ import absolute_import, division, print_function
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# Synopsis handed to argparse as the parser's `usage` text.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description handed to argparse as the parser's `description`.  main()
# uses RawDescriptionHelpFormatter, so the line breaks below are printed as-is.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''
51
# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines in one file: `start` is the first line number as it
# appears in a diff hunk header and `count` is the number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
  """Parse the command line and run clang-format on the changed lines.

  Defaults for --binary, --commit, --extensions and --style come from the git
  configuration.  With at most one commit, the working directory is compared
  against that commit and the formatted result is applied to the working
  directory (or printed with --diff).  With two commits, --diff is mandatory
  and the second commit is compared against the first.

  Exits with status 2 (through die()) on invalid argument combinations."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Reject too many commits first; this check used to sit in an unreachable
  # branch, so three or more commits were silently accepted.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every changed file so the ones dropped by the extension filter
    # can be reported afterwards.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  if len(commits) > 1:
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
182
183
def load_git_config(non_string_options=None):
  """Read the output of `git config --list` into a dictionary.

  Values are kept as the strings git prints, except for the options named in
  `non_string_options`.  That argument is a dictionary mapping an option name
  (in lower case) to either "--bool" or "--int"; each such option is queried
  again with `git config <flag> <name>` and that output is stored instead."""
  typed_options = {} if non_string_options is None else non_string_options
  config = {}
  listing = run('git', 'config', '--list', '--null')
  for record in listing.split('\0'):
    if not record:
      continue
    # With --null the name and value are separated by a newline.  A record
    # without one is a setting with no '=', which is implicitly 'true'.
    key, newline, setting = record.partition('\n')
    if not newline:
      setting = 'true'
    if key in typed_options:
      setting = run('git', 'config', typed_options[key], key)
    config[key] = setting
  return config
205
206
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller must already have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  When `dash_dash` is non-empty, every element of `args` has to be a commit
  and the files are whatever followed the "--".  Otherwise the arguments are
  examined from left to right: leading revisions become the commits and the
  remainder become the files.  When no commit is found, a list holding only
  `default_commit` is returned in its place."""
  if dash_dash:
    commits = args if args else [default_commit]
    for candidate in commits:
      kind = get_object_type(candidate)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % candidate)
      else:
        die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  # Consume leading revisions in place; whatever is left over names files.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
243
244
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Terminates the program when `value` is ambiguous or names a git object
  that is neither a commit nor a tag."""
  # `git rev-parse` fails, and run() then exits with git's own error message,
  # when `value` is ambiguous (neither a commit nor a file).
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
257
258
def get_object_type(value):
  """Return the type name `git cat-file -t` reports for `value`.

  Returns None when git exits with a non-zero status, i.e. when `value` is
  not a valid git object."""
  query = subprocess.Popen(['git', 'cat-file', '-t', value],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  output = query.communicate()[0]
  if query.returncode == 0:
    return convert_string(output.strip())
  return None
268
269
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines().

  Exits with status 2 when the diff command fails."""
  differ = compute_diff(commits, files)
  patch_stream = differ.stdout
  line_ranges = extract_lines(patch_stream)
  patch_stream.close()
  if differ.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return line_ranges
280
281
def compute_diff(commits, files):
  """Return a subprocess object producing the diff from `commits`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory and the first commit if a single
  one was specified, or the difference between both specified commits, filtered
  on `files` (if non-empty).  Zero context lines are used in the patch.

  The caller is responsible for reading and closing `stdout` and for waiting
  on the returned process."""
  # diff-index compares a commit against the working directory; diff-tree
  # compares two commits against each other.
  git_tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # git gets no input from us; close its stdin right away.
  p.stdin.close()
  return p
297
298
def extract_lines(patch_file):
  """Collect the changed line ranges recorded in `patch_file`.

  `patch_file` is an iterable of patch lines.  The patch must have been
  produced with ``-U0``, meaning unidiff format with zero lines of context.

  Returns a dict mapping each filename to a list of `Range`s, one per hunk
  that leaves at least one line in the new version of the file."""
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    # A '+++ b/path' header switches the file the following hunks belong to.
    header = re.search(r'^\+\+\+\ [^/]+/(.*)', text)
    if header:
      filename = header.group(1).rstrip('\r\n')
    hunk = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', text)
    if not hunk:
      continue
    start_line = int(hunk.group(1))
    # A hunk header without an explicit count covers exactly one line.
    line_count = int(hunk.group(3)) if hunk.group(3) else 1
    if line_count > 0:
      ranges_by_file.setdefault(filename, []).append(
          Range(start_line, line_count))
  return ranges_by_file
323
324
def filter_by_extension(dictionary, allowed_extensions):
  """Remove, in place, every key of `dictionary` whose extension is not allowed.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  The extension is the text after the last period of
  the key, compared case-insensitively.  A key without any period is kept
  only when the empty string is among the allowed extensions."""
  permitted = frozenset(allowed_extensions)
  rejected = []
  for path in dictionary:
    _, dot, suffix = path.rpartition('.')
    if dot:
      keep = suffix.lower() in permitted
    else:
      keep = '' in permitted
    if not keep:
      rejected.append(path)
  # Delete after the scan so the dictionary is not resized while iterating.
  for path in rejected:
    del dictionary[path]
337
338
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
343
344
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
350
351
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps each filename to the list of line ranges to format.
  When `revision` is given, file contents and modes are taken from that
  revision; otherwise they are taken from the working directory.  `binary`
  and `style` are forwarded to clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree.  Exits through die() when
  the mode of a file cannot be read from `revision`."""
  def file_mode(filename):
    """Return the mode of `filename` as the octal string git expects."""
    if revision:
      git_metadata_cmd = ['git', 'ls-tree',
                          '%s:%s' % (revision, os.path.dirname(filename)),
                          os.path.basename(filename)]
      git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE)
      stdout = git_metadata.communicate()[0]
      if git_metadata.returncode != 0:
        die('`%s` failed' % ' '.join(git_metadata_cmd))
      fields = stdout.split()
      if not fields:
        # Empty output would otherwise surface as an IndexError traceback.
        die('`%s` printed no entry for %s' %
            (' '.join(git_metadata_cmd), filename))
      mode = oct(int(fields[0], 8))
    else:
      mode = oct(os.stat(filename).st_mode)
    # Adjust python3 octal format so that it matches what git expects
    if mode.startswith('0o'):
      mode = '0' + mode[2:]
    return mode

  def index_info_generator():
    # dict.items() exists on both Python 2 and Python 3, so no shim is needed.
    for filename, line_ranges in changed_lines.items():
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)
  return create_tree(index_info_generator(), '--index-info')
383
384
def create_tree(input_lines, mode):
  """Create a tree object from the given input.

  If `mode` is '--stdin', `input_lines` must be an iterable of filenames.  If
  `mode` is '--index-info', it must be an iterable of values suitable for
  "git update-index --index-info", such as
  "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid.

  Returns the object ID of the tree written from a temporary index."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    feed = updater.stdin
    # -z makes update-index expect NUL-terminated records.
    for record in input_lines:
      feed.write(to_bytes('%s\0' % record))
    feed.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(cmd))
    return run('git', 'write-tree')
403
404
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each becomes
  one `-lines=` argument.  `binary` is the clang-format executable and `style`,
  when set, is passed as `-style=`.

  Returns the object ID (SHA-1) of the created blob.  Exits through die() if
  the executable cannot be found or if any command in the pipeline fails."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # -lines takes an inclusive start:end pair, hence the -1.
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # The content is piped in from `git cat-file`, so clang-format is told the
    # filename separately.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format reads the working-directory file itself.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      # Swap the PIPE marker for the real pipe object so it can be closed below.
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input: either the unused
  # stdin pipe or the read end of git_show's output that the child now holds.
  clang_format_stdin.close()
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  # hash-object now holds the read end; drop this process's copy.
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check the pipeline from its last stage back to its first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
453
454
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index.

  The index is created from `tree` (see create_temporary_index).  On exit the
  previous value of GIT_INDEX_FILE is restored, or the variable is removed if
  it was not set before, and the temporary file is deleted."""
  temp_index = create_temporary_index(tree)
  saved_value = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = temp_index
  try:
    yield
  finally:
    if saved_value is not None:
      os.environ['GIT_INDEX_FILE'] = saved_value
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(temp_index)
470
471
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory and return its path.

  The index is populated from `tree` when one is given; with `tree` set to
  None an empty index is created."""
  index_path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                            temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+index_path, source)
  return index_path
483
484
def print_diff(old_tree, new_tree):
  """Show the differences between `old_tree` and `new_tree` on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree': the
  # output is meant for a person, and only the porcelain command does nice
  # things like color and pagination.
  #
  # `new_tree` holds nothing but the files that were modified, so without the
  # filter below every unmodified file would be reported as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
496
497
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails if there are local changes in those files and not `force`.  If
  `patch_mode`, runs `git checkout --patch` to select hunks interactively.

  Returns the list of files that differ between `old_tree` and `new_tree`.
  Exits with status 2 when unstaged changes block the update."""
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # '--' keeps a filename that starts with '-' from being read as an option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
528
529
def run(*args, **kwargs):
  """Run a command and return its standard output as a string.

  Positional arguments form the command line.  Keyword arguments:
    stdin:   data to send to the command's standard input (default: nothing).
    verbose: if true (the default), name the command when it prints to stderr
             or fails.
    strip:   if true (the default), strip trailing newlines from the output.

  Anything the command writes to stderr is forwarded to our stderr.  If the
  command exits with a non-zero status, the program exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  # The pipes are binary; on Python 3 a non-empty text `stdin` must be encoded
  # before it can be written to the child.
  if stdin is not None:
    stdin = to_bytes(stdin)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
556
557
def die(message):
  """Report `message` on stderr, prefixed with "error:", and exit with status 2."""
  print('error:', message, file=sys.stderr)
  raise SystemExit(2)
561
562
def to_bytes(str_input):
    """Return `str_input` as binary data.

    A `bytes` value is returned unchanged; anything else is encoded as UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
568
569
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A `str` value is returned unchanged.  On Python 3, `bytes` are decoded
    from UTF-8.  On Python 2, where `bytes` is `str`, the remaining case is
    `unicode`, which is encoded to UTF-8."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes has no encode() method.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
574
575
def convert_string(bytes_input):
    """Turn subprocess output into a native string.

    The input is decoded as UTF-8 and passed through to_string().  If it has
    no decode() method, or the conversion raises a UnicodeError, the result of
    str() on the input is returned instead."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
583
584if __name__ == '__main__':
585  main()
586