xref: /openbsd-src/gnu/llvm/clang/tools/clang-format/git-clang-format (revision 12c855180aad702bbcca06e0398d774beeafb155)
1*12c85518Srobert#!/usr/bin/env python3
2e5dd7070Spatrick#
3e5dd7070Spatrick#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4e5dd7070Spatrick#
5e5dd7070Spatrick# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6e5dd7070Spatrick# See https://llvm.org/LICENSE.txt for license information.
7e5dd7070Spatrick# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8e5dd7070Spatrick#
9e5dd7070Spatrick#===------------------------------------------------------------------------===#
10e5dd7070Spatrick
11e5dd7070Spatrickr"""
12e5dd7070Spatrickclang-format git integration
13e5dd7070Spatrick============================
14e5dd7070Spatrick
15e5dd7070SpatrickThis file provides a clang-format integration for git. Put it somewhere in your
16e5dd7070Spatrickpath and ensure that it is executable. Then, "git clang-format" will invoke
17e5dd7070Spatrickclang-format on the changes in current files or a specific commit.
18e5dd7070Spatrick
19e5dd7070SpatrickFor further details, run:
20e5dd7070Spatrickgit clang-format -h
21e5dd7070Spatrick
Requires Python 3.5 or later
23e5dd7070Spatrick"""
24e5dd7070Spatrick
25e5dd7070Spatrickfrom __future__ import absolute_import, division, print_function
26e5dd7070Spatrickimport argparse
27e5dd7070Spatrickimport collections
28e5dd7070Spatrickimport contextlib
29e5dd7070Spatrickimport errno
30e5dd7070Spatrickimport os
31e5dd7070Spatrickimport re
32e5dd7070Spatrickimport subprocess
33e5dd7070Spatrickimport sys
34e5dd7070Spatrick
# Usage synopsis handed to argparse in place of its auto-generated one.
usage = ('git clang-format [OPTIONS] [<commit>] [<commit>|--staged] '
         '[--] [<file>...]')

# Long description shown by --help.  It is passed to argparse together with
# RawDescriptionHelpFormatter, so the line breaks below are printed verbatim.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory, or in the stage/index.

Examples:
  To format staged changes, i.e everything that's been `git add`ed:
    git clang-format

  To also format everything touched in the most recent commit:
    git clang-format HEAD~1

  If you're on a branch off main, to format everything touched on your branch:
    git clang-format main

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of
# clang-format.  This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of consecutive lines in a file: `start` is the first line number and
# `count` is the number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
69e5dd7070Spatrick
70e5dd7070Spatrick
def main():
  """Run clang-format over the lines that changed in git.

  Parses the command line (defaults come from the git configuration), works
  out which lines changed, formats them into a temporary git tree and then
  either prints a diff/diffstat or applies the result.

  Returns the exit status for the process: 0 if there was nothing to format
  or clang-format changed nothing, the exit status of `git diff` for --diff
  and --diffstat, and 1 after changes have been applied."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', 'inc',  # C++
      'ccm', 'cppm', 'cxxm', 'c++m',  # C++ Modules
      'cu', 'cuh',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      'json',  # Json
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--diffstat', action='store_true',
                 help='print a diffstat instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--staged', '--cached', action='store_true',
                 help='format lines in the stage instead of the working dir')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level remains.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Reject too many commits first; this check must not be nested under the
  # "at most one commit" branch, where it could never trigger.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) == 2:
    if opts.staged:
      die('--staged is not allowed when two commits are given')
    if not opts.diff:
      die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files, opts.staged)
  if opts.verbose >= 1:
    # Remember every file in the diff so the ones filtered out can be listed.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  filter_symlinks(changed_lines)
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print(
        'Ignoring changes in the following files (wrong extension or symlink):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)

  if not changed_lines:
    if opts.verbose >= 0:
      print('no modified files to format')
    return 0

  # `revision` selects where the file contents are read from: a commit name,
  # '' for the index, or None for the working directory.
  if len(commits) > 1:
    old_tree = commits[1]
    revision = old_tree
  elif opts.staged:
    old_tree = create_tree_from_index(changed_lines)
    revision = ''
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    revision = None
  new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                               revision,
                                               binary=opts.binary,
                                               style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)

  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
    return 0

  if opts.diff:
    return print_diff(old_tree, new_tree)
  if opts.diffstat:
    return print_diffstat(old_tree, new_tree)

  changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                patch_mode=opts.patch)
  if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
    print('changed files:')
    for filename in changed_files:
      print('    %s' % filename)

  return 1
215*12c85518Srobert
216e5dd7070Spatrick
def load_git_config(non_string_options=None):
  """Read the git configuration into a dictionary of name -> value.

  Every value is kept as the string git reports, except for options named in
  `non_string_options`: a dictionary mapping an option name (in lower case)
  to either "--bool" or "--int", which is passed to `git config` to fetch
  that option's value."""
  typed_options = {} if non_string_options is None else non_string_options
  settings = {}
  listing = run('git', 'config', '--list', '--null')
  for record in listing.split('\0'):
    if not record:
      continue
    # With --null the name and value are separated by a newline.  A record
    # without one is a setting with no '=', which is implicitly 'true'.
    key, separator, setting = record.partition('\n')
    if not separator:
      setting = 'true'
    if key in typed_options:
      setting = run('git', 'config', typed_options[key], key)
    settings[key] = setting
  return settings
237e5dd7070Spatrick  return out
238e5dd7070Spatrick
239e5dd7070Spatrick
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  "--" and everything after it must already have been moved from `args` into
  `dash_dash`.

  When `dash_dash` is non-empty, everything in `args` is taken to be a commit
  and the files are whatever follows the "--".  Otherwise the leading
  arguments that name revisions are taken as commits and the rest as files;
  note that the commits are popped off the `args` list that was passed in.
  When no commit is given, a list holding `default_commit` is used."""
  if not dash_dash:
    if not args:
      return [default_commit], []
    leading_commits = []
    while args and disambiguate_revision(args[0]):
      leading_commits.append(args.pop(0))
    if not leading_commits:
      leading_commits = [default_commit]
    return leading_commits, args

  commits = args if len(args) != 0 else [default_commit]
  for candidate in commits:
    kind = get_object_type(candidate)
    if kind in ('commit', 'tag'):
      continue
    if kind is None:
      die("'%s' is not a commit" % candidate)
    else:
      die("'%s' is a %s, but a commit was expected" % (candidate, kind))
  return commits, dash_dash[1:]
276e5dd7070Spatrick
277e5dd7070Spatrick
def disambiguate_revision(value):
  """Tell a revision from a file name.

  Returns True if `value` names a commit or tag and False if it is not a git
  object at all (so it is taken to be a file); any other kind of git object
  is reported through die()."""
  # If `value` is ambiguous (neither a commit nor a file), the following
  # command will die with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
289e5dd7070Spatrick      (value, object_type))
290e5dd7070Spatrick
291e5dd7070Spatrick
def get_object_type(value):
  """Ask `git cat-file -t` what kind of object `value` names.

  Returns the reported type as a string, or None when the command exits with
  a non-zero status (i.e. `value` is not a valid git object)."""
  # stderr is captured only so that git's complaint is not shown to the user.
  query = subprocess.Popen(['git', 'cat-file', '-t', value],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  output = query.communicate()[0]
  if query.returncode == 0:
    return convert_string(output.strip())
  return None
301e5dd7070Spatrick
302e5dd7070Spatrick
def compute_diff_and_extract_lines(commits, files, staged):
  """Run compute_diff() and feed its output through extract_lines().

  Returns the dictionary built by extract_lines().  Exits with status 2 if
  the diff command fails."""
  diff_process = compute_diff(commits, files, staged)
  patch_stream = diff_process.stdout
  changed_lines = extract_lines(patch_stream)
  patch_stream.close()
  if diff_process.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return changed_lines
313e5dd7070Spatrick
314e5dd7070Spatrick
def compute_diff(commits, files, staged):
  """Return a subprocess object producing the diff from `commits`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory (or stage if `staged` is true)
  and the first commit if a single one was specified, or the difference
  between both specified commits, filtered on `files` (if non-empty).
  Zero context lines are used in the patch.

  The process's stdin is closed before returning; the caller is responsible
  for reading and closing stdout and for waiting on the process."""
  comparing_two_commits = len(commits) > 1
  # diff-tree compares two commits; diff-index compares one commit against
  # the working directory, or against the index when --cached is given.
  git_tool = 'diff-tree' if comparing_two_commits else 'diff-index'
  extra_args = []
  if staged and not comparing_two_commits:
    extra_args.append('--cached')
  cmd = ['git', git_tool, '-p', '-U0'] + extra_args + commits + ['--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  p.stdin.close()
  return p
334e5dd7070Spatrick
335e5dd7070Spatrick
def extract_lines(patch_file):
  """Collect the changed line ranges from the patch read from `patch_file`.

  The input must have been produced with ``-U0``, meaning unidiff format with
  zero lines of context.

  Returns a dict mapping each filename to a list of `Range`s, one per hunk.
  A hunk whose new-side line count is 0 is recorded with a count of 1, and a
  hunk whose new-side start line is 0 is skipped."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = file_header.search(text)
    if header:
      # Remember the file that the hunks which follow belong to.
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(text)
    if not hunk:
      continue
    start_line = int(hunk.group(1))
    # The count is optional in a hunk header and defaults to a single line.
    line_count = int(hunk.group(3)) if hunk.group(3) else 1
    if line_count == 0:
      line_count = 1
    if start_line == 0:
      continue
    ranges_by_file.setdefault(filename, []).append(
        Range(start_line, line_count))
  return ranges_by_file
362e5dd7070Spatrick  return matches
363e5dd7070Spatrick
364e5dd7070Spatrick
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; its keys are file paths using '/' as the
  separator.  `allowed_extensions` must be a collection of lowercase file
  extensions, excluding the period.  The comparison is case-insensitive, and
  the empty string '' stands for files that have no extension."""
  allowed_extensions = frozenset(allowed_extensions)
  for filename in list(dictionary):
    # Only the last path component can carry the extension.  Splitting the
    # whole path would mistake a dot in a directory name ('x.y/vector') for
    # the start of an extension.
    basename = filename.rsplit('/', 1)[-1]
    _, dot, extension = basename.rpartition('.')
    if not dot:
      extension = ''
    if extension.lower() not in allowed_extensions:
      del dictionary[filename]
376e5dd7070Spatrick      del dictionary[filename]
377e5dd7070Spatrick
378e5dd7070Spatrick
def filter_symlinks(dictionary):
  """Remove, in place, every key of `dictionary` that is a symbolic link.

  Keys are treated as paths and checked relative to the current directory."""
  # Collect the links first so the dictionary is not changed while iterating.
  links = [path for path in dictionary if os.path.islink(path)]
  for path in links:
    del dictionary[path]
384a9ac8606Spatrick
385a9ac8606Spatrick
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current one."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
390e5dd7070Spatrick
391e5dd7070Spatrick
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
397e5dd7070Spatrick
398e5dd7070Spatrick
def create_tree_from_index(filenames):
  """Build a git tree out of the index (staged) entries for `filenames`.

  Returns whatever create_tree() returns for those entries."""
  # Copy the environment up front, because the files have to be read from the
  # original index.  The generator below is only consumed later, inside
  # create_tree().
  original_env = os.environ.copy()

  def staged_entries():
    for path in filenames:
      ls_files = subprocess.Popen(
          ['git', 'ls-files', '--stage', '-z', '--', path],
          env=original_env,
          stdin=subprocess.PIPE,
          stdout=subprocess.PIPE)
      listing = ls_files.communicate()[0]
      # Output is NUL-terminated because of -z; keep the first record only.
      first_entry = listing.split(b'\0')[0]
      yield convert_string(first_entry)

  return create_tree(staged_entries(), '--index-info')
412*12c85518Srobert
413*12c85518Srobert
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps each filename to the line ranges to format.  The file
  contents and mode are taken from `revision` when it is a non-empty string,
  from the index when it is the empty string, and from the working directory
  when it is None.

  Returns the object ID (SHA-1) of the created tree."""
  # Copy the environment when formatting the files in the index, because the
  # files have to be read from the original index.
  env = os.environ.copy() if revision == '' else None

  def file_mode(filename):
    """Return the mode of `filename` as formatted by oct()."""
    if revision is None:
      return oct(os.stat(filename).st_mode)
    if len(revision) > 0:
      metadata_cmd = ['git', 'ls-tree',
                      '%s:%s' % (revision, os.path.dirname(filename)),
                      os.path.basename(filename)]
    else:
      metadata_cmd = ['git', 'ls-files', '--stage', '--', filename]
    metadata = subprocess.Popen(metadata_cmd, env=env,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    output = metadata.communicate()[0]
    # The mode is the first whitespace-separated field of the output.
    return oct(int(output.split()[0], 8))

  def index_entries():
    # Python 2 dictionaries offer iteritems(); Python 3 ones only items().
    pairs = getattr(changed_lines, 'iteritems', changed_lines.items)
    for filename, line_ranges in pairs():
      mode = file_mode(filename)
      # Adjust python3 octal format so that it matches what git expects
      if mode.startswith('0o'):
        mode = '0' + mode[2:]
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style,
                                     env=env)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_entries(), '--index-info')
453e5dd7070Spatrick
454e5dd7070Spatrick
def create_tree(input_lines, mode):
  """Write a tree object built from `input_lines` and return its ID.

  With mode '--stdin', `input_lines` must be an iterable of filenames.  With
  mode '--index-info', it must be an iterable of values suitable for "git
  update-index --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".
  Any other mode is invalid."""
  assert mode in ('--stdin', '--index-info')
  update_index_cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    update_index = subprocess.Popen(update_index_cmd, stdin=subprocess.PIPE)
    feed = update_index.stdin
    # Entries are NUL-terminated to match the -z flag.  `input_lines` may be
    # a generator, so it is consumed here, one entry at a time.
    for entry in input_lines:
      feed.write(to_bytes('%s\0' % entry))
    feed.close()
    status = update_index.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(update_index_cmd))
    return run('git', 'write-tree')
472e5dd7070Spatrick    return tree_id
473e5dd7070Spatrick
474e5dd7070Spatrick
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None, env=None):
  """Format one file with clang-format and store the output as a git blob.

  The input is the working-directory file when `revision` is None; otherwise
  it is read with `git cat-file blob <revision>:<filename>`, where an empty
  `revision` selects the copy in the index.  `env`, if given, is the
  environment for that git command.  Only the lines in `line_ranges` are
  formatted.

  Returns the object ID (SHA-1) of the created blob."""
  formatter_cmd = [binary]
  if style:
    formatter_cmd.append('-style='+style)
  for start_line, line_count in line_ranges:
    formatter_cmd.append(
        '-lines=%s:%s' % (start_line, start_line+line_count-1))

  read_from_git = revision is not None
  if read_from_git:
    # clang-format reads the contents from stdin, so it has to be told the
    # file name separately.
    formatter_cmd.append('-assume-filename='+filename)
    blob_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    blob_reader = subprocess.Popen(blob_cmd, env=env, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
    blob_reader.stdin.close()
    formatter_input = blob_reader.stdout
  else:
    formatter_cmd.append(filename)
    blob_reader = None
    formatter_input = subprocess.PIPE

  try:
    formatter = subprocess.Popen(formatter_cmd, stdin=formatter_input,
                                 stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    die('cannot find executable "%s"' % binary)

  # Close our end of clang-format's input: either our handle on git's output
  # or the otherwise unused pipe to clang-format.
  if read_from_git:
    formatter_input.close()
  else:
    formatter.stdin.close()

  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=formatter.stdout,
                                 stdout=subprocess.PIPE)
  formatter.stdout.close()
  blob_id = hash_object.communicate()[0]

  # Check the pipeline from its last stage back to its first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if formatter.wait() != 0:
    die('`%s` failed' % ' '.join(formatter_cmd))
  if blob_reader is not None and blob_reader.wait() != 0:
    die('`%s` failed' % ' '.join(blob_cmd))
  return convert_string(blob_id).rstrip('\r\n')
523e5dd7070Spatrick  return convert_string(stdout).rstrip('\r\n')
524e5dd7070Spatrick
525e5dd7070Spatrick
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index file.

  The index is created by create_temporary_index(tree).  On exit the previous
  value of GIT_INDEX_FILE is restored (or the variable is removed if it was
  not set) and the temporary file is deleted."""
  temp_path = create_temporary_index(tree)
  saved_value = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = temp_path
  try:
    yield
  finally:
    if saved_value is not None:
      os.environ['GIT_INDEX_FILE'] = saved_value
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(temp_path)
541e5dd7070Spatrick
542e5dd7070Spatrick
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory; return its path.

  The index is filled from `tree` when one is given and is left empty when
  `tree` is None."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+index_path, source)
  return index_path
554e5dd7070Spatrick
555e5dd7070Spatrick
def print_diff(old_tree, new_tree):
  """Show the diff between the two trees and return git's exit status."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' since
  # the output is meant for the user, and only the former does nice things
  # like color and pagination.
  #
  # Only modified files are shown: `new_tree` holds just the files that were
  # modified, so without the filter every other file would show as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M',
              '--exit-code', old_tree, new_tree]
  completed = subprocess.run(diff_cmd)
  return completed.returncode
567e5dd7070Spatrick
def print_diffstat(old_tree, new_tree):
  """Show the diffstat between the two trees and return git's exit status."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' since
  # the output is meant for the user, and only the former does nice things
  # like color and pagination.
  #
  # Only modified files are shown: `new_tree` holds just the files that were
  # modified, so without the filter every other file would show as deleted.
  diffstat_cmd = ['git', 'diff', '--diff-filter=M', '--exit-code',
                  '--stat', old_tree, new_tree]
  completed = subprocess.run(diffstat_cmd)
  return completed.returncode
579e5dd7070Spatrick
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are local changes in those files and not
  `force`.  If `patch_mode`, runs `git checkout --patch` to select hunks
  interactively; otherwise the files are checked out from a temporary index
  built from `new_tree`.

  Returns the list of file names that `git diff-tree --diff-filter=M` reports
  between `old_tree` and `new_tree`."""
  # '-z' gives NUL-terminated names, so strip the trailing NUL before splitting.
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # '--' ends option parsing, so a changed file whose name begins with '-'
    # is treated as a path instead of being misread as an option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.run(['git', 'checkout', '--patch', new_tree], check=True)
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
610e5dd7070Spatrick
611e5dd7070Spatrick
def run(*args, **kwargs):
  """Run the command `args` and return its converted standard output.

  Keyword arguments:
    stdin:   data passed to the child's standard input (default '').
    verbose: if true (default), prefix diagnostics with the command line.
    strip:   if true (default), drop trailing CR/LF characters from the output.

  Any other keyword raises TypeError.  Whatever the child wrote to stderr is
  echoed to our stderr.  If the child exits with a non-zero status, this
  function terminates the program with exit status 2 instead of returning."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'"
                    % next(iter(kwargs)))

  process = subprocess.Popen(args, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  out, err = process.communicate(input=stdin)
  out = convert_string(out)
  err = convert_string(err)

  # Failure path first: report what happened and abort.
  if process.returncode != 0:
    if verbose:
      print('`%s` returned %s' % (' '.join(args), process.returncode),
            file=sys.stderr)
    if err:
      print(err.rstrip(), file=sys.stderr)
    sys.exit(2)

  # Success, but the command may still have emitted warnings on stderr.
  if err:
    if verbose:
      print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
    print(err.rstrip(), file=sys.stderr)
  return out.rstrip('\r\n') if strip else out
638e5dd7070Spatrick
639e5dd7070Spatrick
def die(message):
  """Print `message` to stderr with an 'error:' prefix and exit with status 2."""
  print('error: %s' % (message,), file=sys.stderr)
  raise SystemExit(2)
643e5dd7070Spatrick
644e5dd7070Spatrick
def to_bytes(str_input):
    """Return `str_input` as bytes.

    A bytes object is returned unchanged; anything else is UTF-8 encoded."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
650e5dd7070Spatrick
651e5dd7070Spatrick
def to_string(bytes_input):
    """Return `bytes_input` as a native `str`.

    A `str` is returned unchanged.  A `bytes` or `bytearray` object is decoded
    as UTF-8 (raising UnicodeDecodeError if it is not valid UTF-8).  Any other
    input, such as a Python 2 `unicode` object, is UTF-8 encoded as before."""
    if isinstance(bytes_input, str):
        return bytes_input
    # On Python 3, bytes objects have no `encode` method; turning them into a
    # string means decoding.  On Python 2, `bytes` is `str` and was already
    # handled above, so only `bytearray` reaches this branch there.
    if isinstance(bytes_input, (bytes, bytearray)):
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
656e5dd7070Spatrick
657e5dd7070Spatrick
def convert_string(bytes_input):
    """Best-effort conversion of `bytes_input` to a string.

    The input is decoded as UTF-8 and passed through `to_string`.  If it has
    no usable `decode` method or decoding fails, `str(bytes_input)` is
    returned instead."""
    try:
        text = to_string(bytes_input.decode('utf-8'))
    except (AttributeError, UnicodeError):
        # AttributeError: e.g. a 'str' object has no attribute 'decode'.
        # UnicodeError: the data could not be converted as UTF-8.
        text = str(bytes_input)
    return text
665e5dd7070Spatrick
if __name__ == '__main__':
  # When executed as a script, use main()'s return value as the exit status.
  raise SystemExit(main())
668