xref: /netbsd-src/external/apache2/llvm/dist/clang/utils/creduce-clang-crash.py (revision 404ee5b9334f618040b6cdef96a0ff35a6fc4636)
1#!/usr/bin/env python
2"""Calls C-Reduce to create a minimal reproducer for clang crashes.
3
4Output files:
5  *.reduced.sh -- crash reproducer with minimal arguments
6  *.reduced.cpp -- the reduced file
7  *.test.sh -- interestingness test for C-Reduce
8"""
9
10from __future__ import print_function
11from argparse import ArgumentParser, RawTextHelpFormatter
12import os
13import re
14import stat
15import sys
16import subprocess
17import pipes
18import shlex
19import tempfile
20import shutil
21from distutils.spawn import find_executable
22
# Module-level settings; main() assigns all four from the command line.
verbose = False     # when true, verbose_print() forwards to print()
creduce_cmd = None  # path to the `creduce` executable (from check_cmd)
clang_cmd = None    # path to the `clang` executable (from check_cmd)
not_cmd = None      # path to the `not` executable (from check_cmd)
27
def verbose_print(*args, **kwargs):
  """Forward all arguments to print(), but only when `verbose` is set."""
  if not verbose:
    return
  print(*args, **kwargs)
31
def check_file(fname):
  """Return fname unchanged if it names an existing regular file.

  Otherwise terminate the program via sys.exit() with an error message.
  """
  if os.path.isfile(fname):
    return fname
  sys.exit("ERROR: %s does not exist" % fname)
36
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
  """
  Returns absolute path to cmd_path if it is given,
  or absolute path to cmd_dir/cmd_name.

  When cmd_path is given only cmd_path is looked up; otherwise cmd_name is
  searched for in cmd_dir (find_executable() falls back to $PATH when cmd_dir
  is None). Exits the program with an error message if nothing is found.
  """
  if cmd_path:
    cmd = find_executable(cmd_path)
    if cmd:
      # find_executable() hands back a relative cmd_path unchanged; make it
      # absolute so the command keeps working if it is later run from a
      # different working directory.
      return os.path.abspath(cmd)
    sys.exit("ERROR: executable `%s` not found" % (cmd_path))

  cmd = find_executable(cmd_name, path=cmd_dir)
  if cmd:
    return os.path.abspath(cmd)

  if not cmd_dir:
    # Only used to make the error message below readable.
    cmd_dir = "$PATH"
  sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
55
def quote_cmd(cmd):
  """Return the argument list as one shell-quoted, space-separated string."""
  quoted_args = map(pipes.quote, cmd)
  return ' '.join(quoted_args)
58
def write_to_script(text, filename):
  """Write text to filename, then add the owner-execute bit to its mode."""
  with open(filename, 'w') as script:
    script.write(text)
  current_mode = os.stat(filename).st_mode
  os.chmod(filename, current_mode | stat.S_IEXEC)
63
64class Reduce(object):
65  def __init__(self, crash_script, file_to_reduce):
66    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
67    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
68
69    self.testfile = file_reduce_name + '.test.sh'
70    self.crash_script = crash_script_name + '.reduced' + crash_script_ext
71    self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
72    shutil.copy(file_to_reduce, self.file_to_reduce)
73
74    self.clang = clang_cmd
75    self.clang_args = []
76    self.expected_output = []
77    self.is_crash = True
78    self.creduce_flags = ["--tidy"]
79
80    self.read_clang_args(crash_script, file_to_reduce)
81    self.read_expected_output()
82
83  def get_crash_cmd(self, cmd=None, args=None, filename=None):
84    if not cmd:
85      cmd = self.clang
86    if not args:
87      args = self.clang_args
88    if not filename:
89      filename = self.file_to_reduce
90
91    return [cmd] + args + [filename]
92
93  def read_clang_args(self, crash_script, filename):
94    print("\nReading arguments from crash script...")
95    with open(crash_script) as f:
96      # Assume clang call is the first non comment line.
97      cmd = []
98      for line in f:
99        if not line.lstrip().startswith('#'):
100          cmd = shlex.split(line)
101          break
102    if not cmd:
103      sys.exit("Could not find command in the crash script.");
104
105    # Remove clang and filename from the command
106    # Assume the last occurrence of the filename is the clang input file
107    del cmd[0]
108    for i in range(len(cmd)-1, -1, -1):
109      if cmd[i] == filename:
110        del cmd[i]
111        break
112    self.clang_args = cmd
113    verbose_print("Clang arguments:", quote_cmd(self.clang_args))
114
115  def read_expected_output(self):
116    print("\nGetting expected crash output...")
117    p = subprocess.Popen(self.get_crash_cmd(),
118                         stdout=subprocess.PIPE,
119                         stderr=subprocess.STDOUT)
120    crash_output, _ = p.communicate()
121    result = []
122
123    # Remove color codes
124    ansi_escape = r'\x1b\[[0-?]*m'
125    crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))
126
127    # Look for specific error messages
128    regexes = [r"Assertion `(.+)' failed", # Linux assert()
129               r"Assertion failed: (.+),", # FreeBSD/Mac assert()
130               r"fatal error: error in backend: (.+)",
131               r"LLVM ERROR: (.+)",
132               r"UNREACHABLE executed (at .+)?!",
133               r"LLVM IR generation of ceclaration '(.+)'",
134               r"Generating code for declaration '(.+)'",
135               r"\*\*\* Bad machine code: (.+) \*\*\*"]
136    for msg_re in regexes:
137      match = re.search(msg_re, crash_output)
138      if match:
139        msg = match.group(1)
140        result = [msg]
141        print("Found message:", msg)
142
143        if "fatal error:" in msg_re:
144          self.is_crash = False
145        break
146
147    # If no message was found, use the top five stack trace functions,
148    # ignoring some common functions
149    # Five is a somewhat arbitrary number; the goal is to get a small number
150    # of identifying functions with some leeway for common functions
151    if not result:
152      stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
153      filters = ["PrintStackTraceSignalHandler",
154                 "llvm::sys::RunSignalHandlers",
155                 "SignalHandler", "__restore_rt", "gsignal", "abort"]
156      matches = re.findall(stacktrace_re, crash_output)
157      result = [x for x in matches if x and x.strip() not in filters][:5]
158      for msg in result:
159        print("Found stack trace function:", msg)
160
161    if not result:
162      print("ERROR: no crash was found")
163      print("The crash output was:\n========\n%s========" % crash_output)
164      sys.exit(1)
165
166    self.expected_output = result
167
168  def check_expected_output(self, args=None, filename=None):
169    if not args:
170      args = self.clang_args
171    if not filename:
172      filename = self.file_to_reduce
173
174    p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
175                         stdout=subprocess.PIPE,
176                         stderr=subprocess.STDOUT)
177    crash_output, _ = p.communicate()
178    return all(msg in crash_output.decode('utf-8') for msg in
179               self.expected_output)
180
181  def write_interestingness_test(self):
182    print("\nCreating the interestingness test...")
183
184    crash_flag = "--crash" if self.is_crash else ""
185
186    output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
187        (pipes.quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))
188
189    for msg in self.expected_output:
190      output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg)
191
192    write_to_script(output, self.testfile)
193    self.check_interestingness()
194
195  def check_interestingness(self):
196    testfile = os.path.abspath(self.testfile)
197
198    # Check that the test considers the original file interesting
199    with open(os.devnull, 'w') as devnull:
200      returncode = subprocess.call(testfile, stdout=devnull)
201    if returncode:
202      sys.exit("The interestingness test does not pass for the original file.")
203
204    # Check that an empty file is not interesting
205    # Instead of modifying the filename in the test file, just run the command
206    with tempfile.NamedTemporaryFile() as empty_file:
207      is_interesting = self.check_expected_output(filename=empty_file.name)
208    if is_interesting:
209      sys.exit("The interestingness test passes for an empty file.")
210
211  def clang_preprocess(self):
212    print("\nTrying to preprocess the source file...")
213    with tempfile.NamedTemporaryFile() as tmpfile:
214      cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
215      cmd_preprocess_no_lines = cmd_preprocess + ['-P']
216      try:
217        subprocess.check_call(cmd_preprocess_no_lines)
218        if self.check_expected_output(filename=tmpfile.name):
219          print("Successfully preprocessed with line markers removed")
220          shutil.copy(tmpfile.name, self.file_to_reduce)
221        else:
222          subprocess.check_call(cmd_preprocess)
223          if self.check_expected_output(filename=tmpfile.name):
224            print("Successfully preprocessed without removing line markers")
225            shutil.copy(tmpfile.name, self.file_to_reduce)
226          else:
227            print("No longer crashes after preprocessing -- "
228                  "using original source")
229      except subprocess.CalledProcessError:
230        print("Preprocessing failed")
231
232  @staticmethod
233  def filter_args(args, opts_equal=[], opts_startswith=[],
234                  opts_one_arg_startswith=[]):
235    result = []
236    skip_next = False
237    for arg in args:
238      if skip_next:
239        skip_next = False
240        continue
241      if any(arg == a for a in opts_equal):
242        continue
243      if any(arg.startswith(a) for a in opts_startswith):
244        continue
245      if any(arg.startswith(a) for a in opts_one_arg_startswith):
246        skip_next = True
247        continue
248      result.append(arg)
249    return result
250
251  def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
252    new_args = self.filter_args(args, **kwargs)
253
254    if extra_arg:
255      if extra_arg in new_args:
256        new_args.remove(extra_arg)
257      new_args.append(extra_arg)
258
259    if (new_args != args and
260        self.check_expected_output(args=new_args)):
261      if msg:
262        verbose_print(msg)
263      return new_args
264    return args
265
266  def try_remove_arg_by_index(self, args, index):
267    new_args = args[:index] + args[index+1:]
268    removed_arg = args[index]
269
270    # Heuristic for grouping arguments:
271    # remove next argument if it doesn't start with "-"
272    if index < len(new_args) and not new_args[index].startswith('-'):
273      del new_args[index]
274      removed_arg += ' ' + args[index+1]
275
276    if self.check_expected_output(args=new_args):
277      verbose_print("Removed", removed_arg)
278      return new_args, index
279    return args, index+1
280
281  def simplify_clang_args(self):
282    """Simplify clang arguments before running C-Reduce to reduce the time the
283    interestingness test takes to run.
284    """
285    print("\nSimplifying the clang command...")
286
287    # Remove some clang arguments to speed up the interestingness test
288    new_args = self.clang_args
289    new_args = self.try_remove_args(new_args,
290                                    msg="Removed debug info options",
291                                    opts_startswith=["-gcodeview",
292                                                     "-debug-info-kind=",
293                                                     "-debugger-tuning="])
294
295    new_args = self.try_remove_args(new_args,
296                                    msg="Removed --show-includes",
297                                    opts_startswith=["--show-includes"])
298    # Not suppressing warnings (-w) sometimes prevents the crash from occurring
299    # after preprocessing
300    new_args = self.try_remove_args(new_args,
301                                    msg="Replaced -W options with -w",
302                                    extra_arg='-w',
303                                    opts_startswith=["-W"])
304    new_args = self.try_remove_args(new_args,
305                                    msg="Replaced optimization level with -O0",
306                                    extra_arg="-O0",
307                                    opts_startswith=["-O"])
308
309    # Try to remove compilation steps
310    new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
311                                    extra_arg="-emit-llvm")
312    new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
313                                    extra_arg="-fsyntax-only")
314
315    # Try to make implicit int an error for more sensible test output
316    new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
317                                    opts_equal=["-w"],
318                                    extra_arg="-Werror=implicit-int")
319
320    self.clang_args = new_args
321    verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))
322
323  def reduce_clang_args(self):
324    """Minimize the clang arguments after running C-Reduce, to get the smallest
325    command that reproduces the crash on the reduced file.
326    """
327    print("\nReducing the clang crash command...")
328
329    new_args = self.clang_args
330
331    # Remove some often occurring args
332    new_args = self.try_remove_args(new_args, msg="Removed -D options",
333                                    opts_startswith=["-D"])
334    new_args = self.try_remove_args(new_args, msg="Removed -D options",
335                                    opts_one_arg_startswith=["-D"])
336    new_args = self.try_remove_args(new_args, msg="Removed -I options",
337                                    opts_startswith=["-I"])
338    new_args = self.try_remove_args(new_args, msg="Removed -I options",
339                                    opts_one_arg_startswith=["-I"])
340    new_args = self.try_remove_args(new_args, msg="Removed -W options",
341                                    opts_startswith=["-W"])
342
343    # Remove other cases that aren't covered by the heuristic
344    new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
345                                    opts_one_arg_startswith=["-mllvm"])
346
347    i = 0
348    while i < len(new_args):
349      new_args, i = self.try_remove_arg_by_index(new_args, i)
350
351    self.clang_args = new_args
352
353    reduced_cmd = quote_cmd(self.get_crash_cmd())
354    write_to_script(reduced_cmd, self.crash_script)
355    print("Reduced command:", reduced_cmd)
356
357  def run_creduce(self):
358    print("\nRunning C-Reduce...")
359    try:
360      p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
361                           [self.testfile, self.file_to_reduce])
362      p.communicate()
363    except KeyboardInterrupt:
364      # Hack to kill C-Reduce because it jumps into its own pgid
365      print('\n\nctrl-c detected, killed creduce')
366      p.kill()
367
def main():
  """Parse the command line, locate the tools and run the whole reduction.

  Sets the module-level verbose, creduce_cmd, clang_cmd and not_cmd, then
  simplifies the clang arguments, writes the interestingness test,
  preprocesses the input, runs C-Reduce and minimizes the final command.
  """
  global verbose, creduce_cmd, clang_cmd, not_cmd

  arg_parser = ArgumentParser(description=__doc__,
                              formatter_class=RawTextHelpFormatter)
  arg_parser.add_argument(
      'crash_script', type=str, nargs=1,
      help="Name of the script that generates the crash.")
  arg_parser.add_argument(
      'file_to_reduce', type=str, nargs=1,
      help="Name of the file to be reduced.")
  arg_parser.add_argument(
      '--llvm-bin', dest='llvm_bin', type=str,
      help="Path to the LLVM bin directory.")
  arg_parser.add_argument(
      '--llvm-not', dest='llvm_not', type=str,
      help="The path to the `not` executable. "
      "By default uses the llvm-bin directory.")
  arg_parser.add_argument(
      '--clang', dest='clang', type=str,
      help="The path to the `clang` executable. "
      "By default uses the llvm-bin directory.")
  arg_parser.add_argument(
      '--creduce', dest='creduce', type=str,
      help="The path to the `creduce` executable. "
      "Required if `creduce` is not in PATH environment.")
  arg_parser.add_argument('-v', '--verbose', action='store_true')
  options = arg_parser.parse_args()

  verbose = options.verbose
  if options.llvm_bin:
    llvm_bin = os.path.abspath(options.llvm_bin)
  else:
    llvm_bin = None
  # creduce is only looked up on $PATH (or via --creduce); clang and `not`
  # are looked up in the LLVM bin directory unless given explicitly.
  creduce_cmd = check_cmd('creduce', None, options.creduce)
  clang_cmd = check_cmd('clang', llvm_bin, options.clang)
  not_cmd = check_cmd('not', llvm_bin, options.llvm_not)

  # nargs=1 makes each positional a one-element list.
  crash_script = check_file(options.crash_script[0])
  file_to_reduce = check_file(options.file_to_reduce[0])

  reducer = Reduce(crash_script, file_to_reduce)

  reducer.simplify_clang_args()
  reducer.write_interestingness_test()
  reducer.clang_preprocess()
  reducer.run_creduce()
  reducer.reduce_clang_args()
410
# Run the reduction only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
413