#!/usr/bin/env python
"""Calls C-Reduce to create a minimal reproducer for clang crashes.

Output files:
  *.reduced.sh -- crash reproducer with minimal arguments
  *.reduced.cpp -- the reduced file
  *.test.sh -- interestingness test for C-Reduce
"""

from __future__ import print_function
from argparse import ArgumentParser, RawTextHelpFormatter
import os
import re
import stat
import sys
import subprocess
import shlex
import tempfile
import shutil

# `pipes` was removed in Python 3.13 and `distutils` in Python 3.12, so they
# are only used as fallbacks on interpreters lacking the modern equivalents.
try:
    from shlex import quote as shell_quote
except ImportError:  # Python 2
    from pipes import quote as shell_quote

try:
    from shutil import which as find_executable
except ImportError:  # Python 2
    from distutils.spawn import find_executable

verbose = False
creduce_cmd = None
clang_cmd = None
not_cmd = None


def verbose_print(*args, **kwargs):
    """Forward the arguments to print() when the global `verbose` is set."""
    if verbose:
        print(*args, **kwargs)


def check_file(fname):
    """Return `fname` if it names an existing file, otherwise exit."""
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    return fname


def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    When `cmd_dir` is None the executable is looked up in $PATH. Exits with
    an error message if no executable is found.
    """
    if cmd_path:
        cmd = find_executable(cmd_path)
        if cmd:
            # The lookup may return a relative path; make it absolute so the
            # generated scripts do not depend on the current directory.
            return os.path.abspath(cmd)
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = find_executable(cmd_name, path=cmd_dir)
    if cmd:
        return os.path.abspath(cmd)

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))


def quote_cmd(cmd):
    """Join the argument list `cmd` into one shell-quoted command string."""
    return ' '.join(shell_quote(arg) for arg in cmd)


def write_to_script(text, filename):
    """Write `text` to `filename` and set the owner-executable bit on it."""
    with open(filename, 'w') as f:
        f.write(text)
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)


class Reduce(object):
    """Drives the reduction of one crash script / source file pair.

    Attributes set by the constructor:
      testfile         -- path of the interestingness test to generate
      crash_script     -- path of the reduced crash script to generate
      file_to_reduce   -- working copy of the source file
      clang            -- clang executable (taken from the global clang_cmd)
      clang_args       -- clang arguments read from the crash script
      expected_output  -- strings that identify the crash in clang's output
      is_crash         -- False when the failure is a "fatal error:" message
      creduce_flags    -- extra flags passed to C-Reduce
    """

    def __init__(self, crash_script, file_to_reduce):
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # Work on a copy so the original source file is left untouched.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.is_crash = True
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the command list `[cmd] + args + [filename]`.

        Parameters left unset default to self.clang, self.clang_args and
        self.file_to_reduce respectively.
        """
        if not cmd:
            cmd = self.clang
        # Only None selects the default: an explicitly empty list means "no
        # arguments" and must not fall back to the stored arguments.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + list(args) + [filename]

    def read_clang_args(self, crash_script, filename):
        """Read the clang arguments from `crash_script` into self.clang_args.

        The first token (the compiler) and the last occurrence of `filename`
        are dropped. Exits if the script contains no command.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment, non blank line.
            cmd = []
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in self.expected_output either one known error message or,
        failing that, up to five stack trace function names. Clears
        self.is_crash for "fatal error:" messages. Exits if neither a message
        nor a stack trace is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        # Undecodable bytes are replaced rather than aborting the reduction.
        crash_output = re.sub(ansi_escape, '',
                              crash_output.decode('utf-8', 'replace'))

        # Look for specific error messages
        regexes = [r"Assertion `(.+)' failed",  # Linux assert()
                   r"Assertion failed: (.+),",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: (.+)",
                   r"LLVM ERROR: (.+)",
                   # The location is optional: "UNREACHABLE executed!"
                   r"UNREACHABLE executed(?: (at .+))?!",
                   r"LLVM IR generation of declaration '(.+)'",
                   r"Generating code for declaration '(.+)'",
                   r"\*\*\* Bad machine code: (.+) \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                # Fall back to the whole match when the optional group did
                # not participate, so the message is never None.
                msg = match.group(1) or match.group(0)
                result = [msg]
                print("Found message:", msg)

                if "fatal error:" in msg_re:
                    self.is_crash = False
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTraceSignalHandler",
                       "llvm::sys::RunSignalHandlers",
                       "SignalHandler", "__restore_rt", "gsignal", "abort"]
            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches
                      if x and x.strip() not in filters][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" %
                  crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if every expected string appears in clang's output.

        Runs clang with `args` (default self.clang_args) on `filename`
        (default self.file_to_reduce), capturing stdout and stderr together.
        """
        # Only None selects the default so that an empty argument list is
        # really tested as an empty argument list.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        decoded = crash_output.decode('utf-8', 'replace')
        return all(msg in decoded for msg in self.expected_output)

    def write_interestingness_test(self):
        """Generate self.testfile and verify it with check_interestingness."""
        print("\nCreating the interestingness test...")

        crash_flag = "--crash" if self.is_crash else ""

        output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
            (shell_quote(not_cmd), crash_flag,
             quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            output += 'grep -F %s t.log || exit 1\n' % shell_quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes on the file and fails on an empty one."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the "
                     "original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the
        # command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(
                filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the working file with its preprocessed form if possible.

        Tries preprocessing without line markers (-P) first, then with them.
        The working file is only overwritten when the preprocessed output
        still produces the expected crash output.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers "
                          "removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing "
                              "line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of `args` without the selected options.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   together with the argument that follows
        """
        # str.startswith accepts a tuple of prefixes; an empty tuple never
        # matches.
        prefixes = tuple(opts_startswith)
        one_arg_prefixes = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in opts_equal:
                continue
            if arg.startswith(prefixes):
                continue
            if arg.startswith(one_arg_prefixes):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return `args` filtered by filter_args if the crash still occurs.

        `kwargs` are passed to filter_args. If `extra_arg` is given it is
        placed at the end of the new argument list. The original `args` are
        returned when nothing changed or the expected output is lost.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try to remove the argument at `index`.

        Returns (new_args, index) if the crash still reproduces without it,
        otherwise (args, index + 1), i.e. the list to continue with and the
        next index to try.
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time
        the interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from
        # occurring after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args,
                                        msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the
        smallest command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try to drop the remaining arguments one position at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the working file using the interestingness test."""
        print("\nRunning C-Reduce...")
        # Start the process before entering the try block so that `p` is
        # always bound when the KeyboardInterrupt handler runs.
        p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                             [self.testfile, self.file_to_reduce])
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            p.kill()


def main():
    """Parse the command line, locate the tools and run the reduction."""
    global verbose
    global creduce_cmd
    global clang_cmd
    global not_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--llvm-not', dest='llvm_not', type=str,
                        help="The path to the `not` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--clang', dest='clang', type=str,
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd('creduce', None, args.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, args.clang)
    not_cmd = check_cmd('not', llvm_bin, args.llvm_not)

    # nargs=1 makes argparse store each positional as a one-element list.
    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    r = Reduce(crash_script, file_to_reduce)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()


if __name__ == '__main__':
    main()