#!/usr/bin/env python3
"""Calls C-Reduce to create a minimal reproducer for clang crashes.
Unknown arguments are treated as creduce options.

Output files:
  *.reduced.sh -- crash reproducer with minimal arguments
  *.reduced.cpp -- the reduced file
  *.test.sh -- interestingness test for C-Reduce
"""

from argparse import ArgumentParser, RawTextHelpFormatter
import multiprocessing
import os
import re
import shlex
import shutil
import stat
import subprocess
import sys
import tempfile

# Module-level configuration, filled in by main() before a Reduce is created.
verbose = False
creduce_cmd = None
clang_cmd = None


def verbose_print(*args, **kwargs):
    """Forward the arguments to print() only when --verbose was given."""
    if verbose:
        print(*args, **kwargs)


def check_file(fname):
    """Return the normalized path of fname, exiting if it is not a file."""
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    return fname


def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    When cmd_dir is None the executable is looked up in $PATH.
    Exits the script with an error message if nothing is found.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = shutil.which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = shutil.which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))


def quote_cmd(cmd):
    """Join a list of arguments into one shell-quoted command string."""
    return " ".join(shlex.quote(arg) for arg in cmd)


def write_to_script(text, filename):
    """Write text to filename and add the owner-execute bit to its mode."""
    with open(filename, "w") as f:
        f.write(text)
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)


class Reduce:
    """Drives the reduction of one clang crash.

    Holds the clang command line read from the crash script, the strings
    that identify the crash in clang's output, and the names of the files
    this script produces (see the module docstring).
    """

    def __init__(self, crash_script, file_to_reduce, creduce_flags):
        """Copy the input file, read the clang arguments and record the crash.

        crash_script: path of the script whose first command invokes clang.
        file_to_reduce: path of the source file that triggers the crash.
        creduce_flags: list of extra options forwarded to creduce.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        # All further work happens on a copy; the original input is untouched.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        self.creduce_flags = ["--tidy"] + creduce_flags

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the command list `[cmd] + args + [filename]`.

        Each parameter defaults to the corresponding instance attribute
        (self.clang, self.clang_args, self.file_to_reduce).
        """
        if not cmd:
            cmd = self.clang
        # Only None selects the default: an explicitly empty argument list
        # must be honoured, otherwise "does it still crash with no
        # arguments?" would silently be answered using the full argument
        # list.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def _get_crash_output(self, args=None, filename=None):
        """Run the crash command and return its stdout+stderr as text.

        Bytes that are not valid UTF-8 are replaced rather than raising
        UnicodeDecodeError.
        """
        p = subprocess.Popen(
            self.get_crash_cmd(args=args, filename=filename),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        crash_output, _ = p.communicate()
        return crash_output.decode("utf-8", errors="replace")

    def read_clang_args(self, crash_script, filename):
        """Set self.clang_args from the first command line in crash_script.

        The executable (first word) and the last occurrence of filename are
        dropped from the command. Exits if the script contains no command.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first line that is neither blank nor
            # a comment.
            cmd = []
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith("#"):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and set self.expected_output.

        The result is either one well-known error message or, failing that,
        up to five function names from the stack trace (in which case
        self.needs_stack_trace is set). Exits if neither can be found.
        """
        print("\nGetting expected crash output...")
        crash_output = self._get_crash_output()
        result = []

        # Remove color codes
        ansi_escape = r"\x1b\[[0-?]*m"
        crash_output = re.sub(ansi_escape, "", crash_output)

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang prints every expected crash string.

        args and filename default to self.clang_args and self.file_to_reduce;
        an empty args list is used as given.
        """
        crash_output = self._get_crash_output(args=args, filename=filename)
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the bash interestingness test to self.testfile and verify it.

        The generated script fails if the crash command exits successfully
        and then requires each expected string to appear in t.log.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (
            disable_symbolization,
            quote_cmd(self.get_crash_cmd()),
        )

        for msg in self.expected_output:
            output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes on the input and fails on an empty file."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if it still crashes.

        Tries `-E -P` (no line markers) first, then plain `-E`. The file is
        left unchanged when neither variant reproduces the crash or when
        preprocessing itself fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=(), opts_startswith=(), opts_one_arg_startswith=()
    ):
        """Return a new list with the matching arguments removed.

        opts_equal: arguments dropped when they match exactly.
        opts_startswith: prefixes; arguments starting with one are dropped.
        opts_one_arg_startswith: prefixes; a matching argument is dropped
            together with the argument that follows it.
        """
        opts_equal = tuple(opts_equal)
        # str.startswith accepts a tuple of prefixes (False for an empty one).
        opts_startswith = tuple(opts_startswith)
        opts_one_arg_startswith = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in opts_equal:
                continue
            if arg.startswith(opts_startswith):
                continue
            if arg.startswith(opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return args filtered by filter_args(**kwargs) if the crash persists.

        When extra_arg is given it is moved to (or added at) the end of the
        candidate list. If the candidate equals args or no longer reproduces
        the crash, the original args are returned. msg is printed in verbose
        mode on success.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping args[index] (and its operand) and re-check the crash.

        Returns (new_args, index) if the crash still reproduces, so the
        caller re-examines the same position, or (args, index + 1) otherwise.
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")
        new_args = self.clang_args

        # Remove the color diagnostics flag to make it easier to match error
        # text.
        new_args = self.try_remove_args(
            new_args,
            msg="Removed -fcolor-diagnostics",
            opts_equal=["-fcolor-diagnostics"],
        )

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_one_arg_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_one_arg_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -W options", opts_startswith=["-W"]
        )

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(
            new_args, msg="Removed -mllvm", opts_one_arg_startswith=["-mllvm"]
        )

        # Try every remaining argument one at a time; the helper advances the
        # index only when the argument turned out to be required.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run creduce on the interestingness test and the file to reduce.

        A KeyboardInterrupt kills the creduce process instead of aborting
        this script.
        """
        full_creduce_cmd = (
            [creduce_cmd] + self.creduce_flags + [self.testfile, self.file_to_reduce]
        )
        print("\nRunning C-Reduce...")
        verbose_print(quote_cmd(full_creduce_cmd))
        # Bound before the try so the handler never sees an undefined name if
        # the interrupt arrives before Popen returns.
        proc = None
        try:
            proc = subprocess.Popen(full_creduce_cmd)
            proc.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            if proc is not None:
                proc.kill()


def main():
    """Parse the command line and run the full reduction pipeline."""
    global verbose
    global creduce_cmd
    global clang_cmd

    parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "crash_script",
        type=str,
        nargs=1,
        help="Name of the script that generates the crash.",
    )
    parser.add_argument(
        "file_to_reduce", type=str, nargs=1, help="Name of the file to be reduced."
    )
    parser.add_argument(
        "--llvm-bin", dest="llvm_bin", type=str, help="Path to the LLVM bin directory."
    )
    parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    # Anything argparse does not recognize is forwarded to creduce.
    args, creduce_flags = parser.parse_known_args()
    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd("creduce", None, args.creduce)
    clang_cmd = check_cmd("clang", llvm_bin, args.clang)

    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    # Default the creduce job count to half the CPUs, but at least 4.
    if "--n" not in creduce_flags:
        creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))]

    r = Reduce(crash_script, file_to_reduce, creduce_flags)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()


if __name__ == "__main__":
    main()