1#!/usr/bin/env python3 2"""A utility to update LLVM IR CHECK lines in C/C++ FileCheck test files. 3 4Example RUN lines in .c/.cc test files: 5 6// RUN: %clang -emit-llvm -S %s -o - -O2 | FileCheck %s 7// RUN: %clangxx -emit-llvm -S %s -o - -O2 | FileCheck -check-prefix=CHECK-A %s 8 9Usage: 10 11% utils/update_cc_test_checks.py --llvm-bin=release/bin test/a.cc 12% utils/update_cc_test_checks.py --clang=release/bin/clang /tmp/c/a.cc 13""" 14 15from __future__ import print_function 16 17import argparse 18import collections 19import json 20import os 21import re 22import shlex 23import shutil 24import subprocess 25import sys 26import tempfile 27 28from UpdateTestChecks import common 29 30SUBST = { 31 "%clang": [], 32 "%clang_cc1": ["-cc1"], 33 "%clangxx": ["--driver-mode=g++"], 34} 35 36 37def get_line2func_list(args, clang_args, globals_name_prefix): 38 ret = collections.defaultdict(list) 39 # Use clang's JSON AST dump to get the mangled name 40 json_dump_args = [args.clang] + clang_args + ["-fsyntax-only", "-o", "-"] 41 if "-cc1" not in json_dump_args: 42 # For tests that invoke %clang instead if %clang_cc1 we have to use 43 # -Xclang -ast-dump=json instead: 44 json_dump_args.append("-Xclang") 45 json_dump_args.append("-ast-dump=json") 46 common.debug("Running", " ".join(json_dump_args)) 47 48 popen = subprocess.Popen( 49 json_dump_args, 50 stdout=subprocess.PIPE, 51 stderr=subprocess.PIPE, 52 universal_newlines=True, 53 ) 54 stdout, stderr = popen.communicate() 55 if popen.returncode != 0: 56 sys.stderr.write("Failed to run " + " ".join(json_dump_args) + "\n") 57 sys.stderr.write(stderr) 58 sys.stderr.write(stdout) 59 sys.exit(2) 60 61 # Parse the clang JSON and add all children of type FunctionDecl. 62 # TODO: Should we add checks for global variables being emitted? 63 def parse_clang_ast_json(node, loc, search): 64 node_kind = node["kind"] 65 # Recurse for the following nodes that can contain nested function decls: 66 if node_kind in ( 67 "NamespaceDecl", 68 "LinkageSpecDecl", 69 "TranslationUnitDecl", 70 "CXXRecordDecl", 71 "ClassTemplateSpecializationDecl", 72 ): 73 # Specializations must use the loc from the specialization, not the 74 # template, and search for the class's spelling as the specialization 75 # does not mention the method names in the source. 76 if node_kind == "ClassTemplateSpecializationDecl": 77 inner_loc = node["loc"] 78 inner_search = node["name"] 79 else: 80 inner_loc = None 81 inner_search = None 82 if "inner" in node: 83 for inner in node["inner"]: 84 parse_clang_ast_json(inner, inner_loc, inner_search) 85 # Otherwise we ignore everything except functions: 86 if node_kind not in ( 87 "FunctionDecl", 88 "CXXMethodDecl", 89 "CXXConstructorDecl", 90 "CXXDestructorDecl", 91 "CXXConversionDecl", 92 ): 93 return 94 if loc is None: 95 loc = node["loc"] 96 if node.get("isImplicit") is True and node.get("storageClass") == "extern": 97 common.debug("Skipping builtin function:", node["name"], "@", loc) 98 return 99 common.debug("Found function:", node["kind"], node["name"], "@", loc) 100 line = loc.get("line") 101 # If there is no line it is probably a builtin function -> skip 102 if line is None: 103 common.debug( 104 "Skipping function without line number:", node["name"], "@", loc 105 ) 106 return 107 108 # If there is no 'inner' object, it is a function declaration and we can 109 # skip it. However, function declarations may also contain an 'inner' list, 110 # but in that case it will only contains ParmVarDecls. If we find an entry 111 # that is not a ParmVarDecl, we know that this is a function definition. 112 has_body = False 113 if "inner" in node: 114 for i in node["inner"]: 115 if i.get("kind", "ParmVarDecl") != "ParmVarDecl": 116 has_body = True 117 break 118 if not has_body: 119 common.debug("Skipping function without body:", node["name"], "@", loc) 120 return 121 spell = node["name"] 122 if search is None: 123 search = spell 124 mangled = node.get("mangledName", spell) 125 # Clang's AST dump includes the globals prefix, but when Clang emits 126 # LLVM IR this is not included and instead added as part of the asm 127 # output. Strip it from the mangled name of globals when needed 128 # (see DataLayout::getGlobalPrefix()). 129 if globals_name_prefix: 130 storage = node.get("storageClass", None) 131 if storage != "static" and mangled[0] == globals_name_prefix: 132 mangled = mangled[1:] 133 ret[int(line) - 1].append((spell, mangled, search)) 134 135 ast = json.loads(stdout) 136 if ast["kind"] != "TranslationUnitDecl": 137 common.error("Clang AST dump JSON format changed?") 138 sys.exit(2) 139 parse_clang_ast_json(ast, None, None) 140 141 for line, funcs in sorted(ret.items()): 142 for func in funcs: 143 common.debug( 144 "line {}: found function {}".format(line + 1, func), file=sys.stderr 145 ) 146 if not ret: 147 common.warn("Did not find any functions using", " ".join(json_dump_args)) 148 return ret 149 150 151def str_to_commandline(value): 152 if not value: 153 return [] 154 return shlex.split(value) 155 156 157def infer_dependent_args(args): 158 if not args.clang: 159 if not args.llvm_bin: 160 args.clang = "clang" 161 else: 162 args.clang = os.path.join(args.llvm_bin, "clang") 163 if not args.opt: 164 if not args.llvm_bin: 165 args.opt = "opt" 166 else: 167 args.opt = os.path.join(args.llvm_bin, "opt") 168 169 170def find_executable(executable): 171 _, ext = os.path.splitext(executable) 172 if sys.platform == "win32" and ext != ".exe": 173 executable = executable + ".exe" 174 175 return shutil.which(executable) 176 177 178def config(): 179 parser = argparse.ArgumentParser( 180 description=__doc__, formatter_class=argparse.RawTextHelpFormatter 181 ) 182 parser.add_argument("--llvm-bin", help="llvm $prefix/bin path") 183 parser.add_argument( 184 "--clang", help='"clang" executable, defaults to $llvm_bin/clang' 185 ) 186 parser.add_argument( 187 "--clang-args", 188 default=[], 189 type=str_to_commandline, 190 help="Space-separated extra args to clang, e.g. --clang-args=-v", 191 ) 192 parser.add_argument("--opt", help='"opt" executable, defaults to $llvm_bin/opt') 193 parser.add_argument( 194 "--functions", 195 nargs="+", 196 help="A list of function name regexes. " 197 "If specified, update CHECK lines for functions matching at least one regex", 198 ) 199 parser.add_argument( 200 "--x86_extra_scrub", 201 action="store_true", 202 help="Use more regex for x86 matching to reduce diffs between various subtargets", 203 ) 204 parser.add_argument( 205 "--function-signature", 206 action="store_true", 207 help="Keep function signature information around for the check line", 208 ) 209 parser.add_argument( 210 "--check-attributes", 211 action="store_true", 212 help='Check "Function Attributes" for functions', 213 ) 214 parser.add_argument( 215 "--check-globals", 216 nargs="?", 217 const="all", 218 default="default", 219 choices=["none", "smart", "all"], 220 help="Check global entries (global variables, metadata, attribute sets, ...) for functions", 221 ) 222 parser.add_argument("tests", nargs="+") 223 args = common.parse_commandline_args(parser) 224 infer_dependent_args(args) 225 226 if not find_executable(args.clang): 227 print("Please specify --llvm-bin or --clang", file=sys.stderr) 228 sys.exit(1) 229 230 # Determine the builtin includes directory so that we can update tests that 231 # depend on the builtin headers. See get_clang_builtin_include_dir() and 232 # use_clang() in llvm/utils/lit/lit/llvm/config.py. 233 try: 234 builtin_include_dir = ( 235 subprocess.check_output([args.clang, "-print-file-name=include"]) 236 .decode() 237 .strip() 238 ) 239 SUBST["%clang_cc1"] = [ 240 "-cc1", 241 "-internal-isystem", 242 builtin_include_dir, 243 "-nostdsysteminc", 244 ] 245 except subprocess.CalledProcessError: 246 common.warn( 247 "Could not determine clang builtins directory, some tests " 248 "might not update correctly." 249 ) 250 251 if not find_executable(args.opt): 252 # Many uses of this tool will not need an opt binary, because it's only 253 # needed for updating a test that runs clang | opt | FileCheck. So we 254 # defer this error message until we find that opt is actually needed. 255 args.opt = None 256 257 return args, parser 258 259 260def get_function_body( 261 builder, args, filename, clang_args, extra_commands, prefixes, raw_tool_output 262): 263 # TODO Clean up duplication of asm/common build_function_body_dictionary 264 for extra_command in extra_commands: 265 extra_args = shlex.split(extra_command) 266 with tempfile.NamedTemporaryFile() as f: 267 f.write(raw_tool_output.encode()) 268 f.flush() 269 if extra_args[0] == "opt": 270 if args.opt is None: 271 print( 272 filename, 273 "needs to run opt. " "Please specify --llvm-bin or --opt", 274 file=sys.stderr, 275 ) 276 sys.exit(1) 277 extra_args[0] = args.opt 278 raw_tool_output = common.invoke_tool(extra_args[0], extra_args[1:], f.name) 279 if "-emit-llvm" in clang_args: 280 builder.process_run_line( 281 common.OPT_FUNCTION_RE, common.scrub_body, raw_tool_output, prefixes 282 ) 283 builder.processed_prefixes(prefixes) 284 else: 285 print( 286 "The clang command line should include -emit-llvm as asm tests " 287 "are discouraged in Clang testsuite.", 288 file=sys.stderr, 289 ) 290 sys.exit(1) 291 292 293def exec_run_line(exe): 294 popen = subprocess.Popen( 295 exe, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True 296 ) 297 stdout, stderr = popen.communicate() 298 if popen.returncode != 0: 299 sys.stderr.write("Failed to run " + " ".join(exe) + "\n") 300 sys.stderr.write(stderr) 301 sys.stderr.write(stdout) 302 sys.exit(3) 303 304 305def main(): 306 initial_args, parser = config() 307 script_name = os.path.basename(__file__) 308 309 for ti in common.itertests( 310 initial_args.tests, 311 parser, 312 "utils/" + script_name, 313 comment_prefix="//", 314 argparse_callback=infer_dependent_args, 315 ): 316 # Build a list of filechecked and non-filechecked RUN lines. 317 run_list = [] 318 line2func_list = collections.defaultdict(list) 319 320 subs = { 321 "%s": ti.path, 322 "%t": tempfile.NamedTemporaryFile().name, 323 "%S": os.path.dirname(ti.path), 324 } 325 326 for l in ti.run_lines: 327 commands = [cmd.strip() for cmd in l.split("|")] 328 329 triple_in_cmd = None 330 m = common.TRIPLE_ARG_RE.search(commands[0]) 331 if m: 332 triple_in_cmd = m.groups()[0] 333 334 # Parse executable args. 335 exec_args = shlex.split(commands[0]) 336 # Execute non-clang runline. 337 if exec_args[0] not in SUBST: 338 # Do lit-like substitutions. 339 for s in subs: 340 exec_args = [ 341 i.replace(s, subs[s]) if s in i else i for i in exec_args 342 ] 343 run_list.append((None, exec_args, None, None)) 344 continue 345 # This is a clang runline, apply %clang substitution rule, do lit-like substitutions, 346 # and append args.clang_args 347 clang_args = exec_args 348 clang_args[0:1] = SUBST[clang_args[0]] 349 for s in subs: 350 clang_args = [ 351 i.replace(s, subs[s]) if s in i else i for i in clang_args 352 ] 353 clang_args += ti.args.clang_args 354 355 # Extract -check-prefix in FileCheck args 356 filecheck_cmd = commands[-1] 357 common.verify_filecheck_prefixes(filecheck_cmd) 358 if not filecheck_cmd.startswith("FileCheck "): 359 # Execute non-filechecked clang runline. 360 exe = [ti.args.clang] + clang_args 361 run_list.append((None, exe, None, None)) 362 continue 363 364 check_prefixes = common.get_check_prefixes(filecheck_cmd) 365 run_list.append((check_prefixes, clang_args, commands[1:-1], triple_in_cmd)) 366 367 # Execute clang, generate LLVM IR, and extract functions. 368 369 # Store only filechecked runlines. 370 filecheck_run_list = [i for i in run_list if i[0]] 371 ginfo = common.make_ir_generalizer(version=ti.args.version) 372 builder = common.FunctionTestBuilder( 373 run_list=filecheck_run_list, 374 flags=ti.args, 375 scrubber_args=[], 376 path=ti.path, 377 ginfo=ginfo, 378 ) 379 380 for prefixes, args, extra_commands, triple_in_cmd in run_list: 381 # Execute non-filechecked runline. 382 if not prefixes: 383 print( 384 "NOTE: Executing non-FileChecked RUN line: " + " ".join(args), 385 file=sys.stderr, 386 ) 387 exec_run_line(args) 388 continue 389 390 clang_args = args 391 common.debug("Extracted clang cmd: clang {}".format(clang_args)) 392 common.debug("Extracted FileCheck prefixes: {}".format(prefixes)) 393 394 # Invoke external tool and extract function bodies. 395 raw_tool_output = common.invoke_tool(ti.args.clang, clang_args, ti.path) 396 get_function_body( 397 builder, 398 ti.args, 399 ti.path, 400 clang_args, 401 extra_commands, 402 prefixes, 403 raw_tool_output, 404 ) 405 406 # Invoke clang -Xclang -ast-dump=json to get mapping from start lines to 407 # mangled names. Forward all clang args for now. 408 for k, v in get_line2func_list( 409 ti.args, clang_args, common.get_globals_name_prefix(raw_tool_output) 410 ).items(): 411 line2func_list[k].extend(v) 412 413 func_dict = builder.finish_and_get_func_dict() 414 global_vars_seen_dict = {} 415 prefix_set = set([prefix for p in filecheck_run_list for prefix in p[0]]) 416 output_lines = [] 417 has_checked_pre_function_globals = False 418 419 include_generated_funcs = common.find_arg_in_test( 420 ti, 421 lambda args: ti.args.include_generated_funcs, 422 "--include-generated-funcs", 423 True, 424 ) 425 generated_prefixes = [] 426 if include_generated_funcs: 427 # Generate the appropriate checks for each function. We need to emit 428 # these in the order according to the generated output so that CHECK-LABEL 429 # works properly. func_order provides that. 430 431 # It turns out that when clang generates functions (for example, with 432 # -fopenmp), it can sometimes cause functions to be re-ordered in the 433 # output, even functions that exist in the source file. Therefore we 434 # can't insert check lines before each source function and instead have to 435 # put them at the end. So the first thing to do is dump out the source 436 # lines. 437 common.dump_input_lines(output_lines, ti, prefix_set, "//") 438 439 # Now generate all the checks. 440 def check_generator(my_output_lines, prefixes, func): 441 return common.add_ir_checks( 442 my_output_lines, 443 "//", 444 prefixes, 445 func_dict, 446 func, 447 False, 448 ti.args.function_signature, 449 ginfo, 450 global_vars_seen_dict, 451 is_filtered=builder.is_filtered(), 452 ) 453 454 if ti.args.check_globals != 'none': 455 generated_prefixes.extend( 456 common.add_global_checks( 457 builder.global_var_dict(), 458 "//", 459 run_list, 460 output_lines, 461 ginfo, 462 global_vars_seen_dict, 463 False, 464 True, 465 ti.args.check_globals, 466 ) 467 ) 468 generated_prefixes.extend( 469 common.add_checks_at_end( 470 output_lines, 471 filecheck_run_list, 472 builder.func_order(), 473 "//", 474 lambda my_output_lines, prefixes, func: check_generator( 475 my_output_lines, prefixes, func 476 ), 477 ) 478 ) 479 else: 480 # Normal mode. Put checks before each source function. 481 for line_info in ti.iterlines(output_lines): 482 idx = line_info.line_number 483 line = line_info.line 484 args = line_info.args 485 include_line = True 486 m = common.CHECK_RE.match(line) 487 if m and m.group(1) in prefix_set: 488 continue # Don't append the existing CHECK lines 489 # Skip special separator comments added by commmon.add_global_checks. 490 if line.strip() == "//" + common.SEPARATOR: 491 continue 492 if idx in line2func_list: 493 added = set() 494 for spell, mangled, search in line2func_list[idx]: 495 # One line may contain multiple function declarations. 496 # Skip if the mangled name has been added before. 497 # The line number may come from an included file, we simply require 498 # the search string (normally the function's spelling name, but is 499 # the class's spelling name for class specializations) to appear on 500 # the line to exclude functions from other files. 501 if mangled in added or search not in line: 502 continue 503 if args.functions is None or any( 504 re.search(regex, spell) for regex in args.functions 505 ): 506 last_line = output_lines[-1].strip() 507 while last_line == "//": 508 # Remove the comment line since we will generate a new comment 509 # line as part of common.add_ir_checks() 510 output_lines.pop() 511 last_line = output_lines[-1].strip() 512 if ( 513 ti.args.check_globals != 'none' 514 and not has_checked_pre_function_globals 515 ): 516 generated_prefixes.extend( 517 common.add_global_checks( 518 builder.global_var_dict(), 519 "//", 520 run_list, 521 output_lines, 522 ginfo, 523 global_vars_seen_dict, 524 False, 525 True, 526 ti.args.check_globals, 527 ) 528 ) 529 has_checked_pre_function_globals = True 530 if added: 531 output_lines.append("//") 532 added.add(mangled) 533 generated_prefixes.extend( 534 common.add_ir_checks( 535 output_lines, 536 "//", 537 filecheck_run_list, 538 func_dict, 539 mangled, 540 False, 541 args.function_signature, 542 ginfo, 543 global_vars_seen_dict, 544 is_filtered=builder.is_filtered(), 545 ) 546 ) 547 if line.rstrip("\n") == "//": 548 include_line = False 549 550 if include_line: 551 output_lines.append(line.rstrip("\n")) 552 553 if ti.args.check_globals != 'none': 554 generated_prefixes.extend( 555 common.add_global_checks( 556 builder.global_var_dict(), 557 "//", 558 run_list, 559 output_lines, 560 ginfo, 561 global_vars_seen_dict, 562 False, 563 False, 564 ti.args.check_globals, 565 ) 566 ) 567 if ti.args.gen_unused_prefix_body: 568 output_lines.extend( 569 ti.get_checks_for_unused_prefixes(run_list, generated_prefixes) 570 ) 571 common.debug("Writing %d lines to %s..." % (len(output_lines), ti.path)) 572 with open(ti.path, "wb") as f: 573 f.writelines(["{}\n".format(l).encode("utf-8") for l in output_lines]) 574 575 return 0 576 577 578if __name__ == "__main__": 579 sys.exit(main()) 580