1#!/usr/bin/env python3 2'''A utility to update LLVM IR CHECK lines in C/C++ FileCheck test files. 3 4Example RUN lines in .c/.cc test files: 5 6// RUN: %clang -emit-llvm -S %s -o - -O2 | FileCheck %s 7// RUN: %clangxx -emit-llvm -S %s -o - -O2 | FileCheck -check-prefix=CHECK-A %s 8 9Usage: 10 11% utils/update_cc_test_checks.py --llvm-bin=release/bin test/a.cc 12% utils/update_cc_test_checks.py --clang=release/bin/clang /tmp/c/a.cc 13''' 14 15from __future__ import print_function 16 17import argparse 18import collections 19import json 20import os 21import re 22import shlex 23import shutil 24import subprocess 25import sys 26import tempfile 27 28from UpdateTestChecks import common 29 30SUBST = { 31 '%clang': [], 32 '%clang_cc1': ['-cc1'], 33 '%clangxx': ['--driver-mode=g++'], 34} 35 36def get_line2func_list(args, clang_args): 37 ret = collections.defaultdict(list) 38 # Use clang's JSON AST dump to get the mangled name 39 json_dump_args = [args.clang] + clang_args + ['-fsyntax-only', '-o', '-'] 40 if '-cc1' not in json_dump_args: 41 # For tests that invoke %clang instead if %clang_cc1 we have to use 42 # -Xclang -ast-dump=json instead: 43 json_dump_args.append('-Xclang') 44 json_dump_args.append('-ast-dump=json') 45 common.debug('Running', ' '.join(json_dump_args)) 46 47 popen = subprocess.Popen(json_dump_args, stdout=subprocess.PIPE, 48 stderr=subprocess.PIPE, universal_newlines=True) 49 stdout, stderr = popen.communicate() 50 if popen.returncode != 0: 51 sys.stderr.write('Failed to run ' + ' '.join(json_dump_args) + '\n') 52 sys.stderr.write(stderr) 53 sys.stderr.write(stdout) 54 sys.exit(2) 55 56 # Parse the clang JSON and add all children of type FunctionDecl. 57 # TODO: Should we add checks for global variables being emitted? 58 def parse_clang_ast_json(node, loc, search): 59 node_kind = node['kind'] 60 # Recurse for the following nodes that can contain nested function decls: 61 if node_kind in ('NamespaceDecl', 'LinkageSpecDecl', 'TranslationUnitDecl', 62 'CXXRecordDecl', 'ClassTemplateSpecializationDecl'): 63 # Specializations must use the loc from the specialization, not the 64 # template, and search for the class's spelling as the specialization 65 # does not mention the method names in the source. 66 if node_kind == 'ClassTemplateSpecializationDecl': 67 inner_loc = node['loc'] 68 inner_search = node['name'] 69 else: 70 inner_loc = None 71 inner_search = None 72 if 'inner' in node: 73 for inner in node['inner']: 74 parse_clang_ast_json(inner, inner_loc, inner_search) 75 # Otherwise we ignore everything except functions: 76 if node_kind not in ('FunctionDecl', 'CXXMethodDecl', 'CXXConstructorDecl', 77 'CXXDestructorDecl', 'CXXConversionDecl'): 78 return 79 if loc is None: 80 loc = node['loc'] 81 if node.get('isImplicit') is True and node.get('storageClass') == 'extern': 82 common.debug('Skipping builtin function:', node['name'], '@', loc) 83 return 84 common.debug('Found function:', node['kind'], node['name'], '@', loc) 85 line = loc.get('line') 86 # If there is no line it is probably a builtin function -> skip 87 if line is None: 88 common.debug('Skipping function without line number:', node['name'], '@', loc) 89 return 90 91 # If there is no 'inner' object, it is a function declaration and we can 92 # skip it. However, function declarations may also contain an 'inner' list, 93 # but in that case it will only contains ParmVarDecls. If we find an entry 94 # that is not a ParmVarDecl, we know that this is a function definition. 95 has_body = False 96 if 'inner' in node: 97 for i in node['inner']: 98 if i.get('kind', 'ParmVarDecl') != 'ParmVarDecl': 99 has_body = True 100 break 101 if not has_body: 102 common.debug('Skipping function without body:', node['name'], '@', loc) 103 return 104 spell = node['name'] 105 if search is None: 106 search = spell 107 mangled = node.get('mangledName', spell) 108 ret[int(line)-1].append((spell, mangled, search)) 109 110 ast = json.loads(stdout) 111 if ast['kind'] != 'TranslationUnitDecl': 112 common.error('Clang AST dump JSON format changed?') 113 sys.exit(2) 114 parse_clang_ast_json(ast, None, None) 115 116 for line, funcs in sorted(ret.items()): 117 for func in funcs: 118 common.debug('line {}: found function {}'.format(line+1, func), file=sys.stderr) 119 if not ret: 120 common.warn('Did not find any functions using', ' '.join(json_dump_args)) 121 return ret 122 123 124def str_to_commandline(value): 125 if not value: 126 return [] 127 return shlex.split(value) 128 129 130def infer_dependent_args(args): 131 if not args.clang: 132 if not args.llvm_bin: 133 args.clang = 'clang' 134 else: 135 args.clang = os.path.join(args.llvm_bin, 'clang') 136 if not args.opt: 137 if not args.llvm_bin: 138 args.opt = 'opt' 139 else: 140 args.opt = os.path.join(args.llvm_bin, 'opt') 141 142 143def find_executable(executable): 144 _, ext = os.path.splitext(executable) 145 if sys.platform == 'win32' and ext != '.exe': 146 executable = executable + '.exe' 147 148 return shutil.which(executable) 149 150 151def config(): 152 parser = argparse.ArgumentParser( 153 description=__doc__, 154 formatter_class=argparse.RawTextHelpFormatter) 155 parser.add_argument('--llvm-bin', help='llvm $prefix/bin path') 156 parser.add_argument('--clang', 157 help='"clang" executable, defaults to $llvm_bin/clang') 158 parser.add_argument('--clang-args', default=[], type=str_to_commandline, 159 help='Space-separated extra args to clang, e.g. --clang-args=-v') 160 parser.add_argument('--opt', 161 help='"opt" executable, defaults to $llvm_bin/opt') 162 parser.add_argument( 163 '--functions', nargs='+', help='A list of function name regexes. ' 164 'If specified, update CHECK lines for functions matching at least one regex') 165 parser.add_argument( 166 '--x86_extra_scrub', action='store_true', 167 help='Use more regex for x86 matching to reduce diffs between various subtargets') 168 parser.add_argument('--function-signature', action='store_true', 169 help='Keep function signature information around for the check line') 170 parser.add_argument('--check-attributes', action='store_true', 171 help='Check "Function Attributes" for functions') 172 parser.add_argument('--check-globals', action='store_true', 173 help='Check global entries (global variables, metadata, attribute sets, ...) for functions') 174 parser.add_argument('tests', nargs='+') 175 args = common.parse_commandline_args(parser) 176 infer_dependent_args(args) 177 178 if not find_executable(args.clang): 179 print('Please specify --llvm-bin or --clang', file=sys.stderr) 180 sys.exit(1) 181 182 # Determine the builtin includes directory so that we can update tests that 183 # depend on the builtin headers. See get_clang_builtin_include_dir() and 184 # use_clang() in llvm/utils/lit/lit/llvm/config.py. 185 try: 186 builtin_include_dir = subprocess.check_output( 187 [args.clang, '-print-file-name=include']).decode().strip() 188 SUBST['%clang_cc1'] = ['-cc1', '-internal-isystem', builtin_include_dir, 189 '-nostdsysteminc'] 190 except subprocess.CalledProcessError: 191 common.warn('Could not determine clang builtins directory, some tests ' 192 'might not update correctly.') 193 194 if not find_executable(args.opt): 195 # Many uses of this tool will not need an opt binary, because it's only 196 # needed for updating a test that runs clang | opt | FileCheck. So we 197 # defer this error message until we find that opt is actually needed. 198 args.opt = None 199 200 return args, parser 201 202 203def get_function_body(builder, args, filename, clang_args, extra_commands, 204 prefixes): 205 # TODO Clean up duplication of asm/common build_function_body_dictionary 206 # Invoke external tool and extract function bodies. 207 raw_tool_output = common.invoke_tool(args.clang, clang_args, filename) 208 for extra_command in extra_commands: 209 extra_args = shlex.split(extra_command) 210 with tempfile.NamedTemporaryFile() as f: 211 f.write(raw_tool_output.encode()) 212 f.flush() 213 if extra_args[0] == 'opt': 214 if args.opt is None: 215 print(filename, 'needs to run opt. ' 216 'Please specify --llvm-bin or --opt', file=sys.stderr) 217 sys.exit(1) 218 extra_args[0] = args.opt 219 raw_tool_output = common.invoke_tool(extra_args[0], 220 extra_args[1:], f.name) 221 if '-emit-llvm' in clang_args: 222 builder.process_run_line( 223 common.OPT_FUNCTION_RE, common.scrub_body, raw_tool_output, 224 prefixes, False) 225 builder.processed_prefixes(prefixes) 226 else: 227 print('The clang command line should include -emit-llvm as asm tests ' 228 'are discouraged in Clang testsuite.', file=sys.stderr) 229 sys.exit(1) 230 231def exec_run_line(exe): 232 popen = subprocess.Popen(exe, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) 233 stdout, stderr = popen.communicate() 234 if popen.returncode != 0: 235 sys.stderr.write('Failed to run ' + ' '.join(exe) + '\n') 236 sys.stderr.write(stderr) 237 sys.stderr.write(stdout) 238 sys.exit(3) 239 240def main(): 241 initial_args, parser = config() 242 script_name = os.path.basename(__file__) 243 244 for ti in common.itertests(initial_args.tests, parser, 'utils/' + script_name, 245 comment_prefix='//', argparse_callback=infer_dependent_args): 246 # Build a list of filechecked and non-filechecked RUN lines. 247 run_list = [] 248 line2func_list = collections.defaultdict(list) 249 250 subs = { 251 '%s' : ti.path, 252 '%t' : tempfile.NamedTemporaryFile().name, 253 '%S' : os.path.dirname(ti.path), 254 } 255 256 for l in ti.run_lines: 257 commands = [cmd.strip() for cmd in l.split('|')] 258 259 triple_in_cmd = None 260 m = common.TRIPLE_ARG_RE.search(commands[0]) 261 if m: 262 triple_in_cmd = m.groups()[0] 263 264 # Parse executable args. 265 exec_args = shlex.split(commands[0]) 266 # Execute non-clang runline. 267 if exec_args[0] not in SUBST: 268 # Do lit-like substitutions. 269 for s in subs: 270 exec_args = [i.replace(s, subs[s]) if s in i else i for i in exec_args] 271 run_list.append((None, exec_args, None, None)) 272 continue 273 # This is a clang runline, apply %clang substitution rule, do lit-like substitutions, 274 # and append args.clang_args 275 clang_args = exec_args 276 clang_args[0:1] = SUBST[clang_args[0]] 277 for s in subs: 278 clang_args = [i.replace(s, subs[s]) if s in i else i for i in clang_args] 279 clang_args += ti.args.clang_args 280 281 # Extract -check-prefix in FileCheck args 282 filecheck_cmd = commands[-1] 283 common.verify_filecheck_prefixes(filecheck_cmd) 284 if not filecheck_cmd.startswith('FileCheck '): 285 # Execute non-filechecked clang runline. 286 exe = [ti.args.clang] + clang_args 287 run_list.append((None, exe, None, None)) 288 continue 289 290 check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd) 291 for item in m.group(1).split(',')] 292 if not check_prefixes: 293 check_prefixes = ['CHECK'] 294 run_list.append((check_prefixes, clang_args, commands[1:-1], triple_in_cmd)) 295 296 # Execute clang, generate LLVM IR, and extract functions. 297 298 # Store only filechecked runlines. 299 filecheck_run_list = [i for i in run_list if i[0]] 300 builder = common.FunctionTestBuilder( 301 run_list=filecheck_run_list, 302 flags=ti.args, 303 scrubber_args=[], 304 path=ti.path) 305 306 for prefixes, args, extra_commands, triple_in_cmd in run_list: 307 # Execute non-filechecked runline. 308 if not prefixes: 309 print('NOTE: Executing non-FileChecked RUN line: ' + ' '.join(args), file=sys.stderr) 310 exec_run_line(args) 311 continue 312 313 clang_args = args 314 common.debug('Extracted clang cmd: clang {}'.format(clang_args)) 315 common.debug('Extracted FileCheck prefixes: {}'.format(prefixes)) 316 317 get_function_body(builder, ti.args, ti.path, clang_args, extra_commands, 318 prefixes) 319 320 # Invoke clang -Xclang -ast-dump=json to get mapping from start lines to 321 # mangled names. Forward all clang args for now. 322 for k, v in get_line2func_list(ti.args, clang_args).items(): 323 line2func_list[k].extend(v) 324 325 func_dict = builder.finish_and_get_func_dict() 326 global_vars_seen_dict = {} 327 prefix_set = set([prefix for p in filecheck_run_list for prefix in p[0]]) 328 output_lines = [] 329 has_checked_pre_function_globals = False 330 331 include_generated_funcs = common.find_arg_in_test(ti, 332 lambda args: ti.args.include_generated_funcs, 333 '--include-generated-funcs', 334 True) 335 generated_prefixes = [] 336 if include_generated_funcs: 337 # Generate the appropriate checks for each function. We need to emit 338 # these in the order according to the generated output so that CHECK-LABEL 339 # works properly. func_order provides that. 340 341 # It turns out that when clang generates functions (for example, with 342 # -fopenmp), it can sometimes cause functions to be re-ordered in the 343 # output, even functions that exist in the source file. Therefore we 344 # can't insert check lines before each source function and instead have to 345 # put them at the end. So the first thing to do is dump out the source 346 # lines. 347 common.dump_input_lines(output_lines, ti, prefix_set, '//') 348 349 # Now generate all the checks. 350 def check_generator(my_output_lines, prefixes, func): 351 if '-emit-llvm' in clang_args: 352 return common.add_ir_checks(my_output_lines, '//', 353 prefixes, 354 func_dict, func, False, 355 ti.args.function_signature, 356 global_vars_seen_dict, 357 is_filtered=builder.is_filtered()) 358 else: 359 return asm.add_checks(my_output_lines, '//', 360 prefixes, 361 func_dict, func, global_vars_seen_dict, 362 is_filtered=builder.is_filtered()) 363 364 if ti.args.check_globals: 365 generated_prefixes.extend( 366 common.add_global_checks(builder.global_var_dict(), '//', run_list, 367 output_lines, global_vars_seen_dict, True, 368 True)) 369 generated_prefixes.extend( 370 common.add_checks_at_end( 371 output_lines, filecheck_run_list, builder.func_order(), '//', 372 lambda my_output_lines, prefixes, func: check_generator( 373 my_output_lines, prefixes, func))) 374 else: 375 # Normal mode. Put checks before each source function. 376 for line_info in ti.iterlines(output_lines): 377 idx = line_info.line_number 378 line = line_info.line 379 args = line_info.args 380 include_line = True 381 m = common.CHECK_RE.match(line) 382 if m and m.group(1) in prefix_set: 383 continue # Don't append the existing CHECK lines 384 # Skip special separator comments added by commmon.add_global_checks. 385 if line.strip() == '//' + common.SEPARATOR: 386 continue 387 if idx in line2func_list: 388 added = set() 389 for spell, mangled, search in line2func_list[idx]: 390 # One line may contain multiple function declarations. 391 # Skip if the mangled name has been added before. 392 # The line number may come from an included file, we simply require 393 # the search string (normally the function's spelling name, but is 394 # the class's spelling name for class specializations) to appear on 395 # the line to exclude functions from other files. 396 if mangled in added or search not in line: 397 continue 398 if args.functions is None or any(re.search(regex, spell) for regex in args.functions): 399 last_line = output_lines[-1].strip() 400 while last_line == '//': 401 # Remove the comment line since we will generate a new comment 402 # line as part of common.add_ir_checks() 403 output_lines.pop() 404 last_line = output_lines[-1].strip() 405 if ti.args.check_globals and not has_checked_pre_function_globals: 406 generated_prefixes.extend( 407 common.add_global_checks(builder.global_var_dict(), '//', 408 run_list, output_lines, 409 global_vars_seen_dict, True, True)) 410 has_checked_pre_function_globals = True 411 if added: 412 output_lines.append('//') 413 added.add(mangled) 414 generated_prefixes.extend( 415 common.add_ir_checks( 416 output_lines, 417 '//', 418 filecheck_run_list, 419 func_dict, 420 mangled, 421 False, 422 args.function_signature, 423 global_vars_seen_dict, 424 is_filtered=builder.is_filtered())) 425 if line.rstrip('\n') == '//': 426 include_line = False 427 428 if include_line: 429 output_lines.append(line.rstrip('\n')) 430 431 if ti.args.check_globals: 432 generated_prefixes.extend( 433 common.add_global_checks(builder.global_var_dict(), '//', run_list, 434 output_lines, global_vars_seen_dict, True, 435 False)) 436 if ti.args.gen_unused_prefix_body: 437 output_lines.extend( 438 ti.get_checks_for_unused_prefixes(run_list, generated_prefixes)) 439 common.debug('Writing %d lines to %s...' % (len(output_lines), ti.path)) 440 with open(ti.path, 'wb') as f: 441 f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) 442 443 return 0 444 445 446if __name__ == '__main__': 447 sys.exit(main()) 448