from __future__ import print_function

import copy
import glob
import re
import subprocess
import sys

if sys.version_info[0] > 2:
    class string:
        expandtabs = str.expandtabs
else:
    import string

##### Common utilities for update_*test_checks.py


_verbose = False


def parse_commandline_args(parser):
    """Register the shared command-line options on `parser` and parse them.

    Adds -v/--verbose, -u/--update-only, --force-update, --enable and
    --disable, then calls `parser.parse_args()` (which reads the process
    arguments). The parsed verbose flag is stored in the module-level
    `_verbose`.

    Returns the parsed argument namespace.
    """
    global _verbose
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('-u', '--update-only', action='store_true',
                        help='Only update test if it was already autogened')
    parser.add_argument('--force-update', action='store_true',
                        help='Update test even if it was autogened by a different script')
    # --enable and --disable share the 'enabled' destination, which defaults
    # to True.
    parser.add_argument('--enable', action='store_true', dest='enabled', default=True,
                        help='Activate CHECK line generation from this point forward')
    parser.add_argument('--disable', action='store_false', dest='enabled',
                        help='Deactivate CHECK line generation from this point forward')
    args = parser.parse_args()
    _verbose = args.verbose
    return args


class InputLineInfo(object):
    """One input line together with the arguments in effect when it was read."""

    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    """State for a single test file that is being (re)generated.

    Stores the path, the input lines, the RUN lines found in them, the
    argument namespace/argv in effect, the comment prefix and the
    autogenerated-note line for the test.
    """

    def __init__(self, test, parser, script_name, input_lines, args, argv,
                 comment_prefix):
        self.parser = parser
        self.path = test
        self.args = args
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # No explicit prefix given: '.mir' paths use '#', everything
            # else uses ';'.
            if self.path.endswith('.mir'):
                self.comment_prefix = '#'
            else:
                self.comment_prefix = ';'
        self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)

    def iterlines(self, output_lines):
        """Yield an InputLineInfo for every input line that should be processed.

        The autogenerated note is appended to `output_lines` first. Lines
        starting with the note prefix are dropped. Each remaining line is
        passed to check_for_command(), which may replace `self.args` and
        `self.argv`; while `self.args.enabled` is false the line is copied
        to `output_lines` unchanged instead of being yielded.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_num, input_line in enumerate(self.input_lines):
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args, self.argv = check_for_command(input_line, self.parser,
                                                     self.args, self.argv)
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield InputLineInfo(input_line, line_num, self.args, self.argv)


def itertests(test_patterns, parser, script_name, comment_prefix=None):
    """Yield a TestInfo for every test file matched by `test_patterns`.

    Each pattern is expanded with glob; patterns without matches produce a
    warning. A test whose first line carries the autogenerated note of a
    different script is skipped unless --force-update was given. A test
    without the note is skipped when --update-only was given.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            args = parser.parse_args()
            argv = sys.argv[:]
            first_line = input_lines[0] if input_lines else ""
            if UTC_ADVERT in first_line:
                if script_name not in first_line and not args.force_update:
                    warn("Skipping test which wasn't autogenerated by " + script_name, test)
                    continue
                # The note line may carry UTC_ARGS that extend the arguments.
                args, argv = check_for_command(first_line, parser, args, argv)
            elif args.update_only:
                warn("Skipping test which isn't autogenerated", test)
                continue
            yield TestInfo(test, parser, script_name, input_lines, args, argv,
                           comment_prefix)


def should_add_line_to_output(input_line, prefix_set):
    """Return False for input lines that must not be copied to the output.

    Those are blank comment lines (a lone ';') and existing CHECK lines
    whose prefix is in `prefix_set`. Blank lines are kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True

# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
    """Run `exe` with the contents of the file `ir` on stdin; return its stdout.

    `cmd_args` is either a list of arguments (executed directly) or a
    single string (appended to `exe` and executed through the shell).
    On Python 3 the captured bytes are decoded to str. '\\r\\n' line
    endings in the result are converted to '\\n'.
    """
    with open(ir) as ir_file:
        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if isinstance(cmd_args, list):
            stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(exe + ' ' + cmd_args,
                                             shell=True, stdin=ir_file)
    if sys.version_info[0] > 2:
        stdout = stdout.decode()
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')

##### LLVM IR parser
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Both pieces are raw strings so that '\s' reaches the regex engine as
# written instead of being an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '

OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# NOTE(review): this copy of the file arrived with whitespace runs collapsed;
# confirm the number of spaces before '\w' in the pattern below against the
# reference version.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)


def error(msg, test_file=None):
    """Print 'ERROR: <msg>' to stderr; ': <test_file>' is appended if given."""
    if test_file:
        msg = '{}: {}'.format(msg, test_file)
    print('ERROR: {}'.format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    """Print 'WARNING: <msg>' to stderr; ': <test_file>' is appended if given."""
    if test_file:
        msg = '{}: {}'.format(msg, test_file)
    print('WARNING: {}'.format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    """Forward to print() when verbose mode is on; output defaults to stderr."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault('file', sys.stderr)
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    """Return the RUN commands found in `lines`.

    A RUN line ending in a backslash is joined with the following RUN
    line: the trailing backslashes are stripped and the two parts are
    separated by a single space. `test` is only used in debug output.
    """
    debug('Scanning for RUN lines in test file:', test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for l in raw_lines:
        if run_lines and run_lines[-1].endswith('\\'):
            run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
        else:
            run_lines.append(l)
    debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
    for l in run_lines:
        debug(' RUN: {}'.format(l))
    return run_lines


def scrub_body(body):
    """Normalise whitespace in `body` and return the result."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation.
    body = string.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    """Call `scrubber(body, *scrubber_args)` and return its result.

    When `scrubber_args` is non-empty, the call is made with a deep copy
    whose first element has `extra_scrub` set to `extra`, so the caller's
    argument objects are not modified.
    """
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed body of one function plus its extra-scrubbed form and signature."""

    def __init__(self, string, extra, args_and_sig):
        self.scrub = string
        self.extrascrub = extra
        self.args_and_sig = args_and_sig

    def is_same_except_arg_names(self, extrascrub, args_and_sig):
        """Return True if the other body/signature differ only in argument names.

        The value names found in either signature are collected and removed
        from both signatures; if the signatures then differ the answer is
        False. Otherwise the collected names are removed from both
        extra-scrubbed bodies, IR comments are stripped, and the bodies are
        compared.
        """
        arg_names = set()

        def drop_arg_names(match):
            # Remember the name and keep only the surrounding delimiters.
            arg_names.add(match.group(2))
            return match.group(1) + match.group(3)

        def repl_arg_names(match):
            # Drop only names that were recorded as arguments.
            if match.group(2) in arg_names:
                return match.group(1) + match.group(3)
            return match.group(1) + match.group(2) + match.group(3)

        ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
        ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
        es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
    """Record the scrubbed body of every function in `raw_tool_output`.

    `function_re` must define the named groups 'func' and 'body'; the
    groups 'args_and_sig' and 'analysis' are optional. For every match and
    every prefix in `prefixes`, `func_dict[prefix][func]` is set to a
    function_body. If an entry already exists with a different body or
    signature:
      * if it differs only in argument names, it is updated in place;
      * otherwise, for the last prefix a warning is printed and the entry
        is overwritten; for any other prefix the entry is set to None.
    `func_dict` must already contain a dict for every prefix.
    """
    for m in function_re.finditer(raw_tool_output):
        func = m.group('func')
        body = m.group('body')
        groups = m.groupdict()
        has_sig = 'args_and_sig' in groups
        # Determine if we print arguments, the opening brace, or nothing after the function name
        if record_args and has_sig:
            args_and_sig = scrub_body(m.group('args_and_sig').strip())
        elif has_sig:
            args_and_sig = '('
        else:
            args_and_sig = ''
        scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra=False)
        scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra=True)
        if 'analysis' in groups:
            analysis = m.group('analysis')
            if analysis.lower() != 'cost model analysis':
                warn('Unsupported analysis mode: %r!' % (analysis,))
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            print('Processing function: ' + func, file=sys.stderr)
            for l in scrubbed_body.splitlines():
                print(' ' + l, file=sys.stderr)
        for prefix in prefixes:
            if func in func_dict[prefix]:
                existing = func_dict[prefix][func]
                # `existing` may be None from an earlier conflict; str(None)
                # never equals a real body, so that also counts as a conflict.
                conflicts = (str(existing) != scrubbed_body or
                             (existing and existing.args_and_sig != args_and_sig))
                if conflicts:
                    if existing and existing.is_same_except_arg_names(scrubbed_extra, args_and_sig):
                        existing.scrub = scrubbed_extra
                        existing.args_and_sig = args_and_sig
                        continue
                    if prefix == prefixes[-1]:
                        # Warn, then fall through and overwrite the entry.
                        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
                    else:
                        func_dict[prefix][func] = None
                        continue

            func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)

##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look at identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

NAMELESS_PREFIX = "TMP"


# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
    """Return the upper-cased FileCheck variable name for IR value `var`.

    All-digit names get NAMELESS_PREFIX prepended; '.' and '-' become '_'.
    """
    if var.isdigit():
        var = NAMELESS_PREFIX + var
    var = var.replace('.', '_')
    var = var.replace('-', '_')
    return var.upper()


# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck expression that defines the variable for `var`."""
    return '[[' + get_value_name(var) + ':%.*]]'


# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck expression that uses the variable for `var`."""
    return '[[' + get_value_name(var) + ']]'


# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
    """Rewrite `lines` in place for use as CHECK lines and return the list.

    In every line '%.' is replaced by '%dot' and IR comments are removed.
    Unless `is_analyze` is true, IR value names are replaced by FileCheck
    variables: a definition on first sight, a use afterwards. Names seen
    are added to `vars_seen`.
    """
    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if NAMELESS_PREFIX.lower() in var.lower():
            warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        if is_analyze:
            lines[i] = scrubbed_line
        else:
            lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines


def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
    """Append the CHECK lines for `func_name` to `output_lines`.

    `prefix_list` holds one entry per run line; element 0 of each entry is
    that run line's list of check prefixes. `check_label_format` is a
    %-format string taking (prefix, function name, args_and_sig). With
    `is_asm` the body lines are emitted as they are; otherwise they go
    through genericize_check_lines(), to which `is_analyze` is passed.

    NOTE(review): this copy of the file arrived with whitespace runs
    collapsed, so any column padding inside the format literals below is
    reproduced as received -- confirm against the reference version.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(func_name not in func_dict[checkprefix] for checkprefix in checkprefixes):
            prefix_exclusions |= set(checkprefixes)

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_asm:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
            if '[[' in args_and_sig:
                # The signature contains FileCheck variables, so it goes on
                # a separate -SAME line after the label.
                output_lines.append(check_label_format % (checkprefix, func_name, ''))
                output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
            else:
                output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
            func_body = str(func_dict[checkprefix][func_name]).splitlines()

            # For ASM output, just emit the check lines.
            if is_asm:
                output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                for func_line in func_body[1:]:
                    if func_line.strip() == '':
                        output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
                    else:
                        output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
                break

            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

            # Every line of the body is checked, including the first one.
            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them.
                if is_blank_line:
                    output_lines.append('{} {}: {}'.format(
                        comment_marker, checkprefix, func_line))
                else:
                    output_lines.append('{} {}-NEXT: {}'.format(
                        comment_marker, checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the first
            # line of code in the test function.
            output_lines.append(comment_marker)
            break


def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
    """Append IR CHECK lines for `func_name` to `output_lines`.

    With `function_sig` the label line also matches the text between
    'define' and '@'. `preserve_names` is passed to add_checks() as its
    `is_analyze` argument, so value names are left untouched when it is
    true.
    """
    # Label format is based on IR string.
    function_def_regex = 'define {{[^@]+}}' if function_sig else ''
    check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
               check_label_format, False, preserve_names)


def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
    """Append analysis CHECK lines for `func_name` to `output_lines`."""
    check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)


def check_prefix(prefix):
    """Warn if `prefix` does not match PREFIX_RE."""
    if not PREFIX_RE.match(prefix):
        hint = ""
        if ',' in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + hint) %
             (prefix,))


def verify_filecheck_prefixes(fc_cmd):
    """Warn about invalid or repeated check prefixes in the command `fc_cmd`."""
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split('=', 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split('=', 1)[1].split(',')
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))


def get_autogennote_suffix(parser, args):
    """Return the ' UTC_ARGS: ...' suffix for the autogenerated note.

    The suffix lists every option in `args` that differs from the parser
    default, except the destinations named in the ignore list below.
    Returns '' when there is nothing to list.
    """
    note_parts = []
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in ('tests', 'update_only', 'opt_binary', 'llc_binary',
                           'clang', 'opt', 'llvm_bin', 'verbose'):
            continue
        value = getattr(args, action.dest)
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        note_parts.append(action.option_strings[0])
        if action.const is None:  # action takes a parameter
            note_parts.append('%s' % value)
    if not note_parts:
        return ''
    return ' %s %s' % (UTC_ARGS_KEY, ' '.join(note_parts))


def check_for_command(line, parser, args, argv):
    """Apply a 'UTC_ARGS:' directive found in `line`, if there is one.

    The directive's arguments are appended to a copy of `argv`, and that
    list (minus entries already in `args.tests`) is parsed again with
    `parser`. Returns the (args, argv) pair, unchanged when `line` has no
    directive.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        # split() without an argument ignores repeated and surrounding
        # whitespace, so no empty-string arguments reach the parser.
        cmd = cmd_m.group('cmd').split()
        argv = argv + cmd
        known_tests = args.tests
        args = parser.parse_args([arg for arg in argv if arg not in known_tests])
    return args, argv