from __future__ import print_function

import copy
import glob
import re
import subprocess
import sys

if sys.version_info[0] > 2:
    # The Python 3 `string` module has no expandtabs(); expose the str method
    # under the same name so string.expandtabs(text, n) keeps working.
    class string:
        expandtabs = str.expandtabs
else:
    import string

##### Common utilities for update_*test_checks.py


# Set by parse_commandline_args(); read by debug().
_verbose = False


def parse_commandline_args(parser):
    """Register the options shared by the update scripts and parse argv.

    The options are added to *parser* in a fixed order, the command line is
    parsed with parser.parse_args(), the module-level verbosity flag is set
    from the result, and the parsed namespace is returned.
    """
    global _verbose
    shared_options = (
        (('-v', '--verbose'),
         {'action': 'store_true',
          'help': 'Show verbose output'}),
        (('-u', '--update-only'),
         {'action': 'store_true',
          'help': 'Only update test if it was already autogened'}),
        (('--force-update',),
         {'action': 'store_true',
          'help': 'Update test even if it was autogened by a different script'}),
        (('--enable',),
         {'action': 'store_true', 'dest': 'enabled', 'default': True,
          'help': 'Activate CHECK line generation from this point forward'}),
        (('--disable',),
         {'action': 'store_false', 'dest': 'enabled',
          'help': 'Deactivate CHECK line generation from this point forward'}),
    )
    for flags, settings in shared_options:
        parser.add_argument(*flags, **settings)
    parsed = parser.parse_args()
    _verbose = parsed.verbose
    return parsed


class InputLineInfo(object):
    """One input line together with the arguments in effect at that line."""

    def __init__(self, line, line_number, args, argv):
        self.line, self.line_number = line, line_number
        self.args, self.argv = args, argv


class TestInfo(object):
    """State for a single test file that is being (re)generated."""

    def __init__(self, test, parser, script_name, input_lines, args, argv,
                 comment_prefix):
        self.parser = parser
        self.path = test
        self.args = args
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        # Without an explicit prefix, '.mir' files use '#' and all others ';'.
        if comment_prefix is None:
            comment_prefix = '#' if self.path.endswith('.mir') else ';'
        self.comment_prefix = comment_prefix
        self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
        note = self.autogenerated_note_prefix + script_name
        note += get_autogennote_suffix(parser, self.args)
        self.test_autogenerated_note = note

    def iterlines(self, output_lines):
        """Yield an InputLineInfo for every input line that should be processed.

        The autogeneration note is appended to *output_lines* first. Existing
        note lines are dropped. Lines seen while generation is disabled are
        copied to *output_lines* unchanged instead of being yielded.
        """
        output_lines.append(self.test_autogenerated_note)
        for index, text in enumerate(self.input_lines):
            # Discard any previous script advertising.
            if text.startswith(self.autogenerated_note_prefix):
                continue
            # A line may carry UTC_ARGS that change the active arguments.
            self.args, self.argv = check_for_command(text, self.parser,
                                                     self.args, self.argv)
            if self.args.enabled:
                yield InputLineInfo(text, index, self.args, self.argv)
            else:
                output_lines.append(text)


def itertests(test_patterns, parser, script_name, comment_prefix=None):
    """Yield a TestInfo for each test file matched by *test_patterns*.

    Patterns that match nothing produce a warning. A test whose first line
    carries the autogeneration note of another script is skipped unless
    --force-update was given; a test without the note is skipped when
    --update-only was given.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        matched_paths = glob.glob(pattern)
        if not matched_paths:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in matched_paths:
            with open(test) as f:
                input_lines = [raw.rstrip() for raw in f]
            args = parser.parse_args()
            argv = list(sys.argv)
            if input_lines:
                first_line = input_lines[0]
            else:
                first_line = ""
            if UTC_ADVERT in first_line:
                if not (script_name in first_line or args.force_update):
                    warn("Skipping test which wasn't autogenerated by " + script_name, test)
                    continue
                args, argv = check_for_command(first_line, parser, args, argv)
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            yield TestInfo(test, parser, script_name, input_lines, args, argv,
                           comment_prefix)


def should_add_line_to_output(input_line, prefix_set):
    """Return False for lines that must not be copied to the output.

    Blank ';' comment lines are dropped, as are existing check lines whose
    prefix is in *prefix_set* (they are regenerated). Plain blank lines are
    kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    check_match = CHECK_RE.match(input_line)
    return not (check_match and check_match.group(1) in prefix_set)

# Invoke the tool that is being tested.
119def invoke_tool(exe, cmd_args, ir): 120 with open(ir) as ir_file: 121 # TODO Remove the str form which is used by update_test_checks.py and 122 # update_llc_test_checks.py 123 # The safer list form is used by update_cc_test_checks.py 124 if isinstance(cmd_args, list): 125 stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file) 126 else: 127 stdout = subprocess.check_output(exe + ' ' + cmd_args, 128 shell=True, stdin=ir_file) 129 if sys.version_info[0] > 2: 130 stdout = stdout.decode() 131 # Fix line endings to unix CR style. 132 return stdout.replace('\r\n', '\n') 133 134##### LLVM IR parser 135RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$') 136CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)') 137PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$') 138CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:') 139 140UTC_ARGS_KEY = 'UTC_ARGS:' 141UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + '\s*(?P<cmd>.*)\s*$') 142UTC_ADVERT = 'NOTE: Assertions have been autogenerated by ' 143 144OPT_FUNCTION_RE = re.compile( 145 r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*' 146 r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$', 147 flags=(re.M | re.S)) 148 149ANALYZE_FUNCTION_RE = re.compile( 150 r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':' 151 r'\s*\n(?P<body>.*)$', 152 flags=(re.X | re.S)) 153 154IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(') 155TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$') 156TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)') 157MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)') 158 159SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)') 160SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) 161SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) 162SCRUB_TRAILING_WHITESPACE_TEST_RE = 
SCRUB_TRAILING_WHITESPACE_RE 163SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M) 164SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') 165SCRUB_LOOP_COMMENT_RE = re.compile( 166 r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) 167SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M) 168 169 170def error(msg, test_file=None): 171 if test_file: 172 msg = '{}: {}'.format(msg, test_file) 173 print('ERROR: {}'.format(msg), file=sys.stderr) 174 175def warn(msg, test_file=None): 176 if test_file: 177 msg = '{}: {}'.format(msg, test_file) 178 print('WARNING: {}'.format(msg), file=sys.stderr) 179 180def debug(*args, **kwargs): 181 # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs): 182 if 'file' not in kwargs: 183 kwargs['file'] = sys.stderr 184 if _verbose: 185 print(*args, **kwargs) 186 187def find_run_lines(test, lines): 188 debug('Scanning for RUN lines in test file:', test) 189 raw_lines = [m.group(1) 190 for m in [RUN_LINE_RE.match(l) for l in lines] if m] 191 run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] 192 for l in raw_lines[1:]: 193 if run_lines[-1].endswith('\\'): 194 run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l 195 else: 196 run_lines.append(l) 197 debug('Found {} RUN lines in {}:'.format(len(run_lines), test)) 198 for l in run_lines: 199 debug(' RUN: {}'.format(l)) 200 return run_lines 201 202def scrub_body(body): 203 # Scrub runs of whitespace out of the assembly, but leave the leading 204 # whitespace in place. 205 body = SCRUB_WHITESPACE_RE.sub(r' ', body) 206 # Expand the tabs used for indentation. 207 body = string.expandtabs(body, 2) 208 # Strip trailing whitespace. 
209 body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body) 210 return body 211 212def do_scrub(body, scrubber, scrubber_args, extra): 213 if scrubber_args: 214 local_args = copy.deepcopy(scrubber_args) 215 local_args[0].extra_scrub = extra 216 return scrubber(body, *local_args) 217 return scrubber(body, *scrubber_args) 218 219# Build up a dictionary of all the function bodies. 220class function_body(object): 221 def __init__(self, string, extra, args_and_sig, attrs): 222 self.scrub = string 223 self.extrascrub = extra 224 self.args_and_sig = args_and_sig 225 self.attrs = attrs 226 def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs): 227 arg_names = set() 228 def drop_arg_names(match): 229 arg_names.add(match.group(2)) 230 return match.group(1) + match.group(3) 231 def repl_arg_names(match): 232 if match.group(2) in arg_names: 233 return match.group(1) + match.group(3) 234 return match.group(1) + match.group(2) + match.group(3) 235 if self.attrs != attrs: 236 return False 237 ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig) 238 ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig) 239 if ans0 != ans1: 240 return False 241 es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub) 242 es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub) 243 es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0) 244 es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1) 245 return es0 == es1 246 247 def __str__(self): 248 return self.scrub 249 250def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args, check_attributes): 251 for m in function_re.finditer(raw_tool_output): 252 if not m: 253 continue 254 func = m.group('func') 255 body = m.group('body') 256 attrs = m.group('attrs') if check_attributes else '' 257 # Determine if we print arguments, the opening brace, or nothing after the function name 258 if record_args and 'args_and_sig' in m.groupdict(): 259 args_and_sig = scrub_body(m.group('args_and_sig').strip()) 260 
elif 'args_and_sig' in m.groupdict(): 261 args_and_sig = '(' 262 else: 263 args_and_sig = '' 264 scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False) 265 scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True) 266 if 'analysis' in m.groupdict(): 267 analysis = m.group('analysis') 268 if analysis.lower() != 'cost model analysis': 269 warn('Unsupported analysis mode: %r!' % (analysis,)) 270 if func.startswith('stress'): 271 # We only use the last line of the function body for stress tests. 272 scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:]) 273 if verbose: 274 print('Processing function: ' + func, file=sys.stderr) 275 for l in scrubbed_body.splitlines(): 276 print(' ' + l, file=sys.stderr) 277 for prefix in prefixes: 278 if func in func_dict[prefix]: 279 if str(func_dict[prefix][func]) != scrubbed_body or (func_dict[prefix][func] and (func_dict[prefix][func].args_and_sig != args_and_sig or func_dict[prefix][func].attrs != attrs)): 280 if func_dict[prefix][func] and func_dict[prefix][func].is_same_except_arg_names(scrubbed_extra, args_and_sig, attrs): 281 func_dict[prefix][func].scrub = scrubbed_extra 282 func_dict[prefix][func].args_and_sig = args_and_sig 283 continue 284 else: 285 if prefix == prefixes[-1]: 286 warn('Found conflicting asm under the same prefix: %r!' % (prefix,)) 287 else: 288 func_dict[prefix][func] = None 289 continue 290 291 func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig, attrs) 292 293##### Generator of LLVM IR CHECK lines 294 295SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*') 296 297# Match things that look at identifiers, but only if they are followed by 298# spaces, commas, paren, or end of the string 299IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)') 300 301NAMELESS_PREFIX = "TMP" 302 303# Create a FileCheck variable name based on an IR name. 
def get_value_name(var):
    """Return the FileCheck variable name for the IR value name *var*.

    Purely numeric names get NAMELESS_PREFIX prepended; '.' and '-' become
    '_'; the result is upper-cased.
    """
    if var.isdigit():
        var = NAMELESS_PREFIX + var
    var = var.replace('.', '_')
    var = var.replace('-', '_')
    return var.upper()


# Create a FileCheck variable from regex.
def get_value_definition(var):
    """Return the FileCheck expression that defines the variable for *var*."""
    return '[[' + get_value_name(var) + ':%.*]]'


# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck expression that uses the variable for *var*."""
    return '[[' + get_value_name(var) + ']]'


# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
    """Rewrite *lines* in place for use as check lines and return the list.

    IR comments are removed from every line. Unless is_analyze is true, each
    %name is replaced by a FileCheck variable: a definition the first time
    the name is seen (it is then added to *vars_seen*), a use afterwards.
    """
    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if NAMELESS_PREFIX.lower() in var.lower():
            warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        if is_analyze:
            lines[i] = scrubbed_line
        else:
            lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines


def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
    """Append the check lines for *func_name* to *output_lines*.

    prefix_list holds one entry per RUN line; element 0 of each entry is the
    list of check prefixes of that RUN line. For each RUN line, checks are
    emitted for the first prefix that is not excluded and has recorded
    output; the loop over that RUN line stops early at a prefix that has
    already been printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(func_name not in func_dict[checkprefix] for checkprefix in checkprefixes):
            prefix_exclusions |= set(checkprefixes)

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            recorded = func_dict[checkprefix][func_name]

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_asm:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(recorded.attrs)
            attrs = '' if attrs == 'None' else attrs
            if attrs:
                output_lines.append('%s %s: Function Attrs: %s;' % (comment_marker, checkprefix, attrs))
            args_and_sig = str(recorded.args_and_sig)
            args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
            if '[[' in args_and_sig:
                # The signature contains FileCheck variables, so it goes on
                # its own -SAME line after the label.
                output_lines.append(check_label_format % (checkprefix, func_name, ''))
                output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
            else:
                output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
            func_body = str(recorded).splitlines()

            # For ASM output, just emit the check lines.
            if is_asm:
                output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                for func_line in func_body[1:]:
                    if func_line.strip() == '':
                        output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
                    else:
                        output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
                break

            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

            # This could be selectively enabled with an optional invocation argument.
            # Disabled for now: better to check everything. Be safe rather than sorry.

            # Handle the first line of the function body as a special case because
            # it's often just noise (a useless asm comment or entry label).
            #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
            #  is_blank_line = True
            #else:
            #  output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
            #  is_blank_line = False

            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after a
                # blank gets a plain check rather than a -NEXT check.
                if is_blank_line:
                    output_lines.append('{} {}: {}'.format(
                        comment_marker, checkprefix, func_line))
                else:
                    output_lines.append('{} {}-NEXT: {}'.format(
                        comment_marker, checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the first
            # line of code in the test function.
            output_lines.append(comment_marker)
            break


def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
    """Append IR check lines for *func_name* to *output_lines*.

    preserve_names is forwarded as add_checks' is_analyze flag, which makes
    genericize_check_lines leave %names untouched.
    """
    # Label format is based on IR string.
    function_def_regex = 'define {{[^@]+}}' if function_sig else ''
    check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
               check_label_format, False, preserve_names)


def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
    """Append analysis-output check lines for *func_name* to *output_lines*."""
    check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)


def check_prefix(prefix):
    """Warn if *prefix* does not match PREFIX_RE."""
    if not PREFIX_RE.match(prefix):
        hint = ""
        if ',' in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        # Format first and append the hint afterwards: the hint contains the
        # user-supplied prefix and must not be treated as a format string.
        warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." % (prefix,)) + hint)


def verify_filecheck_prefixes(fc_cmd):
    """Warn about invalid or repeated check prefixes in a FileCheck command."""
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split('=', 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split('=', 1)[1].split(',')
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))


def get_autogennote_suffix(parser, args):
    """Return the ' UTC_ARGS: ...' suffix describing non-default options.

    Returns an empty string when every recorded option has its default
    value. Options naming binaries, the test list, verbosity and
    --update-only are never recorded.
    """
    note_parts = []
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in ('tests', 'update_only', 'opt_binary', 'llc_binary',
                           'clang', 'opt', 'llvm_bin', 'verbose'):
            continue
        value = getattr(args, action.dest)
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        note_parts.append(action.option_strings[0])
        if action.const is None:  # action takes a parameter
            note_parts.append('%s' % value)
    if not note_parts:
        return ''
    return ' %s %s' % (UTC_ARGS_KEY, ' '.join(note_parts))


def check_for_command(line, parser, args, argv):
    """Apply any UTC_ARGS found in *line* and return the (args, argv) pair.

    If the line carries UTC_ARGS, its options are appended to a copy of
    *argv* and the result, minus the entries listed in args.tests, is
    re-parsed with *parser*. Otherwise *args* and *argv* are returned
    unchanged.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        # Split on runs of whitespace so that doubled spaces or an empty
        # UTC_ARGS do not produce '' arguments, which would be parsed as
        # positional test paths.
        cmd = cmd_m.group('cmd').split()
        argv = argv + cmd
        args = parser.parse_args([arg for arg in argv if arg not in args.tests])
    return args, argv