1from __future__ import print_function 2 3import argparse 4import bisect 5import collections 6import copy 7import glob 8import itertools 9import os 10import re 11import subprocess 12import sys 13import shlex 14 15from typing import List, Mapping, Set 16 17##### Common utilities for update_*test_checks.py 18 19 20_verbose = False 21_prefix_filecheck_ir_name = "" 22 23""" 24Version changelog: 25 261: Initial version, used by tests that don't specify --version explicitly. 272: --function-signature is now enabled by default and also checks return 28 type/attributes. 293: Opening parenthesis of function args is kept on the first LABEL line 30 in case arguments are split to a separate SAME line. 314: --check-globals now has a third option ('smart'). The others are now called 32 'none' and 'all'. 'smart' is the default. 33""" 34DEFAULT_VERSION = 4 35 36 37SUPPORTED_ANALYSES = { 38 "Branch Probability Analysis", 39 "Cost Model Analysis", 40 "Loop Access Analysis", 41 "Scalar Evolution Analysis", 42} 43 44 45class Regex(object): 46 """Wrap a compiled regular expression object to allow deep copy of a regexp. 47 This is required for the deep copy done in do_scrub. 48 49 """ 50 51 def __init__(self, regex): 52 self.regex = regex 53 54 def __deepcopy__(self, memo): 55 result = copy.copy(self) 56 result.regex = self.regex 57 return result 58 59 def search(self, line): 60 return self.regex.search(line) 61 62 def sub(self, repl, line): 63 return self.regex.sub(repl, line) 64 65 def pattern(self): 66 return self.regex.pattern 67 68 def flags(self): 69 return self.regex.flags 70 71 72class Filter(Regex): 73 """Augment a Regex object with a flag indicating whether a match should be 74 added (!is_filter_out) or removed (is_filter_out) from the generated checks. 
75 76 """ 77 78 def __init__(self, regex, is_filter_out): 79 super(Filter, self).__init__(regex) 80 self.is_filter_out = is_filter_out 81 82 def __deepcopy__(self, memo): 83 result = copy.deepcopy(super(Filter, self), memo) 84 result.is_filter_out = copy.deepcopy(self.is_filter_out, memo) 85 return result 86 87 88def parse_commandline_args(parser): 89 class RegexAction(argparse.Action): 90 """Add a regular expression option value to a list of regular expressions. 91 This compiles the expression, wraps it in a Regex and adds it to the option 92 value list.""" 93 94 def __init__(self, option_strings, dest, nargs=None, **kwargs): 95 if nargs is not None: 96 raise ValueError("nargs not allowed") 97 super(RegexAction, self).__init__(option_strings, dest, **kwargs) 98 99 def do_call(self, namespace, values, flags): 100 value_list = getattr(namespace, self.dest) 101 if value_list is None: 102 value_list = [] 103 104 try: 105 value_list.append(Regex(re.compile(values, flags))) 106 except re.error as error: 107 raise ValueError( 108 "{}: Invalid regular expression '{}' ({})".format( 109 option_string, error.pattern, error.msg 110 ) 111 ) 112 113 setattr(namespace, self.dest, value_list) 114 115 def __call__(self, parser, namespace, values, option_string=None): 116 self.do_call(namespace, values, 0) 117 118 class FilterAction(RegexAction): 119 """Add a filter to a list of filter option values.""" 120 121 def __init__(self, option_strings, dest, nargs=None, **kwargs): 122 super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs) 123 124 def __call__(self, parser, namespace, values, option_string=None): 125 super(FilterAction, self).__call__(parser, namespace, values, option_string) 126 127 value_list = getattr(namespace, self.dest) 128 129 is_filter_out = option_string == "--filter-out" 130 131 value_list[-1] = Filter(value_list[-1].regex, is_filter_out) 132 133 setattr(namespace, self.dest, value_list) 134 135 filter_group = parser.add_argument_group( 136 
"filtering", 137 """Filters are applied to each output line according to the order given. The 138 first matching filter terminates filter processing for that current line.""", 139 ) 140 141 filter_group.add_argument( 142 "--filter", 143 action=FilterAction, 144 dest="filters", 145 metavar="REGEX", 146 help="Only include lines matching REGEX (may be specified multiple times)", 147 ) 148 filter_group.add_argument( 149 "--filter-out", 150 action=FilterAction, 151 dest="filters", 152 metavar="REGEX", 153 help="Exclude lines matching REGEX", 154 ) 155 156 parser.add_argument( 157 "--include-generated-funcs", 158 action="store_true", 159 help="Output checks for functions not in source", 160 ) 161 parser.add_argument( 162 "-v", "--verbose", action="store_true", help="Show verbose output" 163 ) 164 parser.add_argument( 165 "-u", 166 "--update-only", 167 action="store_true", 168 help="Only update test if it was already autogened", 169 ) 170 parser.add_argument( 171 "--force-update", 172 action="store_true", 173 help="Update test even if it was autogened by a different script", 174 ) 175 parser.add_argument( 176 "--enable", 177 action="store_true", 178 dest="enabled", 179 default=True, 180 help="Activate CHECK line generation from this point forward", 181 ) 182 parser.add_argument( 183 "--disable", 184 action="store_false", 185 dest="enabled", 186 help="Deactivate CHECK line generation from this point forward", 187 ) 188 parser.add_argument( 189 "--replace-value-regex", 190 nargs="+", 191 default=[], 192 help="List of regular expressions to replace matching value names", 193 ) 194 parser.add_argument( 195 "--prefix-filecheck-ir-name", 196 default="", 197 help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names", 198 ) 199 parser.add_argument( 200 "--global-value-regex", 201 nargs="+", 202 default=[], 203 help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not 
enabled)", 204 ) 205 parser.add_argument( 206 "--global-hex-value-regex", 207 nargs="+", 208 default=[], 209 help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives", 210 ) 211 # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point, 212 # we need to rename the flag to just -generate-body-for-unused-prefixes. 213 parser.add_argument( 214 "--no-generate-body-for-unused-prefixes", 215 action="store_false", 216 dest="gen_unused_prefix_body", 217 default=True, 218 help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.", 219 ) 220 # This is the default when regenerating existing tests. The default when 221 # generating new tests is determined by DEFAULT_VERSION. 222 parser.add_argument( 223 "--version", type=int, default=1, help="The version of output format" 224 ) 225 args = parser.parse_args() 226 # TODO: This should not be handled differently from the other options 227 global _verbose, _global_value_regex, _global_hex_value_regex 228 _verbose = args.verbose 229 _global_value_regex = args.global_value_regex 230 _global_hex_value_regex = args.global_hex_value_regex 231 return args 232 233 234def parse_args(parser, argv): 235 args = parser.parse_args(argv) 236 if args.version >= 2: 237 args.function_signature = True 238 # TODO: This should not be handled differently from the other options 239 global _verbose, _global_value_regex, _global_hex_value_regex 240 _verbose = args.verbose 241 _global_value_regex = args.global_value_regex 242 _global_hex_value_regex = args.global_hex_value_regex 243 if "check_globals" in args and args.check_globals == "default": 244 args.check_globals = "none" if args.version < 4 else "smart" 245 return args 246 247 248class InputLineInfo(object): 249 def __init__(self, line, 
line_number, args, argv): 250 self.line = line 251 self.line_number = line_number 252 self.args = args 253 self.argv = argv 254 255 256class TestInfo(object): 257 def __init__( 258 self, 259 test, 260 parser, 261 script_name, 262 input_lines, 263 args, 264 argv, 265 comment_prefix, 266 argparse_callback, 267 ): 268 self.parser = parser 269 self.argparse_callback = argparse_callback 270 self.path = test 271 self.args = args 272 if args.prefix_filecheck_ir_name: 273 global _prefix_filecheck_ir_name 274 _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name 275 self.argv = argv 276 self.input_lines = input_lines 277 self.run_lines = find_run_lines(test, self.input_lines) 278 self.comment_prefix = comment_prefix 279 if self.comment_prefix is None: 280 if self.path.endswith(".mir"): 281 self.comment_prefix = "#" 282 else: 283 self.comment_prefix = ";" 284 self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT 285 self.test_autogenerated_note = self.autogenerated_note_prefix + script_name 286 self.test_autogenerated_note += get_autogennote_suffix(parser, self.args) 287 self.test_unused_note = ( 288 self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE 289 ) 290 291 def ro_iterlines(self): 292 for line_num, input_line in enumerate(self.input_lines): 293 args, argv = check_for_command( 294 input_line, self.parser, self.args, self.argv, self.argparse_callback 295 ) 296 yield InputLineInfo(input_line, line_num, args, argv) 297 298 def iterlines(self, output_lines): 299 output_lines.append(self.test_autogenerated_note) 300 for line_info in self.ro_iterlines(): 301 input_line = line_info.line 302 # Discard any previous script advertising. 
303 if input_line.startswith(self.autogenerated_note_prefix): 304 continue 305 self.args = line_info.args 306 self.argv = line_info.argv 307 if not self.args.enabled: 308 output_lines.append(input_line) 309 continue 310 yield line_info 311 312 def get_checks_for_unused_prefixes( 313 self, run_list, used_prefixes: List[str] 314 ) -> List[str]: 315 run_list = [element for element in run_list if element[0] is not None] 316 unused_prefixes = set( 317 [prefix for sublist in run_list for prefix in sublist[0]] 318 ).difference(set(used_prefixes)) 319 320 ret = [] 321 if not unused_prefixes: 322 return ret 323 ret.append(self.test_unused_note) 324 for unused in sorted(unused_prefixes): 325 ret.append( 326 "{comment} {prefix}: {match_everything}".format( 327 comment=self.comment_prefix, 328 prefix=unused, 329 match_everything=r"""{{.*}}""", 330 ) 331 ) 332 return ret 333 334 335def itertests( 336 test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None 337): 338 for pattern in test_patterns: 339 # On Windows we must expand the patterns ourselves. 340 tests_list = glob.glob(pattern) 341 if not tests_list: 342 warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,)) 343 continue 344 for test in tests_list: 345 with open(test) as f: 346 input_lines = [l.rstrip() for l in f] 347 first_line = input_lines[0] if input_lines else "" 348 if UTC_AVOID in first_line: 349 warn("Skipping test that must not be autogenerated: " + test) 350 continue 351 is_regenerate = UTC_ADVERT in first_line 352 353 # If we're generating a new test, set the default version to the latest. 
354 argv = sys.argv[:] 355 if not is_regenerate: 356 argv.insert(1, "--version=" + str(DEFAULT_VERSION)) 357 358 args = parse_args(parser, argv[1:]) 359 if argparse_callback is not None: 360 argparse_callback(args) 361 if is_regenerate: 362 if script_name not in first_line and not args.force_update: 363 warn( 364 "Skipping test which wasn't autogenerated by " + script_name, 365 test, 366 ) 367 continue 368 args, argv = check_for_command( 369 first_line, parser, args, argv, argparse_callback 370 ) 371 elif args.update_only: 372 assert UTC_ADVERT not in first_line 373 warn("Skipping test which isn't autogenerated: " + test) 374 continue 375 final_input_lines = [] 376 for l in input_lines: 377 if UNUSED_NOTE in l: 378 break 379 final_input_lines.append(l) 380 yield TestInfo( 381 test, 382 parser, 383 script_name, 384 final_input_lines, 385 args, 386 argv, 387 comment_prefix, 388 argparse_callback, 389 ) 390 391 392def should_add_line_to_output( 393 input_line, 394 prefix_set, 395 *, 396 skip_global_checks=False, 397 skip_same_checks=False, 398 comment_marker=";", 399): 400 # Skip any blank comment lines in the IR. 401 if not skip_global_checks and input_line.strip() == comment_marker: 402 return False 403 # Skip a special double comment line we use as a separator. 404 if input_line.strip() == comment_marker + SEPARATOR: 405 return False 406 # Skip any blank lines in the IR. 407 # if input_line.strip() == '': 408 # return False 409 # And skip any CHECK lines. We're building our own. 
410 m = CHECK_RE.match(input_line) 411 if m and m.group(1) in prefix_set: 412 if skip_same_checks and CHECK_SAME_RE.match(input_line): 413 # The previous CHECK line was removed, so don't leave this dangling 414 return False 415 if skip_global_checks: 416 # Skip checks only if they are of global value definitions 417 global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M)) 418 is_global = global_ir_value_re.search(input_line) 419 return not is_global 420 return False 421 422 return True 423 424 425def collect_original_check_lines(ti: TestInfo, prefix_set: set): 426 """ 427 Collect pre-existing check lines into a dictionary `result` which is 428 returned. 429 430 result[func_name][prefix] is filled with a list of right-hand-sides of check 431 lines. 432 """ 433 result = collections.defaultdict(lambda: {}) 434 435 current_prefix = None 436 current_function = None 437 for input_line_info in ti.ro_iterlines(): 438 input_line = input_line_info.line 439 if input_line.lstrip().startswith(";"): 440 m = CHECK_RE.match(input_line) 441 if m is not None: 442 prefix = m.group(1) 443 check_kind = m.group(2) 444 line = input_line[m.end() :].strip() 445 446 if prefix != current_prefix: 447 current_function = None 448 current_prefix = None 449 450 if check_kind not in ["LABEL", "SAME"]: 451 if current_function is not None: 452 current_function.append(line) 453 continue 454 455 if check_kind == "SAME": 456 continue 457 458 if check_kind == "LABEL": 459 m = IR_FUNCTION_RE.match(line) 460 if m is not None: 461 func_name = m.group(1) 462 if ( 463 ti.args.function is not None 464 and func_name != ti.args.function 465 ): 466 # When filtering on a specific function, skip all others. 
467 continue 468 469 current_prefix = prefix 470 current_function = result[func_name][prefix] = [] 471 continue 472 473 current_function = None 474 475 return result 476 477 478# Perform lit-like substitutions 479def getSubstitutions(sourcepath): 480 sourcedir = os.path.dirname(sourcepath) 481 return [ 482 ("%s", sourcepath), 483 ("%S", sourcedir), 484 ("%p", sourcedir), 485 ("%{pathsep}", os.pathsep), 486 ] 487 488 489def applySubstitutions(s, substitutions): 490 for a, b in substitutions: 491 s = s.replace(a, b) 492 return s 493 494 495# Invoke the tool that is being tested. 496def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): 497 with open(ir) as ir_file: 498 substitutions = getSubstitutions(ir) 499 500 # TODO Remove the str form which is used by update_test_checks.py and 501 # update_llc_test_checks.py 502 # The safer list form is used by update_cc_test_checks.py 503 if preprocess_cmd: 504 # Allow pre-processing the IR file (e.g. using sed): 505 assert isinstance( 506 preprocess_cmd, str 507 ) # TODO: use a list instead of using shell 508 preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip() 509 if verbose: 510 print( 511 "Pre-processing input file: ", 512 ir, 513 " with command '", 514 preprocess_cmd, 515 "'", 516 sep="", 517 file=sys.stderr, 518 ) 519 # Python 2.7 doesn't have subprocess.DEVNULL: 520 with open(os.devnull, "w") as devnull: 521 pp = subprocess.Popen( 522 preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE 523 ) 524 ir_file = pp.stdout 525 526 if isinstance(cmd_args, list): 527 args = [applySubstitutions(a, substitutions) for a in cmd_args] 528 stdout = subprocess.check_output([exe] + args, stdin=ir_file) 529 else: 530 stdout = subprocess.check_output( 531 exe + " " + applySubstitutions(cmd_args, substitutions), 532 shell=True, 533 stdin=ir_file, 534 ) 535 if sys.version_info[0] > 2: 536 # FYI, if you crashed here with a decode error, your run line probably 537 # results in bitcode or 
other binary format being written to the pipe. 538 # For an opt test, you probably want to add -S or -disable-output. 539 stdout = stdout.decode() 540 # Fix line endings to unix CR style. 541 return stdout.replace("\r\n", "\n") 542 543 544##### LLVM IR parser 545RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$") 546CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") 547PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") 548CHECK_RE = re.compile( 549 r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" 550) 551CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") 552 553UTC_ARGS_KEY = "UTC_ARGS:" 554UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$") 555UTC_ADVERT = "NOTE: Assertions have been autogenerated by " 556UTC_AVOID = "NOTE: Do not autogenerate" 557UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:" 558 559OPT_FUNCTION_RE = re.compile( 560 r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*" 561 r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$", 562 flags=(re.M | re.S), 563) 564 565ANALYZE_FUNCTION_RE = re.compile( 566 r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':" 567 r"\s*\n(?P<body>.*)$", 568 flags=(re.X | re.S), 569) 570 571LV_DEBUG_RE = re.compile( 572 r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S) 573) 574 575IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(') 576TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$') 577TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)") 578MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)") 579DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") 580 581SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)") 582SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M) 
# Whitespace/comment scrubbing patterns.  The "preserve leading" variant
# collapses interior runs of blanks while keeping each line's indentation.
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Separator appended after the comment marker between generated sections.
SEPARATOR = "."


def _print_diagnostic(severity, msg, test_file):
    # Shared formatting for error()/warn(): optionally tag with the test file.
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("{}: {}".format(severity, msg), file=sys.stderr)


def error(msg, test_file=None):
    """Report an error for *test_file* (if given) on stderr."""
    _print_diagnostic("ERROR", msg, test_file)


def warn(msg, test_file=None):
    """Report a warning for *test_file* (if given) on stderr."""
    _print_diagnostic("WARNING", msg, test_file)


def debug(*args, **kwargs):
    """Print *args* to stderr, but only when verbose mode is enabled."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    """Return the RUN: command lines of *lines*, with backslash-continued
    lines joined into a single logical command."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    # Seed with the first RUN line (if any), then fold continuations in.
    run_lines = raw_lines[:1]
    for line in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + line
        else:
            run_lines.append(line)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for line in run_lines:
        debug("  RUN: {}".format(line))
    return run_lines


def get_triple_from_march(march):
    """Map a -march value to a known target triple, defaulting to 'x86'."""
    march_to_triple = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in march_to_triple.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    """Decide whether *line* survives the ordered *filters* list.  The first
    filter whose regex matches decides; with only filter-out filters present,
    unmatched lines are kept, otherwise they are dropped."""
    saw_keep_filter = False
    for flt in filters:
        saw_keep_filter = saw_keep_filter or not flt.is_filter_out
        if flt.search(line):
            return not flt.is_filter_out
    # No filter matched: keep the line only if every filter was filter-out.
    return not saw_keep_filter


def do_filter(body, filters):
    """Apply *filters* line by line to *body*; with no filters, pass through."""
    if not filters:
        return body
    kept = [ln for ln in body.splitlines() if apply_filters(ln, filters)]
    return "\n".join(kept)


def scrub_body(body):
    """Normalize whitespace in a function body while preserving indentation."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)


def do_scrub(body, scrubber, scrubber_args, extra):
    """Run *scrubber* over *body*.  When scrubber args exist, hand the scrubber
    a deep copy with extra_scrub set on the first argument so the caller's
    args stay untouched."""
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    args_copy = copy.deepcopy(scrubber_args)
    args_copy[0].extra_scrub = extra
    return scrubber(body, *args_copy)


# Build up a dictionary of all the function bodies.
class function_body(object):
    """A scrubbed function body captured from one RUN line's output, plus the
    signature/attribute pieces needed to emit and de-duplicate CHECK lines."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
    ):
        # scrub: the body text used for generated checks; extrascrub keeps the
        # extra_scrub=True variant used for cross-RUN-line comparison.
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
    ):
        """Return True when this body matches the given pieces modulo IR value
        names, so one set of checks (with renamed args) can be shared.

        NOTE(review): relies on IR_VALUE_RE and the *_group_in_ir_value_match
        indices defined elsewhere in this module — confirm when editing.
        """
        arg_names = set()

        def drop_arg_names(match):
            # Record the argument's name and drop it from the matched text.
            arg_names.add(match.group(variable_group_in_ir_value_match))
            if match.group(attribute_group_in_ir_value_match):
                attr = match.group(attribute_group_in_ir_value_match)
            else:
                attr = ""
            return match.group(1) + attr + match.group(match.lastindex)

        def repl_arg_names(match):
            # Drop only names previously recorded as argument names.
            if (
                match.group(variable_group_in_ir_value_match) is not None
                and match.group(variable_group_in_ir_value_match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False
        # Signatures must agree once argument names are stripped.
        ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
        ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if is_backend:
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # For IR checks, compare bodies with argument names dropped and IR
        # comments scrubbed away.
        es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
        es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


class FunctionTestBuilder:
    """Accumulates, per check prefix, the scrubbed bodies of every function
    seen across all RUN lines, tracking conflicts between RUN lines."""

    def __init__(self, run_list, flags, scrubber_args, path):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        # prefix -> {func name -> function_body or None (conflicting output)}.
        self._func_dict = {}
        # prefix -> list of function names in first-seen order.
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        """Warn about prefixes whose output conflicted everywhere, then return
        the prefix -> function -> body dictionary."""
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(
        self, function_re, scrubber, raw_tool_output, prefixes, is_backend
    ):
        """Extract every function from *raw_tool_output* (matched by
        *function_re*), scrub it, and record it under each prefix in
        *prefixes*.  NOTE(review): depends on build_global_values_dictionary
        defined elsewhere in this module."""
        build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if is_backend:
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            is_backend,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocesser directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..
class NamelessValue:
    """Describe one kind of "unnamed" value appearing in LLVM output and how
    it is turned into FileCheck captures and uses (e.g. SSA temporaries,
    metadata ids, attribute group ids, unstable MC instruction numbers)."""

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False
    ):
        # FileCheck variable prefix (e.g. TMP) and the key used to group
        # values of the same kind (e.g. '%', '@', '!').
        self.check_prefix = check_prefix
        self.check_key = check_key
        # Spelling in the output: a (regex) prefix followed by the regexp
        # matching the value itself.
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        # Regexp for the right-hand side when this value can also appear as a
        # global definition; None when it cannot.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Counter state for replace_number_with_counter: original number -> counter string.
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """True for "local" IR values, basically anything matching '%{{.*}}'."""
        return self.ir_prefix == "%"

    def get_ir_prefix_from_ir_value_match(self, match):
        """Return (IR prefix as it appeared in the output, our check prefix).

        If the IR prefix is a regex, the prefix actually present in the
        matched text is returned.
        """
        found = re.search(self.ir_prefix, match[0])
        return found[0], self.check_prefix

    def get_ir_regex(self):
        """Return the regexp used to match this kind of IR value."""
        if not self.is_local_def_ir_value():
            return self.ir_regexp
        # For backwards compatibility locals are checked with '.*'.
        return ".*"

    def get_value_name(self, var: str, check_prefix: str):
        """Derive the FileCheck variable name to use for IR name *var*."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            counter = self.variable_mapping.get(var, None)
            if counter is None:
                # First occurrence of this number: assign the next counter value.
                counter = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = counter
            var = counter
        if var.isdigit():
            # Nameless value: prepend the check prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # A named value colliding with the scripted check prefix:
            # disambiguate with _prefix_filecheck_ir_name when configured.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_value_definition(self, var, match):
        """Build the FileCheck capture ('[[NAME:...]]') defining this value."""
        varname = self.get_value_name(var, self.check_prefix)
        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        if self.is_number:
            # Numbers are always captured in FileCheck's default numeric format.
            opener, regex = "[[#", ""
        else:
            # For backwards compatibility locals are captured with '.*'.
            opener, regex = "[[", self.get_ir_regex()
        if self.is_local_def_ir_value():
            return opener + varname + ":" + prefix + regex + "]]"
        return prefix + opener + varname + ":" + regex + "]]"

    def get_value_use(self, var, match, var_prefix=None):
        """Build the FileCheck use ('[[NAME]]') of a previously captured value."""
        if var_prefix is None:
            var_prefix = self.check_prefix
        opener = "[[#" if self.is_number else "[["
        use = opener + self.get_value_name(var, var_prefix) + "]]"
        if self.is_local_def_ir_value():
            return use
        return self.get_ir_prefix_from_ir_value_match(match)[0] + use


# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
ir_nameless_values = [
    # check_prefix check_key ir_prefix ir_regexp global_ir_rhs_regexp
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
    NamelessValue(
        r"GLOBNAMED",
        "@",
        r"@",
        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
        r".+",
        is_before_functions=True,
        match_literally=True,
        interlaced_with_previous=True,
    ),
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
]
# Nameless values that may also occur as global definitions, i.e. those that
# carry a regexp for the right-hand side of the definition.
global_nameless_values = [
    nv for nv in ir_nameless_values if nv.global_ir_rhs_regexp is not None
]
# global variable names should be matched literally; those are dropped here so
# only globals with unstable (renumberable) ids remain.
global_nameless_values_w_unstable_ids = [
    nv for nv in global_nameless_values if not nv.match_literally
]

asm_nameless_values = [
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]

analyze_nameless_values = [
    NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    ),
]


def createOrRegexp(old, new):
    """Combine two regexp alternations, ignoring an empty operand."""
    if old and new:
        return old + "|" + new
    return old or new


def createPrefixMatch(prefix_str, prefix_re):
    """Wrap a prefix and a captured value regexp in a non-capturing group."""
    return "(?:{}({}))".format(prefix_str, prefix_re)


# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
# Match groups: (1) leading whitespace, (2) the whole value alternation, then
# one capture group per nameless value kind (order matters for group indices).
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
IR_VALUE_REGEXP_STRING = r""
for nameless_value in ir_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    if nameless_value.global_ir_rhs_regexp is not None:
        # Values that can appear as global definitions must start the line.
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
IR_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + IR_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Matches a global value with an unstable (renumberable) id; same group layout
# as IR_VALUE_RE.
GLOBAL_VALUE_REGEXP_STRING = r""
for nameless_value in global_nameless_values_w_unstable_ids:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
GLOBAL_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + GLOBAL_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
ASM_VALUE_RE = re.compile(
    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
)

# Matches values printed by analysis passes (e.g. pointer group ids).
ANALYZE_VALUE_REGEXP_PREFIX = r"(\s*)"
ANALYZE_VALUE_REGEXP_STRING = r""
for nameless_value in analyze_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ANALYZE_VALUE_REGEXP_STRING = createOrRegexp(ANALYZE_VALUE_REGEXP_STRING, match)
ANALYZE_VALUE_REGEXP_SUFFIX = r"(\)?:)"
ANALYZE_VALUE_RE = re.compile(
    ANALYZE_VALUE_REGEXP_PREFIX
    + r"("
    + ANALYZE_VALUE_REGEXP_STRING
    + r")"
    + ANALYZE_VALUE_REGEXP_SUFFIX
)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4


# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
def get_idx_from_ir_value_match(match):
    """Return the 0-based index of the nameless value kind that matched."""
    for i in range(first_nameless_group_in_ir_value_match, match.lastindex):
        if match.group(i) is not None:
            return i - first_nameless_group_in_ir_value_match
    error("Unable to identify the kind of IR value from the match!")
    return 0


# See get_idx_from_ir_value_match
def get_name_from_ir_value_match(match):
    """Return the matched value text (without its prefix)."""
    return match.group(
        get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match
    )


def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
    """Return the NamelessValue descriptor corresponding to this match."""
    return nameless_values[get_idx_from_ir_value_match(match)]


# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    return check_prefix and re.match(
        r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE
    )


def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Prefer lines with few occurrences: they produce fewer candidate edges.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    # - nodes on the LHS represent lines in the original check
    # - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    #  * Plot the edges as points in the plane, with the original line
    #    number on the X axis and the updated line number on the Y axis.
    #  * The goal is to find a longest "chain" of points where each point
    #    is strictly above and to the right of the previous point.
    #  * The algorithm proceeds by sweeping a vertical line from left to
    #    right.
    #  * The algorithm maintains a table where `table[N]` answers the
    #    question "What is currently the 'best' way to build a chain of N+1
    #    points to the left of the vertical line". Here, 'best' means
    #    that the last point of the chain is a as low as possible (minimal
    #    Y coordinate).
    #  * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #    the last point in the chain and `y` is its Y coordinate
    #  * A key invariant is that the Y values in the table are
    #    monotonically increasing
    #  * Thanks to these properties, the table can be used to answer the
    #    question "What is the longest chain that can be built to the left
    #    of the vertical line using only points below a certain Y value",
    #    using a binary search over the table.
    #  * The algorithm also builds a backlink structure in which every point
    #    links back to the previous point on a best (longest) chain ending
    #    at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches


# Placeholder that stands in for any FileCheck variable occurrence when
# comparing check lines structurally.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable definition or use, capturing its name.
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) numeric suffix at the end of a variable name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")


class CheckValueInfo:
    """Information about one FileCheck variable occurrence on a check line."""

    def __init__(
        self,
        nameless_value: NamelessValue,
        var: str,
        prefix: str,
    ):
        self.nameless_value = nameless_value
        self.var = var
        self.prefix = prefix


# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    """A check line with its variable occurrences abstracted away."""

    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrences in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"


def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.var] = value.var

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.var)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Apply commits greedily on a match-by-match basis
        matches = [(-1, -1)]
        committed_anything = False
        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            local_commits = {}

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.var] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.var] == lhs_value.var:
                        continue
                    else:
                        break

                if rhs_value.var in local_commits:
                    # Same, but for a possible commit happening on the same line
                    if local_commits[rhs_value.var] == lhs_value.var:
                        continue
                    else:
                        break

                if lhs_value.var in committed_names:
                    # We can't map this value because the name we would map it to has already been
                    # committed for something else. Give up on this line.
                    break

                local_commits[rhs_value.var] = lhs_value.var
            else:
                # No reason not to add any commitments for this line
                for rhs_var, lhs_var in local_commits.items():
                    new_mapping[rhs_var] = lhs_var
                    committed_names.add(lhs_var)
                    committed_anything = True

                    if (
                        lhs_var != rhs_var
                        and lhs_var in new_mapping
                        and new_mapping[lhs_var] == lhs_var
                    ):
                        new_mapping[lhs_var] = "conflict_" + lhs_var

                matches.append((lhs_idx, rhs_idx))

        matches.append((old_end, new_end))

        # Recursively handle sequences between matches
        if committed_anything:
            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        # Pick a fresh numbered variant of the conflicting name.
        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping


def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
    preserve_names,
    original_check_lines=None,
):
    """Replace nameless value defs/uses in `lines` with FileCheck variables.

    Shared implementation behind the generalize_*_check_lines wrappers; the
    nameless_values / nameless_value_regex pair selects which kind of output
    (IR, globals, ASM, analysis) is being processed.
    """
    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match, transform_locals=True):
        var = get_name_from_ir_value_match(match)
        nameless_value = get_nameless_value_from_match(match, nameless_values)
        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (var,)
            )
        key = (var, nameless_value.check_key)
        is_local_def = nameless_value.is_local_def_ir_value()
        if is_local_def and not transform_locals:
            return None
        if is_local_def and key in vars_seen:
            rv = nameless_value.get_value_use(var, match)
        elif not is_local_def and key in global_vars_seen:
            # We could have seen a different prefix for the global variables first,
            # ensure we use that one instead of the prefix for the current match.
            rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
        else:
            if is_local_def:
                vars_seen.add(key)
            else:
                global_vars_seen[key] = nameless_value.check_prefix
            rv = nameless_value.get_value_definition(var, match)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(match.lastindex)

    def transform_non_local_line_vars(match):
        return transform_line_vars(match, False)

    multiple_braces_re = re.compile(r"({{+)|(}}+)")
    def escape_braces(match_obj):
        return '{{' + re.escape(match_obj.group(0)) + '}}'

    if not is_asm and not is_analyze:
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        if is_asm:
            for i, _ in enumerate(lines):
                # It can happen that two matches are back-to-back and for some reason sub
                # will not replace both of them. For now we work around this by
                # substituting until there is no more match.
                changed = True
                while changed:
                    (lines[i], changed) = nameless_value_regex.subn(
                        transform_line_vars, lines[i], count=1
                    )
        else:
            # LLVM IR case. Start by handling global meta variables (global IR variables,
            # metadata, attributes)
            for i, _ in enumerate(lines):
                start = 0
                while True:
                    m = nameless_value_regex.search(lines[i][start:])
                    if m is None:
                        break
                    start += m.start()
                    sub = transform_non_local_line_vars(m)
                    if sub is not None:
                        lines[i] = (
                            lines[i][:start] + sub + lines[i][start + len(m.group(0)) :]
                        )
                    start += 1

            # Collect information about new check lines and original check lines (if any)
            new_line_infos = []
            for line in lines:
                filtered_line = ""
                values = []
                while True:
                    m = nameless_value_regex.search(line)
                    if m is None:
                        filtered_line += line
                        break

                    var = get_name_from_ir_value_match(m)
                    nameless_value = get_nameless_value_from_match(m, nameless_values)
                    var = nameless_value.get_value_name(
                        var, nameless_value.check_prefix
                    )

                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                    filtered_line += (
                        line[: m.start()]
                        + m.group(1)
                        + VARIABLE_TAG
                        + m.group(m.lastindex)
                    )
                    line = line[m.end() :]
                    values.append(
                        CheckValueInfo(
                            nameless_value=nameless_value,
                            var=var,
                            prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[
                                0
                            ],
                        )
                    )
                new_line_infos.append(CheckLineInfo(filtered_line, values))

            orig_line_infos = []
            for line in original_check_lines or []:
                filtered_line = ""
                values = []
                while True:
                    m = METAVAR_RE.search(line)
                    if m is None:
                        filtered_line += line
                        break

                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                    filtered_line += line[: m.start()] + VARIABLE_TAG
                    line = line[m.end() :]
                    values.append(
                        CheckValueInfo(
                            nameless_value=None,
                            var=m.group(1),
                            prefix=None,
                        )
                    )
                orig_line_infos.append(CheckLineInfo(filtered_line, values))

            # Compute the variable name mapping
            committed_names = set(vars_seen)

            mapping = remap_metavar_names(
                orig_line_infos, new_line_infos, committed_names
            )

            for i, line_info in enumerate(new_line_infos):
                line_template = line_info.line
                line = ""

                for value in line_info.values:
                    idx = line_template.find(VARIABLE_TAG)
                    line += line_template[:idx]
                    line_template = line_template[idx + len(VARIABLE_TAG) :]

                    # NOTE(review): 'nameless_value' below is whatever was left
                    # over from the collection loop above, not
                    # value.nameless_value, and 'key' is never read afterwards.
                    # This looks suspicious -- confirm intent before relying
                    # on it.
                    key = (mapping[value.var], nameless_value.check_key)
                    is_local_def = nameless_value.is_local_def_ir_value()
                    if is_local_def:
                        if mapping[value.var] in vars_seen:
                            line += f"[[{mapping[value.var]}]]"
                        else:
                            line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]"
                            vars_seen.add(mapping[value.var])
                    else:
                        raise RuntimeError("not implemented")

                line += line_template

                lines[i] = line

    if is_analyze:
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines


# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(
    lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines
):
    return generalize_check_lines_common(
        lines,
        is_analyze,
        vars_seen,
        global_vars_seen,
        ir_nameless_values,
        IR_VALUE_RE,
        False,
        preserve_names,
        original_check_lines=original_check_lines,
    )


def generalize_global_check_line(line, preserve_names, global_vars_seen):
    """Generalize a single global-definition line (unstable-id globals only)."""
    [new_line] = generalize_check_lines_common(
        [line],
        False,
        set(),
        global_vars_seen,
        global_nameless_values_w_unstable_ids,
        GLOBAL_VALUE_RE,
        False,
        preserve_names,
    )
    return new_line


def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    """Generalize backend (ASM) output lines."""
    return generalize_check_lines_common(
        lines,
        False,
        vars_seen,
        global_vars_seen,
        asm_nameless_values,
        ASM_VALUE_RE,
        True,
        False,
    )


def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen):
    """Generalize analysis-pass output lines."""
    return generalize_check_lines_common(
        lines,
        True,
        vars_seen,
        global_vars_seen,
        analyze_nameless_values,
        ANALYZE_VALUE_RE,
        False,
        False,
    )


def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    # NOTE: mutable default is shared across calls; it is only read (via .get)
    # in this function.
    original_check_lines: Mapping[str, List[str]] = {},
):
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Only the first usable prefix of each run line gets output; an
            # already-printed prefix ends the search for this run line.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so newly discovered globals can be recorded afterwards.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if version > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    is_analyze,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if version >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif is_analyze:
                func_body = generalize_analyze_check_lines(
                    func_body, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    False,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #     is_blank_line = True
                # else:
                #     output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                #     is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    # Label format is based on IR string.
2066 if function_sig and version > 1: 2067 function_def_regex = "define %s" 2068 elif function_sig: 2069 function_def_regex = "define {{[^@]+}}%s" 2070 else: 2071 function_def_regex = "%s" 2072 check_label_format = "{} %s-LABEL: {}@%s%s%s".format( 2073 comment_marker, function_def_regex 2074 ) 2075 return add_checks( 2076 output_lines, 2077 comment_marker, 2078 prefix_list, 2079 func_dict, 2080 func_name, 2081 check_label_format, 2082 False, 2083 False, 2084 version, 2085 global_vars_seen_dict, 2086 is_filtered, 2087 preserve_names, 2088 original_check_lines=original_check_lines, 2089 ) 2090 2091 2092def add_analyze_checks( 2093 output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered 2094): 2095 check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker) 2096 global_vars_seen_dict = {} 2097 return add_checks( 2098 output_lines, 2099 comment_marker, 2100 prefix_list, 2101 func_dict, 2102 func_name, 2103 check_label_format, 2104 False, 2105 True, 2106 1, 2107 global_vars_seen_dict, 2108 is_filtered, 2109 ) 2110 2111 2112def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): 2113 for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values): 2114 if nameless_value.global_ir_rhs_regexp is None: 2115 continue 2116 2117 lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp 2118 rhs_re_str = nameless_value.global_ir_rhs_regexp 2119 2120 global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$" 2121 global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M)) 2122 lines = [] 2123 for m in global_ir_value_re.finditer(raw_tool_output): 2124 # Attach the substring's start index so that CHECK lines 2125 # can be sorted properly even if they are matched by different nameless values. 2126 # This is relevant for GLOB and GLOBNAMED since they may appear interlaced. 
2127 lines.append((m.start(), m.group(0))) 2128 2129 for prefix in prefixes: 2130 if glob_val_dict[prefix] is None: 2131 continue 2132 if nameless_value.check_prefix in glob_val_dict[prefix]: 2133 if lines == glob_val_dict[prefix][nameless_value.check_prefix]: 2134 continue 2135 if prefix == prefixes[-1]: 2136 warn("Found conflicting asm under the same prefix: %r!" % (prefix,)) 2137 else: 2138 glob_val_dict[prefix][nameless_value.check_prefix] = None 2139 continue 2140 glob_val_dict[prefix][nameless_value.check_prefix] = lines 2141 2142 2143def filter_globals_according_to_preference( 2144 global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting 2145): 2146 if global_check_setting == "none": 2147 return [] 2148 if global_check_setting == "all": 2149 return global_val_lines_w_index 2150 assert global_check_setting == "smart" 2151 2152 if nameless_value.check_key == "#": 2153 # attribute sets are usually better checked by --check-attributes 2154 return [] 2155 2156 def extract(line, nv): 2157 p = ( 2158 "^" 2159 + nv.ir_prefix 2160 + "(" 2161 + nv.ir_regexp 2162 + ") = (" 2163 + nv.global_ir_rhs_regexp 2164 + ")" 2165 ) 2166 match = re.match(p, line) 2167 return (match.group(1), re.findall(nv.ir_regexp, match.group(2))) 2168 2169 transitively_visible = set() 2170 contains_refs_to = {} 2171 2172 def add(var): 2173 nonlocal transitively_visible 2174 nonlocal contains_refs_to 2175 if var in transitively_visible: 2176 return 2177 transitively_visible.add(var) 2178 if not var in contains_refs_to: 2179 return 2180 for x in contains_refs_to[var]: 2181 add(x) 2182 2183 for i, line in global_val_lines_w_index: 2184 (var, refs) = extract(line, nameless_value) 2185 contains_refs_to[var] = refs 2186 for var, check_key in global_vars_seen: 2187 if check_key != nameless_value.check_key: 2188 continue 2189 add(var) 2190 return [ 2191 (i, line) 2192 for i, line in global_val_lines_w_index 2193 if extract(line, nameless_value)[0] in transitively_visible 2194 ] 
# Substitutions applied to metadata lines that embed unstable text such as
# compiler version strings and build-dependent file paths.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Replace unstable metadata substrings in line with FileCheck globs."""
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Emit pending (offset, line) checks in source order, then clear them."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    # Sorting by the attached start offset restores source order even when
    # lines were collected by different nameless values.
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append CHECK lines for global values (globals, metadata, ...).

    Only nameless values whose is_before_functions flag matches the argument
    are handled. Returns the set of (checkprefix, check_prefix) pairs that
    were printed. Mutates global_vars_seen_dict with newly seen globals.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in global_nameless_values:
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # Honor an explicit --global-value-regex restriction, if set.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    new_line = generalize_global_check_line(
                        line, preserve_names, global_vars_seen
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush whatever remains buffered, once per check prefix.
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes


def check_prefix(prefix):
    """Warn if prefix is not a syntactically valid FileCheck prefix."""
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )


def get_check_prefixes(filecheck_cmd):
    """Return all prefixes named in a FileCheck command, or ["CHECK"]."""
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    """Warn about invalid or duplicated prefixes in a FileCheck command line."""
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )


def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS suffix recording non-default command-line options.

    Walks the parser's actions and serializes every option whose value
    differs from its default (with special cases for check_globals, filters,
    and function_signature). Returns "" when nothing needs recording.
    """
    autogenerated_note_args = ""
    # NOTE(review): parser._actions is a private argparse attribute; there is
    # no public equivalent, but it may change across Python versions.
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # Default flipped from 'none' to 'smart' in version 4.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse args if line carries a UTC_ARGS command; return (args, argv).

    Any options found are appended to argv (mutated in place) before
    re-parsing, so later UTC_ARGS lines accumulate.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option value for a test, falling back to UTC_ARGS lines.

    For "global" options, scans every line of the test and warns (to stderr)
    when the option first appears after test content has already started.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # NOTE(review): 'warn' here is the module-level warn() function,
                # which is always truthy, so this condition reduces to
                # 'if saw_line:' — confirm whether a flag was intended instead.
                if warn and saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines to output_lines.

    Drops bare comment-marker lines, separator lines, and existing check
    lines whose prefix is in prefix_set (they will be regenerated).
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks grouped by prefix at the end of the file.

    check_generator is one of the add_*_checks routines (partially applied);
    it is invoked once per (prefix, function) pair and yields the prefixes it
    generated. Returns the set of all generated prefixes.
    """
    added = set()
    generated_prefixes = set()
    for prefix in prefix_list:
        prefixes = prefix[0]
        tool_args = prefix[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element. They output
                # checks for each prefix in the list of prefixes. By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function. That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes