import argparse
import bisect
import collections
import copy
import glob
import os
import re
import subprocess
import sys
import shlex

from typing import List, Mapping, Set

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
DEFAULT_VERSION = 5


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        result = copy.deepcopy(super(Filter, self), memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result
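
# Illustrative sketch (not part of the module): Regex/Filter exist so that
# compiled patterns survive the copy.deepcopy done in do_scrub. The pattern
# below is made up.
#
#   >>> f = Filter(re.compile(r"^\s*store "), is_filter_out=True)
#   >>> g = copy.deepcopy(f)
#   >>> g.is_filter_out, g.pattern()
#   (True, '^\\s*store ')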


def parse_commandline_args(parser):
    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        # option_string is threaded through from __call__ so that error
        # messages can name the offending flag.
        def do_call(self, namespace, values, flags, option_string):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for the current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args


class InputLineInfo(object):
    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)
    def iterlines(self, output_lines):
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
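
# Hypothetical driver sketch (script name and flow are made up; real
# update_*_test_checks.py scripts add more options and post-processing):
# itertests() yields one TestInfo per test file, and iterlines() re-emits
# the file while the caller appends freshly generated CHECK lines.
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument("tests", nargs="+")
#   initial_args = parse_commandline_args(parser)
#   for ti in itertests(initial_args.tests, parser, "my_script.py"):
#       output_lines = []
#       for line_info in ti.iterlines(output_lines):
#           output_lines.append(line_info.line)
#       with open(ti.path, "w") as f:
#           f.write("\n".join(output_lines) + "\n")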


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #     return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling.
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions.
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            is_global = global_ir_value_re.search(input_line)
            return not is_global
        return False

    return True


def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                    continue

        current_function = None

    return result


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    for a, b in substitutions:
        s = s.replace(a, b)
    return s
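
# Worked example (hypothetical paths) for the lit-like substitutions above:
# %s expands to the test file and %S/%p to its directory.
#
#   >>> subs = getSubstitutions("/tmp/tests/foo.ll")
#   >>> applySubstitutions("opt %s -o %S/out.bc", subs)
#   'opt /tmp/tests/foo.ll -o /tmp/tests/out.bc'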


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix LF style.
    return stdout.replace("\r\n", "\n")
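
# Illustrative call (tool, flags, and file name are made up): the string form
# runs through the shell, and preprocess_cmd may itself use lit-like
# substitutions such as %s.
#
#   raw_output = invoke_tool(
#       "opt",
#       "-passes=instcombine -S",
#       "/tmp/tests/foo.ll",
#       preprocess_cmd="sed 's/;.*//' %s",
#   )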


##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("ERROR: {}".format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("WARNING: {}".format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines
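
# Worked example (made-up test content): find_run_lines joins RUN lines that
# end in a backslash continuation into one logical command.
#
#   >>> find_run_lines("t.ll", ["; RUN: opt -S %s \\", "; RUN:   | FileCheck %s"])
#   ['opt -S %s  | FileCheck %s']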


def get_triple_from_march(march):
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in triples.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    has_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_filter = True
        if f.search(line):
            return False if f.is_filter_out else True
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return False if has_filter else True


def do_filter(body, filters):
    return (
        body
        if not filters
        else "\n".join(
            filter(lambda line: apply_filters(line, filters), body.splitlines())
        )
    )
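
# Behavior sketch (hypothetical patterns): the first matching filter decides
# a line's fate; lines matching no filter are dropped whenever at least one
# keep-filter is present.
#
#   >>> fs = [Filter(re.compile(r"store"), False), Filter(re.compile(r"dbg"), True)]
#   >>> apply_filters("  store i32 1, ptr %p", fs)
#   True
#   >>> apply_filters("  call void @llvm.dbg.value(...)", fs)
#   False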


def scrub_body(body):
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        arg_names = set()

        def drop_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


class FunctionTestBuilder:
    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name.
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefix but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..


class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals
    def get_ir_regex(self):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
        var = var.replace(".", "_")
        var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
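
# Behavior sketch (values chosen for illustration): purely numeric IR names
# get the check prefix prepended, while named values are upper-cased with
# '.' and '-' mapped to '_'.
#
#   >>> nv = NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None)
#   >>> nv.get_value_name("7", "TMP")
#   'TMP7'
#   >>> nv.get_value_name("my.var", "TMP")
#   'MY_VAR'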


class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]


def make_ir_generalizer(version):
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #             check_prefix  check_key  ir_prefix  ir_regexp  global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    # values = [
    #     nameless_value
    #     for nameless_value in IR_NAMELESS_VALUES
    #     if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and
    #     not (unstable_ids_only and nameless_value.match_literally)
    # ]

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
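
# Minimal sketch of how a generalizer's regexp is consumed (the IR line is
# made up): each match corresponds to one nameless value occurrence.
#
#   >>> ginfo = make_ir_generalizer(version=4)
#   >>> m = ginfo.get_regexp().search("  %tmp1 = add i32 %a, 1")
#   >>> ginfo.get_name_from_match(m)
#   'tmp1'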
1245 "#", 1246 r"", 1247 r"0x[0-9a-f]+", 1248 None, 1249 replace_number_with_counter=True, 1250 ), 1251 ] 1252 1253 prefix = r"(\s*)" 1254 suffix = r"(\)?:)" 1255 1256 return GeneralizerInfo( 1257 version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix 1258 ) 1259 1260 1261# Return true if var clashes with the scripted FileCheck check_prefix. 1262def may_clash_with_default_check_prefix_name(check_prefix, var): 1263 return check_prefix and re.match( 1264 r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE 1265 ) 1266 1267 1268def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: 1269 """ 1270 Find a large ordered matching between strings in lhs and rhs. 1271 1272 Think of this as finding the *unchanged* lines in a diff, where the entries 1273 of lhs and rhs are lines of the files being diffed. 1274 1275 Returns a list of matched (lhs_idx, rhs_idx) pairs. 1276 """ 1277 1278 if not lhs or not rhs: 1279 return [] 1280 1281 # Collect matches in reverse order. 1282 matches = [] 1283 1284 # First, collect a set of candidate matching edges. We limit this to a 1285 # constant multiple of the input size to avoid quadratic runtime. 1286 patterns = collections.defaultdict(lambda: ([], [])) 1287 1288 for idx in range(len(lhs)): 1289 patterns[lhs[idx]][0].append(idx) 1290 for idx in range(len(rhs)): 1291 patterns[rhs[idx]][1].append(idx) 1292 1293 multiple_patterns = [] 1294 1295 candidates = [] 1296 for pattern in patterns.values(): 1297 if not pattern[0] or not pattern[1]: 1298 continue 1299 1300 if len(pattern[0]) == len(pattern[1]) == 1: 1301 candidates.append((pattern[0][0], pattern[1][0])) 1302 else: 1303 multiple_patterns.append(pattern) 1304 1305 multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) 1306 1307 for pattern in multiple_patterns: 1308 if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( 1309 len(lhs) + len(rhs) 1310 ): 1311 break 1312 for lhs_idx in pattern[0]: 1313 for rhs_idx in pattern[1]: 1314 candidates.append((lhs_idx, rhs_idx)) 1315 1316 if not candidates: 1317 # The LHS and RHS either share nothing in common, or lines are just too 1318 # identical. In that case, let's give up and not match anything. 1319 return [] 1320 1321 # Compute a maximal crossing-free matching via an algorithm that is 1322 # inspired by a mixture of dynamic programming and line-sweeping in 1323 # discrete geometry. 1324 # 1325 # I would be surprised if this algorithm didn't exist somewhere in the 1326 # literature, but I found it without consciously recalling any 1327 # references, so you'll have to make do with the explanation below. 1328 # Sorry. 1329 # 1330 # The underlying graph is bipartite: 1331 # - nodes on the LHS represent lines in the original check 1332 # - nodes on the RHS represent lines in the new (updated) check 1333 # 1334 # Nodes are implicitly sorted by the corresponding line number. 1335 # Edges (unique_matches) are sorted by the line number on the LHS. 1336 # 1337 # Here's the geometric intuition for the algorithm. 1338 # 1339 # * Plot the edges as points in the plane, with the original line 1340 # number on the X axis and the updated line number on the Y axis. 1341 # * The goal is to find a longest "chain" of points where each point 1342 # is strictly above and to the right of the previous point. 1343 # * The algorithm proceeds by sweeping a vertical line from left to 1344 # right. 


def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or their common lines
        # are too repetitive. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    # - nodes on the LHS represent lines in the original check
    # - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    # * Plot the edges as points in the plane, with the original line
    #   number on the X axis and the updated line number on the Y axis.
    # * The goal is to find a longest "chain" of points where each point
    #   is strictly above and to the right of the previous point.
    # * The algorithm proceeds by sweeping a vertical line from left to
    #   right.
    # * The algorithm maintains a table where `table[N]` answers the
    #   question "What is currently the 'best' way to build a chain of N+1
    #   points to the left of the vertical line". Here, 'best' means
    #   that the last point of the chain is as low as possible (minimal
    #   Y coordinate).
    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #   the last point in the chain and `y` is its Y coordinate.
    # * A key invariant is that the Y values in the table are
    #   monotonically increasing.
    # * Thanks to these properties, the table can be used to answer the
    #   question "What is the longest chain that can be built to the left
    #   of the vertical line using only points below a certain Y value",
    #   using a binary search over the table.
    # * The algorithm also builds a backlink structure in which every point
    #   links back to the previous point on a best (longest) chain ending
    #   at that point.
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
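
# Worked example (toy inputs): the result is a longest crossing-free chain of
# candidate edges, so in-order common lines survive while reordered ones do
# not.
#
#   >>> find_diff_matching(["a", "b", "c"], ["b", "x", "c"])
#   [(1, 0), (2, 2)]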


VARIABLE_TAG = "[[@@]]"
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")


class TestVar:
    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        if prefix != self._prefix:
            self._prefix = ""
        if suffix != self._suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        return self._nameless_value.get_value_name(
            text, self._nameless_value.check_prefix
        )

    def get_def(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"


class CheckValueInfo:
    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the value, e.g. '%'
        self.key = key

        # Text to be matched by the FileCheck variable (without any prefix or suffix)
        self.text = text

        # Name of the FileCheck variable
        self.name = name

        # Prefix and suffix that were captured by the NamelessValue regular expression
        self.prefix = prefix
        self.suffix = suffix


# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, values={self.values})"


def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible.
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement.
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Candidate matches may conflict if they require conflicting mappings of
        # names. We want to determine a large set of compatible candidates,
        # because that leads to a small diff.
        #
        # We think of the candidates as vertices in a conflict graph. The
        # conflict graph has edges between incompatible candidates. We want to
        # find a large independent set in this graph.
        #
        # Greedily selecting candidates and removing incompatible ones has the
        # disadvantage that making a few bad decisions early on can have huge
        # consequences.
        #
        # Instead, we implicitly compute multiple independent sets by greedily
        # assigning a *coloring* to the conflict graph. Then, we select the
        # largest color class (which is the largest independent set we found),
        # commit to all candidates in it, and recurse.
        #
        # Note that we don't actually materialize the conflict graph. Instead,
        # each color class tracks the information needed to decide implicitly
        # whether a vertex conflicts (has an edge to) any of the vertices added
        # to the color class so far.
        class Color:
            def __init__(self):
                # (lhs_idx, rhs_idx) of matches in this color
                self.matches = []

                # rhs_name -> lhs_name mappings required by this color
                self.mapping = {}

                # lhs_names committed for this color
                self.committed = set()

        colors = []

        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            # We scan through the uncommitted names in the candidate line and
            # filter out the color classes to which the candidate could be
            # assigned.
            #
            # Simultaneously, we prepare a new color class in case the candidate
            # conflicts with all colors that have been established so far.
            compatible_colors = colors[:]
            new_color = Color()
            new_color.matches.append((lhs_idx, rhs_idx))

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in new_color.mapping:
                    # Same, but for a possible commit happening on the same line
                    if new_color.mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if (
                    lhs_value.name in committed_names
                    or lhs_value.name in new_color.committed
                ):
                    # We can't map this value because the name we would map it
                    # to has already been committed for something else. Give up
                    # on this line.
                    break

                new_color.mapping[rhs_value.name] = lhs_value.name
                new_color.committed.add(lhs_value.name)

                color_idx = 0
                while color_idx < len(compatible_colors):
                    color = compatible_colors[color_idx]
                    compatible = True
                    if rhs_value.name in color.mapping:
                        compatible = color.mapping[rhs_value.name] == lhs_value.name
                    else:
                        compatible = lhs_value.name not in color.committed
                    if compatible:
                        color_idx += 1
                    else:
                        del compatible_colors[color_idx]
            else:
                # We never broke out of the loop, which means that at a minimum,
                # this line is viable standalone
                if compatible_colors:
                    color = max(compatible_colors, key=lambda color: len(color.matches))
                    color.mapping.update(new_color.mapping)
                    color.committed.update(new_color.committed)
                    color.matches.append((lhs_idx, rhs_idx))
                else:
                    colors.append(new_color)

        if colors:
            # Pick the largest color class. This gives us a large independent
            # (non-conflicting) set of candidate matches. Assign all names
            # required by the independent set and recurse.
            max_color = max(colors, key=lambda color: len(color.matches))

            for rhs_var, lhs_var in max_color.mapping.items():
                new_mapping[rhs_var] = lhs_var
                committed_names.add(lhs_var)

                if (
                    lhs_var != rhs_var
                    and lhs_var in new_mapping
                    and new_mapping[lhs_var] == lhs_var
                ):
                    new_mapping[lhs_var] = "conflict_" + lhs_var

            matches = (
                [(old_begin - 1, new_begin - 1)]
                + max_color.matches
                + [(old_end, new_end)]
            )

            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
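
# Sketch of the conflict-resolution tail above (names are made up): a name
# whose mapping was marked "conflict_TMP1" is renamed by bumping the numeric
# suffix of the new name until the candidate is free, e.g. TMP1 -> TMP2 ->
# TMP3, committing the first one that is not already taken.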
                        % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes.
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines


def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    # prefix_exclusions are prefixes we cannot use to print the function,
    # because the function does not exist in all run lines that use these
    # prefixes.
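    #
    # For example (hypothetical prefixes): if one RUN line uses
    # --check-prefixes=CHECK,FOO and another uses --check-prefixes=CHECK,BAR,
    # and only the FOO run line produced this function, then CHECK is excluded
    # for it: a CHECK line for the function could never match the output of
    # the BAR run line.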
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function, we
        # cannot check for it as it does not exist for this run line. A subset
        # of the check prefixes might know about the function, but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # Now that prefix_exclusions is constructed, we can emit the output.
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the
            # last check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a
                # -LABEL and a separate -SAME line that contains the arguments
                # with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is
                    # in the label line. This is required in case of function
                    # names that are prefixes of each other. Otherwise, the
                    # label line for "foo" might incorrectly match on
                    # "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                    # Removing args_and_sig from the label match line requires
                    # func_name_separator to be empty. Otherwise, the match
                    # will not work.
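                    #
                    # Illustrative output (hypothetical function): instead of a
                    # single "CHECK-LABEL: define i32 @foo(i32 [[A:%.*]])"
                    # line, whose capture would not be supported in a label
                    # line, this path emits "CHECK-LABEL: define i32 @foo("
                    # followed by "CHECK-SAME: i32 [[A:%.*]])".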
                    assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add
                    # extra spaces before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY
            # lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the
                # first line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation
                # argument. Disabled for now: better to check everything. Be
                # safe rather than sorry.

                # Handle the first line of the function body as a special case
                # because it's often just noise (a useless asm comment or entry
                # label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #   is_blank_line = True
                # else:
                #   output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                #   is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(comment_marker, checkprefix, func_line)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the
                # first line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    assert ginfo.is_ir()
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        function_def_regex = "define %s"
    elif function_sig:
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )


def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    assert ginfo.is_analyze()
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    global_vars_seen_dict = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
    )


def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines can be
            # sorted properly even if they are matched by different nameless
            # values.
            # This is relevant for GLOB and GLOBNAMED since they may appear
            # interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines


def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def extract(line, nv):
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    transitively_visible = set()
    contains_refs_to = {}

    def add(var):
        nonlocal transitively_visible
        nonlocal contains_refs_to
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        if var not in contains_refs_to:
            return
        for x in contains_refs_to[var]:
            add(x)

    for i, line in global_val_lines_w_index:
        (var, refs) = extract(line, nameless_value)
        contains_refs_to[var] = refs
    for var, check_key in global_vars_seen:
        if check_key != nameless_value.check_key:
            continue
        add(var)
    return [
        (i, line)
        for i, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in transitively_visible
    ]


METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if checkprefix not in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes


def check_prefix(prefix):
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )


def get_check_prefixes(filecheck_cmd):
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )


def get_autogennote_suffix(parser, args):
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with
            # boolean --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a
            # list of Filter objects.
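            #
            # Illustrative round-trip (hypothetical filters): Filter objects
            # created from --filter "foo" and --filter-out "bar" re-serialize
            # into the autogenerated note as: --filter "foo" --filter-out "bar"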
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists
        # anywhere in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                if warn and saw_line:
                    # We saw the option after already reading some test input
                    # lines. Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    added = set()
    generated_prefixes = set()
    for prefix in prefix_list:
        prefixes = prefix[0]
        tool_args = prefix[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element. They output
                # checks for each prefix in the list of prefixes.
                # By doing so, they implicitly assume that for each function,
                # every run line will generate something for that function.
                # That is not the case for generated functions, as some run
                # lines might not generate them (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
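

# Illustrative note on add_checks_at_end (hypothetical inputs): given
# prefix_list = [(["FOO"], args), (["BAR"], args)] and func_order mapping both
# prefixes to ["f", "g"], checks are emitted in the order FOO:f, FOO:g, BAR:f,
# BAR:g; that is, grouped by prefix rather than by function as in "normal" mode.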