import argparse
import bisect
import collections
import copy
import glob
import os
import re
import subprocess
import sys
import shlex

from typing import List, Mapping, Set

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
DEFAULT_VERSION = 5


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        result = copy.deepcopy(super(Filter, self), memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result
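
# Illustrative sketch (not used by the update scripts themselves): why Regex
# defines __deepcopy__. Deep-copying a structure that holds these wrappers
# (as do_scrub does below) copies the wrapper but deliberately shares the
# underlying compiled pattern object instead of copying it.
def _example_regex_deepcopy():
    filters = [Filter(re.compile(r"^\s*ret\b"), is_filter_out=False)]
    copied = copy.deepcopy(filters)
    # The wrapper is a distinct object...
    assert copied[0] is not filters[0]
    # ...but the compiled pattern is shared, not copied.
    assert copied[0].regex is filters[0].regex
    assert copied[0].is_filter_out == filters[0].is_filter_out
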
def parse_commandline_args(parser):
    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        "/".join(self.option_strings), error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args

class InputLineInfo(object):
    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #     return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling.
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions.
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            is_global = global_ir_value_re.search(input_line)
            return not is_global
        return False

    return True
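
# Illustrative sketch (not part of the production flow): which input lines
# survive into a regenerated test when this invocation owns prefix_set
# {"CHECK"}.
def _example_should_add_line_to_output():
    # Ordinary IR is preserved verbatim.
    assert should_add_line_to_output("define i32 @f() {", {"CHECK"})
    # Stale CHECK lines for a known prefix are dropped; they get regenerated.
    assert not should_add_line_to_output("; CHECK-NEXT: ret i32 0", {"CHECK"})
    # Check lines for prefixes this invocation doesn't own are kept.
    assert should_add_line_to_output("; OTHER: ret i32 0", {"CHECK"})
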
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        current_function = None

    return result


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    for a, b in substitutions:
        s = s.replace(a, b)
    return s
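
# Illustrative sketch (hypothetical paths): the lit-like substitutions expand
# a RUN command relative to the test file being processed.
def _example_substitutions():
    subs = getSubstitutions("/tmp/tests/foo.ll")
    cmd = applySubstitutions("opt -S %s -o %S/out.ll", subs)
    assert cmd == "opt -S /tmp/tests/foo.ll -o /tmp/tests/out.ll"
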
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
        # Fix line endings to Unix LF style.
        return stdout.replace("\r\n", "\n")

##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("ERROR: {}".format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("WARNING: {}".format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    # Python 2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines
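
# Illustrative sketch: backslash-continued RUN lines are joined into a single
# logical command before further processing.
def _example_find_run_lines():
    lines = [
        "; RUN: opt -S -passes=instcombine %s\\",
        "; RUN:   | FileCheck %s",
    ]
    assert find_run_lines("test.ll", lines) == [
        "opt -S -passes=instcombine %s | FileCheck %s"
    ]
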
def get_triple_from_march(march):
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in triples.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    has_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_filter = True
        if f.search(line):
            return False if f.is_filter_out else True
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return False if has_filter else True


def do_filter(body, filters):
    return (
        body
        if not filters
        else "\n".join(
            filter(lambda line: apply_filters(line, filters), body.splitlines())
        )
    )
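
# Illustrative sketch of the filter semantics: the first matching filter
# decides, and a line matching no filter is kept only when every given
# filter is a --filter-out.
def _example_apply_filters():
    keep_adds = Filter(re.compile(r"\badd\b"), is_filter_out=False)
    drop_dbg = Filter(re.compile(r"llvm\.dbg"), is_filter_out=True)
    assert apply_filters("%v = add i32 %a, %b", [drop_dbg, keep_adds])
    assert not apply_filters("call void @llvm.dbg.value(...)", [drop_dbg, keep_adds])
    # No filter matches and a positive --filter exists, so the line is dropped.
    assert not apply_filters("%v = mul i32 %a, %b", [drop_dbg, keep_adds])
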
def scrub_body(body):
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        arg_names = set()

        def drop_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements; the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
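
# Illustrative sketch: interior runs of whitespace collapse to one space,
# leading indentation survives, tabs expand to two-space stops, and trailing
# whitespace is stripped.
def _example_scrub_body():
    assert scrub_body("  mov   x0,  x1  \n") == "  mov x0, x1\n"
    assert scrub_body("\tret\n") == "  ret\n"
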
class FunctionTestBuilder:
    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS.
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS.
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If the output
            # is not assembly, just assume that the separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name.
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used these check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..


class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM; replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals.
    def get_ir_regex(self):
        # For backwards compatibility we check locals with '.*'.
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter.
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
            var = var.replace(".", "_")
            var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
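
# Illustrative sketch: how IR names become FileCheck variable names, assuming
# the default TMP kind used for unnamed locals.
def _example_get_value_name():
    tmp = NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None)
    # Purely numeric names get the check prefix prepended: %7 -> TMP7.
    assert tmp.get_value_name("7", tmp.check_prefix) == "TMP7"
    # Named values are upper-cased, with '.' and '-' mapped to '_'.
    assert tmp.get_value_name("x.y-z", tmp.check_prefix) == "X_Y_Z"
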
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]


def make_ir_generalizer(version):
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        # check_prefix, check_key, ir_prefix, ir_regexp, global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    # values = [
    #     nameless_value
    #     for nameless_value in IR_NAMELESS_VALUES
    #     if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and
    #        not (unstable_ids_only and nameless_value.match_literally)
    # ]

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
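
# Illustrative sketch: the combined regexp built above picks out a nameless
# value in an IR line and reports which kind it is.
def _example_ir_generalizer():
    ginfo = make_ir_generalizer(version=4)
    m = ginfo.get_regexp().search("  %tmp = add i32 %a, 1")
    assert m is not None
    assert ginfo.get_name_from_match(m) == "tmp"
    assert ginfo.get_nameless_value_from_match(m).check_prefix == "TMP"
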
1243 "#", 1244 r"", 1245 r"0x[0-9a-f]+", 1246 None, 1247 replace_number_with_counter=True, 1248 ), 1249 ] 1250 1251 prefix = r"(\s*)" 1252 suffix = r"(\)?:)" 1253 1254 return GeneralizerInfo( 1255 version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix 1256 ) 1257 1258 1259# Return true if var clashes with the scripted FileCheck check_prefix. 1260def may_clash_with_default_check_prefix_name(check_prefix, var): 1261 return check_prefix and re.match( 1262 r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE 1263 ) 1264 1265 1266def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: 1267 """ 1268 Find a large ordered matching between strings in lhs and rhs. 1269 1270 Think of this as finding the *unchanged* lines in a diff, where the entries 1271 of lhs and rhs are lines of the files being diffed. 1272 1273 Returns a list of matched (lhs_idx, rhs_idx) pairs. 1274 """ 1275 1276 if not lhs or not rhs: 1277 return [] 1278 1279 # Collect matches in reverse order. 1280 matches = [] 1281 1282 # First, collect a set of candidate matching edges. We limit this to a 1283 # constant multiple of the input size to avoid quadratic runtime. 1284 patterns = collections.defaultdict(lambda: ([], [])) 1285 1286 for idx in range(len(lhs)): 1287 patterns[lhs[idx]][0].append(idx) 1288 for idx in range(len(rhs)): 1289 patterns[rhs[idx]][1].append(idx) 1290 1291 multiple_patterns = [] 1292 1293 candidates = [] 1294 for pattern in patterns.values(): 1295 if not pattern[0] or not pattern[1]: 1296 continue 1297 1298 if len(pattern[0]) == len(pattern[1]) == 1: 1299 candidates.append((pattern[0][0], pattern[1][0])) 1300 else: 1301 multiple_patterns.append(pattern) 1302 1303 multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) 1304 1305 for pattern in multiple_patterns: 1306 if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( 1307 len(lhs) + len(rhs) 1308 ): 1309 break 1310 for lhs_idx in pattern[0]: 1311 for rhs_idx in pattern[1]: 1312 candidates.append((lhs_idx, rhs_idx)) 1313 1314 if not candidates: 1315 # The LHS and RHS either share nothing in common, or lines are just too 1316 # identical. In that case, let's give up and not match anything. 1317 return [] 1318 1319 # Compute a maximal crossing-free matching via an algorithm that is 1320 # inspired by a mixture of dynamic programming and line-sweeping in 1321 # discrete geometry. 1322 # 1323 # I would be surprised if this algorithm didn't exist somewhere in the 1324 # literature, but I found it without consciously recalling any 1325 # references, so you'll have to make do with the explanation below. 1326 # Sorry. 1327 # 1328 # The underlying graph is bipartite: 1329 # - nodes on the LHS represent lines in the original check 1330 # - nodes on the RHS represent lines in the new (updated) check 1331 # 1332 # Nodes are implicitly sorted by the corresponding line number. 1333 # Edges (unique_matches) are sorted by the line number on the LHS. 1334 # 1335 # Here's the geometric intuition for the algorithm. 1336 # 1337 # * Plot the edges as points in the plane, with the original line 1338 # number on the X axis and the updated line number on the Y axis. 1339 # * The goal is to find a longest "chain" of points where each point 1340 # is strictly above and to the right of the previous point. 1341 # * The algorithm proceeds by sweeping a vertical line from left to 1342 # right. 
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or the lines are just
        # too repetitive. In that case, give up and don't match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    # - nodes on the LHS represent lines in the original check
    # - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    # * Plot the edges as points in the plane, with the original line
    #   number on the X axis and the updated line number on the Y axis.
    # * The goal is to find a longest "chain" of points where each point
    #   is strictly above and to the right of the previous point.
    # * The algorithm proceeds by sweeping a vertical line from left to
    #   right.
    # * The algorithm maintains a table where `table[N]` answers the
    #   question "What is currently the 'best' way to build a chain of N+1
    #   points to the left of the vertical line". Here, 'best' means
    #   that the last point of the chain is as low as possible (minimal
    #   Y coordinate).
    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #   the last point in the chain and `y` is its Y coordinate
    # * A key invariant is that the Y values in the table are
    #   monotonically increasing
    # * Thanks to these properties, the table can be used to answer the
    #   question "What is the longest chain that can be built to the left
    #   of the vertical line using only points below a certain Y value",
    #   using a binary search over the table.
    # * The algorithm also builds a backlink structure in which every point
    #   links back to the previous point on a best (longest) chain ending
    #   at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
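
# Illustrative sketch: unique common lines are matched in order; "b" and "x"
# have no counterpart and are left unmatched.
def _example_find_diff_matching():
    lhs = ["a", "b", "c", "d"]
    rhs = ["a", "c", "x", "d"]
    assert find_diff_matching(lhs, rhs) == [(0, 0), (2, 1), (3, 3)]
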
VARIABLE_TAG = "[[@@]]"
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")


class TestVar:
    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        if prefix != self._prefix:
            self._prefix = ""
        if suffix != self._suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        return self._nameless_value.get_value_name(
            text, self._nameless_value.check_prefix
        )

    def get_def(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"
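
# Illustrative sketch: the first occurrence of a value defines the FileCheck
# variable, later occurrences use it.
def _example_testvar_def_use():
    nv = NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None)
    tv = TestVar(nv, "", "")
    assert tv.get_def("TMP0", "%", " ") == "%[[TMP0:.*]] "
    assert tv.get_use("TMP0", "%", ",") == "%[[TMP0]],"
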
class CheckValueInfo:
    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the value, e.g. '%'
        self.key = key

        # Text to be matched by the FileCheck variable (without any prefix or suffix)
        self.text = text

        # Name of the FileCheck variable
        self.name = name

        # Prefix and suffix that were captured by the NamelessValue regular expression
        self.prefix = prefix
        self.suffix = suffix


# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, values={self.values})"


def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Candidate matches may conflict if they require conflicting mappings of
        # names. We want to determine a large set of compatible candidates,
        # because that leads to a small diff.
        #
        # We think of the candidates as vertices in a conflict graph. The
        # conflict graph has edges between incompatible candidates. We want to
        # find a large independent set in this graph.
        #
        # Greedily selecting candidates and removing incompatible ones has the
        # disadvantage that making few bad decisions early on can have huge
        # consequences.
        #
        # Instead, we implicitly compute multiple independent sets by greedily
        # assigning a *coloring* to the conflict graph. Then, we select the
        # largest color class (which is the largest independent set we found),
        # commit to all candidates in it, and recurse.
        #
        # Note that we don't actually materialize the conflict graph. Instead,
        # each color class tracks the information needed to decide implicitly
        # whether a vertex conflicts (has an edge to) any of the vertices added
        # to the color class so far.
        class Color:
            def __init__(self):
                # (lhs_idx, rhs_idx) of matches in this color
                self.matches = []

                # rhs_name -> lhs_name mappings required by this color
                self.mapping = {}

                # lhs_names committed for this color
                self.committed = set()

        colors = []

        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            # We scan through the uncommitted names in the candidate line and
            # filter out the color classes to which the candidate could be
            # assigned.
            #
            # Simultaneously, we prepare a new color class in case the candidate
            # conflicts with all colors that have been established so far.
            compatible_colors = colors[:]
            new_color = Color()
            new_color.matches.append((lhs_idx, rhs_idx))

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in new_color.mapping:
                    # Same, but for a possible commit happening on the same line
                    if new_color.mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if (
                    lhs_value.name in committed_names
                    or lhs_value.name in new_color.committed
                ):
                    # We can't map this value because the name we would map it
                    # to has already been committed for something else. Give up
                    # on this line.
                    break

                new_color.mapping[rhs_value.name] = lhs_value.name
                new_color.committed.add(lhs_value.name)

                color_idx = 0
                while color_idx < len(compatible_colors):
                    color = compatible_colors[color_idx]
                    compatible = True
                    if rhs_value.name in color.mapping:
                        compatible = color.mapping[rhs_value.name] == lhs_value.name
                    else:
                        compatible = lhs_value.name not in color.committed
                    if compatible:
                        color_idx += 1
                    else:
                        del compatible_colors[color_idx]
            else:
                # We never broke out of the loop, which means that, at a minimum,
                # this line is viable standalone.
                if compatible_colors:
                    color = max(compatible_colors, key=lambda color: len(color.matches))
                    color.mapping.update(new_color.mapping)
                    color.committed.update(new_color.committed)
                    color.matches.append((lhs_idx, rhs_idx))
                else:
                    colors.append(new_color)

        if colors:
            # Pick the largest color class. This gives us a large independent
            # (non-conflicting) set of candidate matches. Assign all names
            # required by the independent set and recurse.
            max_color = max(colors, key=lambda color: len(color.matches))

            for rhs_var, lhs_var in max_color.mapping.items():
                new_mapping[rhs_var] = lhs_var
                committed_names.add(lhs_var)

                if (
                    lhs_var != rhs_var
                    and lhs_var in new_mapping
                    and new_mapping[lhs_var] == lhs_var
                ):
                    new_mapping[lhs_var] = "conflict_" + lhs_var

            matches = (
                [(old_begin - 1, new_begin - 1)]
                + max_color.matches
                + [(old_end, new_end)]
            )

            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
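
# Illustrative sketch: a value that was merely renamed between the old and new
# check lines is mapped back to its old FileCheck name, shrinking the diff.
def _example_remap_metavar_names():
    old = [CheckLineInfo(VARIABLE_TAG + " = add", [CheckValueInfo("%", "a", "TMP0", "", "")])]
    new = [CheckLineInfo(VARIABLE_TAG + " = add", [CheckValueInfo("%", "b", "TMP1", "", "")])]
    assert remap_metavar_names(old, new, set()) == {"TMP1": "TMP0"}
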


def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    # prefix_exclusions are prefixes we cannot use to print the function,
    # because the function does not exist in the output of all run lines that
    # use these prefixes.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function, we
        # cannot check for it, as it does not exist for this run line. A subset
        # of the check prefixes might know about the function, but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # Now that prefix_exclusions is constructed, we can emit the output.
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the
            # last check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, so split into a
                # -LABEL line and a separate -SAME line that contains the
                # arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in
                    # the label line. This is required in case of function names
                    # that are prefixes of each other. Otherwise, the label line
                    # for "foo" might incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                    # Removing args_and_sig from the label match line requires
                    # func_name_separator to be empty. Otherwise, the match will
                    # not work.
                    assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT", so don't add
                    # extra spaces before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY
            # lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the
                # first line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #     is_blank_line = True
                # else:
                #     output_lines.append(
                #         "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                #     )
                #     is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the
                # first line of code in the test function.
                output_lines.append(comment_marker)

            # Remember new global variables we have not seen before.
            for key in global_vars_seen:
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
            break
    return printed_prefixes
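
# For IR output, the lines appended by add_checks typically have the following
# shape (an illustrative sketch for a hypothetical function @foo whose
# arguments carry captures, hence the -LABEL/-SAME split):
#
#   ; CHECK-LABEL: define i32 @foo(
#   ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]])
#   ; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[A]], [[B]]
#   ; CHECK-NEXT:    ret i32 [[SUM]]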


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    assert ginfo.is_ir()
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        function_def_regex = "define %s"
    elif function_sig:
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )


def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    assert ginfo.is_analyze()
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    global_vars_seen_dict = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
    )


def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines can be
            # sorted properly even if they are matched by different nameless
            # values. This is relevant for GLOB and GLOBNAMED since they may
            # appear interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines
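
# After build_global_values_dictionary runs, glob_val_dict maps each check
# prefix to a dict from global check kind to (start_index, line) pairs, e.g.
# (hypothetical contents):
#   glob_val_dict["CHECK"]["GLOB"] == [(1234, "@g = global i32 0")]
# An entry of None marks a kind whose run lines produced conflicting globals
# for that prefix.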


def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # Attribute sets are usually better checked by --check-attributes.
        return []

    def extract(line, nv):
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    transitively_visible = set()
    contains_refs_to = {}

    def add(var):
        nonlocal transitively_visible
        nonlocal contains_refs_to
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        if var not in contains_refs_to:
            return
        for x in contains_refs_to[var]:
            add(x)

    for i, line in global_val_lines_w_index:
        (var, refs) = extract(line, nameless_value)
        contains_refs_to[var] = refs
    for var, check_key in global_vars_seen:
        if check_key != nameless_value.check_key:
            continue
        add(var)
    return [
        (i, line)
        for i, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in transitively_visible
    ]


METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()
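
# Illustrative example for filter_unstable_metadata above: a metadata line
# such as
#   !1 = !DIFile(filename: "a.c", directory: "/home/user/build")
# is rewritten to
#   !1 = !DIFile(filename: "a.c", directory: {{.*}})
# so the generated checks do not depend on the local build directory; the
# version filter similarly hides tool version numbers behind {{.*}}.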


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly.
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if checkprefix not in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
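
# Sketch of the result (hypothetical test, IR comment marker ";"): with
# --check-globals=smart and a function that references @g, the flushed global
# section looks like
#   ;.
#   ; CHECK: @g = global i32 0
#   ;.
# where ";." is the comment marker followed by SEPARATOR, as emitted by
# flush_current_checks above.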


def check_prefix(prefix):
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )


def get_check_prefixes(filecheck_cmd):
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
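
# For example, assuming CHECK_PREFIX_RE matches the --check-prefix and
# --check-prefixes options (as its use in get_check_prefixes suggests):
#   get_check_prefixes("FileCheck %s --check-prefixes=CHECK,AVX")
# returns ["CHECK", "AVX"], while a command without explicit prefixes falls
# back to ["CHECK"].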


def get_autogennote_suffix(parser, args):
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args.
        # Ignore parameters such as paths to the binary or the list of tests.
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with
            # boolean --foo/--no-foo options).
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values.
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2.
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a
            # list of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists
        # anywhere in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                if warn and saw_line:
                    # We saw the option after already reading some test input
                    # lines. Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
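
# Typical round trip (illustrative): a test line such as
#   ; UTC_ARGS: --disable
# is matched by UTC_ARGS_CMD in check_for_command, which re-parses the
# accumulated argv, while get_autogennote_suffix writes the non-default
# options back into the autogenerated note, e.g. " UTC_ARGS: --check-globals all".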


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    added = set()
    generated_prefixes = set()
    for prefix in prefix_list:
        prefixes = prefix[0]
        tool_args = prefix[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # a tool command args string as their second element. They
                # output checks for each prefix in the list of prefixes, which
                # implicitly assumes that every run line generates something for
                # each function. That is not the case for generated functions,
                # as some run lines might not generate them (e.g. -fopenmp vs.
                # no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
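
# Hypothetical call-site sketch (the real callers live in the update_*.py
# scripts): checks ordered by prefix are generated via something like
#   add_checks_at_end(
#       output_lines, prefix_list, func_order, ";",
#       lambda out, run_list, func: add_ir_checks(
#           out, ";", run_list, func_dict, func, False, True, ginfo,
#           global_vars_seen_dict, is_filtered))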