import argparse
import bisect
import collections
import copy
import glob
import os
import re
import subprocess
import sys
import shlex

from typing import List, Mapping, Set

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
DEFAULT_VERSION = 5


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Compiled patterns are immutable; share the same object in the copy.
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        result = copy.deepcopy(super(Filter, self), memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result


def parse_commandline_args(parser):
    """Register the common update_*test_checks options on *parser*, parse
    sys.argv, and mirror a few option values into module globals.

    Returns the parsed argparse namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        # BUGFIX: option_string is now passed in explicitly. Previously the
        # except-clause below referenced `option_string`, which was only a
        # parameter of __call__, so an invalid regex raised NameError instead
        # of the intended ValueError.
        def do_call(self, namespace, values, flags, option_string):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # --filter-out marks the just-appended regex as an exclusion filter.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    """Parse *argv* with *parser*, applying version-dependent defaults, and
    mirror a few option values into module globals."""
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args


class InputLineInfo(object):
    """One input test line plus the option state in effect at that line."""

    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    """Per-test-file state: parsed options, input lines, RUN lines and the
    autogenerated-note strings used to mark regenerated output."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # MIR tests use '#' comments; everything else uses ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield InputLineInfo for every input line, tracking UTC_ARGS state."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield lines that need check generation, copying disabled/advert
        lines straight into *output_lines*."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return catch-all check lines for prefixes listed in RUN lines but
        never used, so FileCheck does not complain about them."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Expand *test_patterns* and yield a TestInfo for each test file that
    should be (re)generated, skipping opted-out or foreign-script tests."""
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                # Everything below the unused-prefix note is autogenerated;
                # drop it so it gets regenerated from scratch.
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Return True if *input_line* from the original test should be copied
    into the regenerated output (i.e. it is not a stale CHECK line)."""
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #     return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            is_global = global_ir_value_re.search(input_line)
            return not is_global
        return False

    return True


def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A prefix change means we left the previous run of checks.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                    continue

        current_function = None

    return result


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    """Apply each (placeholder, replacement) pair of *substitutions* to *s*."""
    for a, b in substitutions:
        s = s.replace(a, b)
    return s


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run *exe* with *cmd_args* on input file *ir* (optionally piped through
    *preprocess_cmd*) and return its stdout with unix line endings."""
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
    if sys.version_info[0] > 2:
        # FYI, if you crashed here with a decode error, your run line probably
        # results in bitcode or other binary format being written to the pipe.
        # For an opt test, you probably want to add -S or -disable-output.
        stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")


##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    """Print an ERROR message (optionally suffixed with the test path) to stderr."""
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("ERROR: {}".format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    """Print a WARNING message (optionally suffixed with the test path) to stderr."""
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("WARNING: {}".format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    """Print to stderr only when --verbose was given."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    """Extract RUN: lines from *lines*, joining backslash-continued lines."""
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines


def get_triple_from_march(march):
    """Map a -march value to a target triple prefix, defaulting to x86."""
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in triples.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    """Return True if *line* survives the ordered *filters* list; the first
    matching filter decides."""
    has_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_filter = True
        if f.search(line):
            return False if f.is_filter_out else True
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return False if has_filter else True


def do_filter(body, filters):
    """Apply *filters* line-by-line to *body*; no filters means no change."""
    return (
        body
        if not filters
        else "\n".join(
            filter(lambda line: apply_filters(line, filters), body.splitlines())
        )
    )


def scrub_body(body):
    """Normalize whitespace in an assembly/IR body for stable check lines."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    """Run *scrubber* on *body*; deep-copy *scrubber_args* so setting
    extra_scrub does not leak into subsequent calls."""
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
692class function_body(object): 693 def __init__( 694 self, 695 string, 696 extra, 697 funcdef_attrs_and_ret, 698 args_and_sig, 699 attrs, 700 func_name_separator, 701 ginfo, 702 ): 703 self.scrub = string 704 self.extrascrub = extra 705 self.funcdef_attrs_and_ret = funcdef_attrs_and_ret 706 self.args_and_sig = args_and_sig 707 self.attrs = attrs 708 self.func_name_separator = func_name_separator 709 self._ginfo = ginfo 710 711 def is_same_except_arg_names( 712 self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs 713 ): 714 arg_names = set() 715 716 def drop_arg_names(match): 717 nameless_value = self._ginfo.get_nameless_value_from_match(match) 718 if nameless_value.check_key == "%": 719 arg_names.add(self._ginfo.get_name_from_match(match)) 720 substitute = "" 721 else: 722 substitute = match.group(2) 723 return match.group(1) + substitute + match.group(match.lastindex) 724 725 def repl_arg_names(match): 726 nameless_value = self._ginfo.get_nameless_value_from_match(match) 727 if ( 728 nameless_value.check_key == "%" 729 and self._ginfo.get_name_from_match(match) in arg_names 730 ): 731 return match.group(1) + match.group(match.lastindex) 732 return match.group(1) + match.group(2) + match.group(match.lastindex) 733 734 if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret: 735 return False 736 if self.attrs != attrs: 737 return False 738 739 regexp = self._ginfo.get_regexp() 740 ans0 = regexp.sub(drop_arg_names, self.args_and_sig) 741 ans1 = regexp.sub(drop_arg_names, args_and_sig) 742 if ans0 != ans1: 743 return False 744 if self._ginfo.is_asm(): 745 # Check without replacements, the replacements are not applied to the 746 # body for backend checks. 
747 return self.extrascrub == extrascrub 748 749 es0 = regexp.sub(repl_arg_names, self.extrascrub) 750 es1 = regexp.sub(repl_arg_names, extrascrub) 751 es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0) 752 es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1) 753 return es0 == es1 754 755 def __str__(self): 756 return self.scrub 757 758 759class FunctionTestBuilder: 760 def __init__(self, run_list, flags, scrubber_args, path, ginfo): 761 self._verbose = flags.verbose 762 self._record_args = flags.function_signature 763 self._check_attributes = flags.check_attributes 764 # Strip double-quotes if input was read by UTC_ARGS 765 self._filters = ( 766 list( 767 map( 768 lambda f: Filter( 769 re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out 770 ), 771 flags.filters, 772 ) 773 ) 774 if flags.filters 775 else [] 776 ) 777 self._scrubber_args = scrubber_args 778 self._path = path 779 self._ginfo = ginfo 780 # Strip double-quotes if input was read by UTC_ARGS 781 self._replace_value_regex = list( 782 map(lambda x: x.strip('"'), flags.replace_value_regex) 783 ) 784 self._func_dict = {} 785 self._func_order = {} 786 self._global_var_dict = {} 787 self._processed_prefixes = set() 788 for tuple in run_list: 789 for prefix in tuple[0]: 790 self._func_dict.update({prefix: dict()}) 791 self._func_order.update({prefix: []}) 792 self._global_var_dict.update({prefix: dict()}) 793 794 def finish_and_get_func_dict(self): 795 for prefix in self.get_failed_prefixes(): 796 warn( 797 "Prefix %s had conflicting output from different RUN lines for all functions in test %s" 798 % ( 799 prefix, 800 self._path, 801 ) 802 ) 803 return self._func_dict 804 805 def func_order(self): 806 return self._func_order 807 808 def global_var_dict(self): 809 return self._global_var_dict 810 811 def is_filtered(self): 812 return bool(self._filters) 813 814 def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes): 815 build_global_values_dictionary( 816 self._global_var_dict, raw_tool_output, 
prefixes, self._ginfo 817 ) 818 for m in function_re.finditer(raw_tool_output): 819 if not m: 820 continue 821 func = m.group("func") 822 body = m.group("body") 823 # func_name_separator is the string that is placed right after function name at the 824 # beginning of assembly function definition. In most assemblies, that is just a 825 # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is 826 # False, just assume that separator is an empty string. 827 if self._ginfo.is_asm(): 828 # Use ':' as default separator. 829 func_name_separator = ( 830 m.group("func_name_separator") 831 if "func_name_separator" in m.groupdict() 832 else ":" 833 ) 834 else: 835 func_name_separator = "" 836 attrs = m.group("attrs") if self._check_attributes else "" 837 funcdef_attrs_and_ret = ( 838 m.group("funcdef_attrs_and_ret") if self._record_args else "" 839 ) 840 # Determine if we print arguments, the opening brace, or nothing after the 841 # function name 842 if self._record_args and "args_and_sig" in m.groupdict(): 843 args_and_sig = scrub_body(m.group("args_and_sig").strip()) 844 elif "args_and_sig" in m.groupdict(): 845 args_and_sig = "(" 846 else: 847 args_and_sig = "" 848 filtered_body = do_filter(body, self._filters) 849 scrubbed_body = do_scrub( 850 filtered_body, scrubber, self._scrubber_args, extra=False 851 ) 852 scrubbed_extra = do_scrub( 853 filtered_body, scrubber, self._scrubber_args, extra=True 854 ) 855 if "analysis" in m.groupdict(): 856 analysis = m.group("analysis") 857 if analysis not in SUPPORTED_ANALYSES: 858 warn("Unsupported analysis mode: %r!" % (analysis,)) 859 if func.startswith("stress"): 860 # We only use the last line of the function body for stress tests. 
861 scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:]) 862 if self._verbose: 863 print("Processing function: " + func, file=sys.stderr) 864 for l in scrubbed_body.splitlines(): 865 print(" " + l, file=sys.stderr) 866 for prefix in prefixes: 867 # Replace function names matching the regex. 868 for regex in self._replace_value_regex: 869 # Pattern that matches capture groups in the regex in leftmost order. 870 group_regex = re.compile(r"\(.*?\)") 871 # Replace function name with regex. 872 match = re.match(regex, func) 873 if match: 874 func_repl = regex 875 # Replace any capture groups with their matched strings. 876 for g in match.groups(): 877 func_repl = group_regex.sub( 878 re.escape(g), func_repl, count=1 879 ) 880 func = re.sub(func_repl, "{{" + func_repl + "}}", func) 881 882 # Replace all calls to regex matching functions. 883 matches = re.finditer(regex, scrubbed_body) 884 for match in matches: 885 func_repl = regex 886 # Replace any capture groups with their matched strings. 887 for g in match.groups(): 888 func_repl = group_regex.sub( 889 re.escape(g), func_repl, count=1 890 ) 891 # Substitute function call names that match the regex with the same 892 # capture groups set. 
893 scrubbed_body = re.sub( 894 func_repl, "{{" + func_repl + "}}", scrubbed_body 895 ) 896 897 if func in self._func_dict[prefix]: 898 if self._func_dict[prefix][func] is not None and ( 899 str(self._func_dict[prefix][func]) != scrubbed_body 900 or self._func_dict[prefix][func].args_and_sig != args_and_sig 901 or self._func_dict[prefix][func].attrs != attrs 902 or self._func_dict[prefix][func].funcdef_attrs_and_ret 903 != funcdef_attrs_and_ret 904 ): 905 if self._func_dict[prefix][func].is_same_except_arg_names( 906 scrubbed_extra, 907 funcdef_attrs_and_ret, 908 args_and_sig, 909 attrs, 910 ): 911 self._func_dict[prefix][func].scrub = scrubbed_extra 912 self._func_dict[prefix][func].args_and_sig = args_and_sig 913 else: 914 # This means a previous RUN line produced a body for this function 915 # that is different from the one produced by this current RUN line, 916 # so the body can't be common across RUN lines. We use None to 917 # indicate that. 918 self._func_dict[prefix][func] = None 919 else: 920 if prefix not in self._processed_prefixes: 921 self._func_dict[prefix][func] = function_body( 922 scrubbed_body, 923 scrubbed_extra, 924 funcdef_attrs_and_ret, 925 args_and_sig, 926 attrs, 927 func_name_separator, 928 self._ginfo, 929 ) 930 self._func_order[prefix].append(func) 931 else: 932 # An earlier RUN line used this check prefixes but didn't produce 933 # a body for this function. This happens in Clang tests that use 934 # preprocesser directives to exclude individual functions from some 935 # RUN lines. 936 self._func_dict[prefix][func] = None 937 938 def processed_prefixes(self, prefixes): 939 """ 940 Mark a set of prefixes as having had at least one applicable RUN line fully 941 processed. This is used to filter out function bodies that don't have 942 outputs for all RUN lines. 
943 """ 944 self._processed_prefixes.update(prefixes) 945 946 def get_failed_prefixes(self): 947 # This returns the list of those prefixes that failed to match any function, 948 # because there were conflicting bodies produced by different RUN lines, in 949 # all instances of the prefix. 950 for prefix in self._func_dict: 951 if self._func_dict[prefix] and ( 952 not [ 953 fct 954 for fct in self._func_dict[prefix] 955 if self._func_dict[prefix][fct] is not None 956 ] 957 ): 958 yield prefix 959 960 961##### Generator of LLVM IR CHECK lines 962 963SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*") 964 965# TODO: We should also derive check lines for global, debug, loop declarations, etc.. 966 967 968class NamelessValue: 969 """ 970 A NamelessValue object represents a type of value in the IR whose "name" we 971 generalize in the generated check lines; where the "name" could be an actual 972 name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12` 973 or `!4`). 974 """ 975 976 def __init__( 977 self, 978 check_prefix, 979 check_key, 980 ir_prefix, 981 ir_regexp, 982 global_ir_rhs_regexp, 983 *, 984 is_before_functions=False, 985 is_number=False, 986 replace_number_with_counter=False, 987 match_literally=False, 988 interlaced_with_previous=False, 989 ir_suffix=r"", 990 ): 991 self.check_prefix = check_prefix 992 self.check_key = check_key 993 self.ir_prefix = ir_prefix 994 self.ir_regexp = ir_regexp 995 self.ir_suffix = ir_suffix 996 self.global_ir_rhs_regexp = global_ir_rhs_regexp 997 self.is_before_functions = is_before_functions 998 self.is_number = is_number 999 # Some variable numbers (e.g. MCINST1234) will change based on unrelated 1000 # modifications to LLVM, replace those with an incrementing counter. 
1001 self.replace_number_with_counter = replace_number_with_counter 1002 self.match_literally = match_literally 1003 self.interlaced_with_previous = interlaced_with_previous 1004 self.variable_mapping = {} 1005 1006 # Return true if this kind of IR value is defined "locally" to functions, 1007 # which we assume is only the case precisely for LLVM IR local values. 1008 def is_local_def_ir_value(self): 1009 return self.check_key == "%" 1010 1011 # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals 1012 def get_ir_regex(self): 1013 # for backwards compatibility we check locals with '.*' 1014 if self.is_local_def_ir_value(): 1015 return ".*" 1016 return self.ir_regexp 1017 1018 # Create a FileCheck variable name based on an IR name. 1019 def get_value_name(self, var: str, check_prefix: str): 1020 var = var.replace("!", "") 1021 if self.replace_number_with_counter: 1022 assert var 1023 replacement = self.variable_mapping.get(var, None) 1024 if replacement is None: 1025 # Replace variable with an incrementing counter 1026 replacement = str(len(self.variable_mapping) + 1) 1027 self.variable_mapping[var] = replacement 1028 var = replacement 1029 # This is a nameless value, prepend check_prefix. 1030 if var.isdigit(): 1031 var = check_prefix + var 1032 else: 1033 # This is a named value that clashes with the check_prefix, prepend with 1034 # _prefix_filecheck_ir_name, if it has been defined. 
1035 if ( 1036 may_clash_with_default_check_prefix_name(check_prefix, var) 1037 and _prefix_filecheck_ir_name 1038 ): 1039 var = _prefix_filecheck_ir_name + var 1040 var = var.replace(".", "_") 1041 var = var.replace("-", "_") 1042 return var.upper() 1043 1044 def get_affixes_from_match(self, match): 1045 prefix = re.match(self.ir_prefix, match.group(2)).group(0) 1046 suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0) 1047 return prefix, suffix 1048 1049 1050class GeneralizerInfo: 1051 """ 1052 A GeneralizerInfo object holds information about how check lines should be generalized 1053 (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file 1054 state (e.g. information about IR global variables). 1055 """ 1056 1057 MODE_IR = 0 1058 MODE_ASM = 1 1059 MODE_ANALYZE = 2 1060 1061 def __init__( 1062 self, 1063 version, 1064 mode, 1065 nameless_values: List[NamelessValue], 1066 regexp_prefix, 1067 regexp_suffix, 1068 ): 1069 self._version = version 1070 self._mode = mode 1071 self._nameless_values = nameless_values 1072 1073 self._regexp_prefix = regexp_prefix 1074 self._regexp_suffix = regexp_suffix 1075 1076 self._regexp, _ = self._build_regexp(False, False) 1077 ( 1078 self._unstable_globals_regexp, 1079 self._unstable_globals_values, 1080 ) = self._build_regexp(True, True) 1081 1082 def _build_regexp(self, globals_only, unstable_only): 1083 matches = [] 1084 values = [] 1085 for nameless_value in self._nameless_values: 1086 is_global = nameless_value.global_ir_rhs_regexp is not None 1087 if globals_only and not is_global: 1088 continue 1089 if unstable_only and nameless_value.match_literally: 1090 continue 1091 1092 match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})" 1093 if self.is_ir() and not globals_only and is_global: 1094 match = "^" + match 1095 matches.append(match) 1096 values.append(nameless_value) 1097 1098 regexp_string = r"|".join(matches) 1099 1100 return ( 1101 
re.compile( 1102 self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix 1103 ), 1104 values, 1105 ) 1106 1107 def get_version(self): 1108 return self._version 1109 1110 def is_ir(self): 1111 return self._mode == GeneralizerInfo.MODE_IR 1112 1113 def is_asm(self): 1114 return self._mode == GeneralizerInfo.MODE_ASM 1115 1116 def is_analyze(self): 1117 return self._mode == GeneralizerInfo.MODE_ANALYZE 1118 1119 def get_nameless_values(self): 1120 return self._nameless_values 1121 1122 def get_regexp(self): 1123 return self._regexp 1124 1125 def get_unstable_globals_regexp(self): 1126 return self._unstable_globals_regexp 1127 1128 # The entire match is group 0, the prefix has one group (=1), the entire 1129 # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. 1130 FIRST_NAMELESS_GROUP_IN_MATCH = 3 1131 1132 def get_match_info(self, match): 1133 """ 1134 Returns (name, nameless_value) for the given match object 1135 """ 1136 if match.re == self._regexp: 1137 values = self._nameless_values 1138 else: 1139 match.re == self._unstable_globals_regexp 1140 values = self._unstable_globals_values 1141 for i in range(len(values)): 1142 g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH) 1143 if g is not None: 1144 return g, values[i] 1145 error("Unable to identify the kind of IR value from the match!") 1146 return None, None 1147 1148 # See get_idx_from_match 1149 def get_name_from_match(self, match): 1150 return self.get_match_info(match)[0] 1151 1152 def get_nameless_value_from_match(self, match) -> NamelessValue: 1153 return self.get_match_info(match)[1] 1154 1155 1156def make_ir_generalizer(version): 1157 values = [] 1158 1159 if version >= 5: 1160 values += [ 1161 NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None), 1162 NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"), 1163 ] 1164 1165 values += [ 1166 # check_prefix check_key ir_prefix ir_regexp global_ir_rhs_regexp 1167 
NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None), 1168 NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None), 1169 NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"), 1170 NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None), 1171 NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True), 1172 NamelessValue( 1173 r"GLOBNAMED", 1174 "@", 1175 r"@", 1176 r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*", 1177 r".+", 1178 is_before_functions=True, 1179 match_literally=True, 1180 interlaced_with_previous=True, 1181 ), 1182 NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None), 1183 NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None), 1184 NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None), 1185 NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None), 1186 NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None), 1187 NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None), 1188 NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None), 1189 NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"), 1190 NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None), 1191 NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None), 1192 NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None), 1193 ] 1194 1195 prefix = r"(\s*)" 1196 suffix = r"([,\s\(\)\}]|\Z)" 1197 1198 # values = [ 1199 # nameless_value 1200 # for nameless_value in IR_NAMELESS_VALUES 1201 # if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and 1202 # not (unstable_ids_only and nameless_value.match_literally) 1203 # ] 1204 1205 return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix) 1206 1207 1208def make_asm_generalizer(version): 1209 values = [ 1210 NamelessValue( 1211 r"MCINST", 1212 "Inst#", 1213 "<MCInst #", 1214 r"\d+", 1215 r".+", 1216 is_number=True, 1217 replace_number_with_counter=True, 1218 ), 1219 NamelessValue( 1220 r"MCREG", 1221 "Reg:", 
1222 "<MCOperand Reg:", 1223 r"\d+", 1224 r".+", 1225 is_number=True, 1226 replace_number_with_counter=True, 1227 ), 1228 ] 1229 1230 prefix = r"((?:#|//)\s*)" 1231 suffix = r"([>\s]|\Z)" 1232 1233 return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix) 1234 1235 1236def make_analyze_generalizer(version): 1237 values = [ 1238 NamelessValue( 1239 r"GRP", 1240 "#", 1241 r"", 1242 r"0x[0-9a-f]+", 1243 None, 1244 replace_number_with_counter=True, 1245 ), 1246 ] 1247 1248 prefix = r"(\s*)" 1249 suffix = r"(\)?:)" 1250 1251 return GeneralizerInfo( 1252 version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix 1253 ) 1254 1255 1256# Return true if var clashes with the scripted FileCheck check_prefix. 1257def may_clash_with_default_check_prefix_name(check_prefix, var): 1258 return check_prefix and re.match( 1259 r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE 1260 ) 1261 1262 1263def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: 1264 """ 1265 Find a large ordered matching between strings in lhs and rhs. 1266 1267 Think of this as finding the *unchanged* lines in a diff, where the entries 1268 of lhs and rhs are lines of the files being diffed. 1269 1270 Returns a list of matched (lhs_idx, rhs_idx) pairs. 1271 """ 1272 1273 if not lhs or not rhs: 1274 return [] 1275 1276 # Collect matches in reverse order. 1277 matches = [] 1278 1279 # First, collect a set of candidate matching edges. We limit this to a 1280 # constant multiple of the input size to avoid quadratic runtime. 
1281 patterns = collections.defaultdict(lambda: ([], [])) 1282 1283 for idx in range(len(lhs)): 1284 patterns[lhs[idx]][0].append(idx) 1285 for idx in range(len(rhs)): 1286 patterns[rhs[idx]][1].append(idx) 1287 1288 multiple_patterns = [] 1289 1290 candidates = [] 1291 for pattern in patterns.values(): 1292 if not pattern[0] or not pattern[1]: 1293 continue 1294 1295 if len(pattern[0]) == len(pattern[1]) == 1: 1296 candidates.append((pattern[0][0], pattern[1][0])) 1297 else: 1298 multiple_patterns.append(pattern) 1299 1300 multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) 1301 1302 for pattern in multiple_patterns: 1303 if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( 1304 len(lhs) + len(rhs) 1305 ): 1306 break 1307 for lhs_idx in pattern[0]: 1308 for rhs_idx in pattern[1]: 1309 candidates.append((lhs_idx, rhs_idx)) 1310 1311 if not candidates: 1312 # The LHS and RHS either share nothing in common, or lines are just too 1313 # identical. In that case, let's give up and not match anything. 1314 return [] 1315 1316 # Compute a maximal crossing-free matching via an algorithm that is 1317 # inspired by a mixture of dynamic programming and line-sweeping in 1318 # discrete geometry. 1319 # 1320 # I would be surprised if this algorithm didn't exist somewhere in the 1321 # literature, but I found it without consciously recalling any 1322 # references, so you'll have to make do with the explanation below. 1323 # Sorry. 1324 # 1325 # The underlying graph is bipartite: 1326 # - nodes on the LHS represent lines in the original check 1327 # - nodes on the RHS represent lines in the new (updated) check 1328 # 1329 # Nodes are implicitly sorted by the corresponding line number. 1330 # Edges (unique_matches) are sorted by the line number on the LHS. 1331 # 1332 # Here's the geometric intuition for the algorithm. 
1333 # 1334 # * Plot the edges as points in the plane, with the original line 1335 # number on the X axis and the updated line number on the Y axis. 1336 # * The goal is to find a longest "chain" of points where each point 1337 # is strictly above and to the right of the previous point. 1338 # * The algorithm proceeds by sweeping a vertical line from left to 1339 # right. 1340 # * The algorithm maintains a table where `table[N]` answers the 1341 # question "What is currently the 'best' way to build a chain of N+1 1342 # points to the left of the vertical line". Here, 'best' means 1343 # that the last point of the chain is a as low as possible (minimal 1344 # Y coordinate). 1345 # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of 1346 # the last point in the chain and `y` is its Y coordinate 1347 # * A key invariant is that the Y values in the table are 1348 # monotonically increasing 1349 # * Thanks to these properties, the table can be used to answer the 1350 # question "What is the longest chain that can be built to the left 1351 # of the vertical line using only points below a certain Y value", 1352 # using a binary search over the table. 1353 # * The algorithm also builds a backlink structure in which every point 1354 # links back to the previous point on a best (longest) chain ending 1355 # at that point 1356 # 1357 # The core loop of the algorithm sweeps the line and updates the table 1358 # and backlink structure for every point that we cross during the sweep. 1359 # Therefore, the algorithm is trivially O(M log M) in the number of 1360 # points. 1361 candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) 1362 1363 backlinks = [] 1364 table_rhs_idx = [] 1365 table_candidate_idx = [] 1366 for _, rhs_idx in candidates: 1367 candidate_idx = len(backlinks) 1368 ti = bisect.bisect_left(table_rhs_idx, rhs_idx) 1369 1370 # Update the table to record a best chain ending in the current point. 
1371 # There always is one, and if any of the previously visited points had 1372 # a higher Y coordinate, then there is always a previously recorded best 1373 # chain that can be improved upon by using the current point. 1374 # 1375 # There is only one case where there is some ambiguity. If the 1376 # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as 1377 # the current point (this can only happen if the same line appeared 1378 # multiple times on the LHS), then we could choose to keep the 1379 # previously recorded best chain instead. That would bias the algorithm 1380 # differently but should have no systematic impact on the quality of the 1381 # result. 1382 if ti < len(table_rhs_idx): 1383 table_rhs_idx[ti] = rhs_idx 1384 table_candidate_idx[ti] = candidate_idx 1385 else: 1386 table_rhs_idx.append(rhs_idx) 1387 table_candidate_idx.append(candidate_idx) 1388 if ti > 0: 1389 backlinks.append(table_candidate_idx[ti - 1]) 1390 else: 1391 backlinks.append(None) 1392 1393 # Commit to names in the matching by walking the backlinks. Recursively 1394 # attempt to fill in more matches in-betweem. 
1395 match_idx = table_candidate_idx[-1] 1396 while match_idx is not None: 1397 current = candidates[match_idx] 1398 matches.append(current) 1399 match_idx = backlinks[match_idx] 1400 1401 matches.reverse() 1402 return matches 1403 1404 1405VARIABLE_TAG = "[[@@]]" 1406METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") 1407NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") 1408 1409 1410class TestVar: 1411 def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str): 1412 self._nameless_value = nameless_value 1413 1414 self._prefix = prefix 1415 self._suffix = suffix 1416 1417 def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str): 1418 if prefix != self._prefix: 1419 self._prefix = "" 1420 if suffix != self._suffix: 1421 self._suffix = "" 1422 1423 def get_variable_name(self, text): 1424 return self._nameless_value.get_value_name( 1425 text, self._nameless_value.check_prefix 1426 ) 1427 1428 def get_def(self, name, prefix, suffix): 1429 if self._nameless_value.is_number: 1430 return f"{prefix}[[#{name}:]]{suffix}" 1431 if self._prefix: 1432 assert self._prefix == prefix 1433 prefix = "" 1434 if self._suffix: 1435 assert self._suffix == suffix 1436 suffix = "" 1437 return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}" 1438 1439 def get_use(self, name, prefix, suffix): 1440 if self._nameless_value.is_number: 1441 return f"{prefix}[[#{name}]]{suffix}" 1442 if self._prefix: 1443 assert self._prefix == prefix 1444 prefix = "" 1445 if self._suffix: 1446 assert self._suffix == suffix 1447 suffix = "" 1448 return f"{prefix}[[{name}]]{suffix}" 1449 1450 1451class CheckValueInfo: 1452 def __init__( 1453 self, 1454 key, 1455 text, 1456 name: str, 1457 prefix: str, 1458 suffix: str, 1459 ): 1460 # Key for the value, e.g. 
'%' 1461 self.key = key 1462 1463 # Text to be matched by the FileCheck variable (without any prefix or suffix) 1464 self.text = text 1465 1466 # Name of the FileCheck variable 1467 self.name = name 1468 1469 # Prefix and suffix that were captured by the NamelessValue regular expression 1470 self.prefix = prefix 1471 self.suffix = suffix 1472 1473 1474# Represent a check line in a way that allows us to compare check lines while 1475# ignoring some or all of the FileCheck variable names. 1476class CheckLineInfo: 1477 def __init__(self, line, values): 1478 # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG 1479 self.line: str = line 1480 1481 # Information on each FileCheck variable name occurrences in the line 1482 self.values: List[CheckValueInfo] = values 1483 1484 def __repr__(self): 1485 return f"CheckLineInfo(line={self.line}, self.values={self.values})" 1486 1487 1488def remap_metavar_names( 1489 old_line_infos: List[CheckLineInfo], 1490 new_line_infos: List[CheckLineInfo], 1491 committed_names: Set[str], 1492) -> Mapping[str, str]: 1493 """ 1494 Map all FileCheck variable names that appear in new_line_infos to new 1495 FileCheck variable names in an attempt to reduce the diff from old_line_infos 1496 to new_line_infos. 1497 1498 This is done by: 1499 * Matching old check lines and new check lines using a diffing algorithm 1500 applied after replacing names with wildcards. 
1501 * Committing to variable names such that the matched lines become equal 1502 (without wildcards) if possible 1503 * This is done recursively to handle cases where many lines are equal 1504 after wildcard replacement 1505 """ 1506 # Initialize uncommitted identity mappings 1507 new_mapping = {} 1508 for line in new_line_infos: 1509 for value in line.values: 1510 new_mapping[value.name] = value.name 1511 1512 # Recursively commit to the identity mapping or find a better one 1513 def recurse(old_begin, old_end, new_begin, new_end): 1514 if old_begin == old_end or new_begin == new_end: 1515 return 1516 1517 # Find a matching of lines where uncommitted names are replaced 1518 # with a placeholder. 1519 def diffify_line(line, mapper): 1520 values = [] 1521 for value in line.values: 1522 mapped = mapper(value.name) 1523 values.append(mapped if mapped in committed_names else "?") 1524 return line.line.strip() + " @@@ " + " @ ".join(values) 1525 1526 lhs_lines = [ 1527 diffify_line(line, lambda x: x) 1528 for line in old_line_infos[old_begin:old_end] 1529 ] 1530 rhs_lines = [ 1531 diffify_line(line, lambda x: new_mapping[x]) 1532 for line in new_line_infos[new_begin:new_end] 1533 ] 1534 1535 candidate_matches = find_diff_matching(lhs_lines, rhs_lines) 1536 1537 candidate_matches = [ 1538 (old_begin + lhs_idx, new_begin + rhs_idx) 1539 for lhs_idx, rhs_idx in candidate_matches 1540 ] 1541 1542 # Candidate matches may conflict if they require conflicting mappings of 1543 # names. We want to determine a large set of compatible candidates, 1544 # because that leads to a small diff. 1545 # 1546 # We think of the candidates as vertices in a conflict graph. The 1547 # conflict graph has edges between incompatible candidates. We want to 1548 # find a large independent set in this graph. 1549 # 1550 # Greedily selecting candidates and removing incompatible ones has the 1551 # disadvantage that making few bad decisions early on can have huge 1552 # consequences. 
1553 # 1554 # Instead, we implicitly compute multiple independent sets by greedily 1555 # assigning a *coloring* to the conflict graph. Then, we select the 1556 # largest color class (which is the largest independent set we found), 1557 # commit to all candidates in it, and recurse. 1558 # 1559 # Note that we don't actually materialize the conflict graph. Instead, 1560 # each color class tracks the information needed to decide implicitly 1561 # whether a vertex conflicts (has an edge to) any of the vertices added 1562 # to the color class so far. 1563 class Color: 1564 def __init__(self): 1565 # (lhs_idx, rhs_idx) of matches in this color 1566 self.matches = [] 1567 1568 # rhs_name -> lhs_name mappings required by this color 1569 self.mapping = {} 1570 1571 # lhs_names committed for this color 1572 self.committed = set() 1573 1574 colors = [] 1575 1576 for lhs_idx, rhs_idx in candidate_matches: 1577 lhs_line = old_line_infos[lhs_idx] 1578 rhs_line = new_line_infos[rhs_idx] 1579 1580 # We scan through the uncommitted names in the candidate line and 1581 # filter out the color classes to which the candidate could be 1582 # assigned. 1583 # 1584 # Simultaneously, we prepare a new color class in case the candidate 1585 # conflicts with all colors that have been established so far. 1586 compatible_colors = colors[:] 1587 new_color = Color() 1588 new_color.matches.append((lhs_idx, rhs_idx)) 1589 1590 for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): 1591 if new_mapping[rhs_value.name] in committed_names: 1592 # The new value has already been committed. If it was mapped 1593 # to the same name as the original value, we can consider 1594 # committing other values from this line. Otherwise, we 1595 # should ignore this line. 
1596 if new_mapping[rhs_value.name] == lhs_value.name: 1597 continue 1598 else: 1599 break 1600 1601 if rhs_value.name in new_color.mapping: 1602 # Same, but for a possible commit happening on the same line 1603 if new_color.color[rhs_value.name] == lhs_value.name: 1604 continue 1605 else: 1606 break 1607 1608 if ( 1609 lhs_value.name in committed_names 1610 or lhs_value.name in new_color.committed 1611 ): 1612 # We can't map this value because the name we would map it 1613 # to has already been committed for something else. Give up 1614 # on this line. 1615 break 1616 1617 new_color.mapping[rhs_value.name] = lhs_value.name 1618 new_color.committed.add(lhs_value.name) 1619 1620 color_idx = 0 1621 while color_idx < len(compatible_colors): 1622 color = compatible_colors[color_idx] 1623 compatible = True 1624 if rhs_value.name in color.mapping: 1625 compatible = color.mapping[rhs_value.name] == lhs_value.name 1626 else: 1627 compatible = lhs_value.name not in color.committed 1628 if compatible: 1629 color_idx += 1 1630 else: 1631 del compatible_colors[color_idx] 1632 else: 1633 # We never broke out of the loop, which means that at a minimum, 1634 # this line is viable standalone 1635 if compatible_colors: 1636 color = max(compatible_colors, key=lambda color: len(color.matches)) 1637 color.mapping.update(new_color.mapping) 1638 color.committed.update(new_color.committed) 1639 color.matches.append((lhs_idx, rhs_idx)) 1640 else: 1641 colors.append(new_color) 1642 1643 if colors: 1644 # Pick the largest color class. This gives us a large independent 1645 # (non-conflicting) set of candidate matches. Assign all names 1646 # required by the independent set and recurse. 
1647 max_color = max(colors, key=lambda color: len(color.matches)) 1648 1649 for rhs_var, lhs_var in max_color.mapping.items(): 1650 new_mapping[rhs_var] = lhs_var 1651 committed_names.add(lhs_var) 1652 1653 if ( 1654 lhs_var != rhs_var 1655 and lhs_var in new_mapping 1656 and new_mapping[lhs_var] == lhs_var 1657 ): 1658 new_mapping[lhs_var] = "conflict_" + lhs_var 1659 1660 matches = ( 1661 [(old_begin - 1, new_begin - 1)] 1662 + max_color.matches 1663 + [(old_end, new_end)] 1664 ) 1665 1666 for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): 1667 recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) 1668 1669 recurse(0, len(old_line_infos), 0, len(new_line_infos)) 1670 1671 # Commit to remaining names and resolve conflicts 1672 for new_name, mapped_name in new_mapping.items(): 1673 if mapped_name in committed_names: 1674 continue 1675 if not mapped_name.startswith("conflict_"): 1676 assert mapped_name == new_name 1677 committed_names.add(mapped_name) 1678 1679 for new_name, mapped_name in new_mapping.items(): 1680 if mapped_name in committed_names: 1681 continue 1682 assert mapped_name.startswith("conflict_") 1683 1684 m = NUMERIC_SUFFIX_RE.search(new_name) 1685 base_name = new_name[: m.start()] 1686 suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 1687 while True: 1688 candidate = f"{base_name}{suffix}" 1689 if candidate not in committed_names: 1690 new_mapping[new_name] = candidate 1691 committed_names.add(candidate) 1692 break 1693 suffix += 1 1694 1695 return new_mapping 1696 1697 1698def generalize_check_lines( 1699 lines, 1700 ginfo: GeneralizerInfo, 1701 vars_seen, 1702 global_vars_seen, 1703 preserve_names=False, 1704 original_check_lines=None, 1705 *, 1706 unstable_globals_only=False, 1707): 1708 if unstable_globals_only: 1709 regexp = ginfo.get_unstable_globals_regexp() 1710 else: 1711 regexp = ginfo.get_regexp() 1712 1713 multiple_braces_re = re.compile(r"({{+)|(}}+)") 1714 1715 def escape_braces(match_obj): 
1716 return "{{" + re.escape(match_obj.group(0)) + "}}" 1717 1718 if ginfo.is_ir(): 1719 for i, line in enumerate(lines): 1720 # An IR variable named '%.' matches the FileCheck regex string. 1721 line = line.replace("%.", "%dot") 1722 for regex in _global_hex_value_regex: 1723 if re.match("^@" + regex + " = ", line): 1724 line = re.sub( 1725 r"\bi([0-9]+) ([0-9]+)", 1726 lambda m: "i" 1727 + m.group(1) 1728 + " [[#" 1729 + hex(int(m.group(2))) 1730 + "]]", 1731 line, 1732 ) 1733 break 1734 # Ignore any comments, since the check lines will too. 1735 scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) 1736 lines[i] = scrubbed_line 1737 1738 if not preserve_names: 1739 committed_names = set( 1740 test_var.get_variable_name(name) 1741 for (name, _), test_var in vars_seen.items() 1742 ) 1743 defs = set() 1744 1745 # Collect information about new check lines, and generalize global reference 1746 new_line_infos = [] 1747 for line in lines: 1748 filtered_line = "" 1749 values = [] 1750 while True: 1751 m = regexp.search(line) 1752 if m is None: 1753 filtered_line += line 1754 break 1755 1756 name = ginfo.get_name_from_match(m) 1757 nameless_value = ginfo.get_nameless_value_from_match(m) 1758 prefix, suffix = nameless_value.get_affixes_from_match(m) 1759 if may_clash_with_default_check_prefix_name( 1760 nameless_value.check_prefix, name 1761 ): 1762 warn( 1763 "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict" 1764 " with scripted FileCheck name." 
% (name,) 1765 ) 1766 1767 # Record the variable as seen and (for locals) accumulate 1768 # prefixes/suffixes 1769 is_local_def = nameless_value.is_local_def_ir_value() 1770 if is_local_def: 1771 vars_dict = vars_seen 1772 else: 1773 vars_dict = global_vars_seen 1774 1775 key = (name, nameless_value.check_key) 1776 1777 if is_local_def: 1778 test_prefix = prefix 1779 test_suffix = suffix 1780 else: 1781 test_prefix = "" 1782 test_suffix = "" 1783 1784 if key in vars_dict: 1785 vars_dict[key].seen(nameless_value, test_prefix, test_suffix) 1786 else: 1787 vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix) 1788 defs.add(key) 1789 1790 var = vars_dict[key].get_variable_name(name) 1791 1792 # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 1793 filtered_line += ( 1794 line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex) 1795 ) 1796 line = line[m.end() :] 1797 1798 values.append( 1799 CheckValueInfo( 1800 key=nameless_value.check_key, 1801 text=name, 1802 name=var, 1803 prefix=prefix, 1804 suffix=suffix, 1805 ) 1806 ) 1807 1808 new_line_infos.append(CheckLineInfo(filtered_line, values)) 1809 1810 committed_names.update( 1811 test_var.get_variable_name(name) 1812 for (name, _), test_var in global_vars_seen.items() 1813 ) 1814 1815 # Collect information about original check lines, if any. 1816 orig_line_infos = [] 1817 for line in original_check_lines or []: 1818 filtered_line = "" 1819 values = [] 1820 while True: 1821 m = METAVAR_RE.search(line) 1822 if m is None: 1823 filtered_line += line 1824 break 1825 1826 # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 
1827 filtered_line += line[: m.start()] + VARIABLE_TAG 1828 line = line[m.end() :] 1829 values.append( 1830 CheckValueInfo( 1831 key=None, 1832 text=None, 1833 name=m.group(1), 1834 prefix="", 1835 suffix="", 1836 ) 1837 ) 1838 orig_line_infos.append(CheckLineInfo(filtered_line, values)) 1839 1840 # Compute the variable name mapping 1841 mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names) 1842 1843 # Apply the variable name mapping 1844 for i, line_info in enumerate(new_line_infos): 1845 line_template = line_info.line 1846 line = "" 1847 1848 for value in line_info.values: 1849 idx = line_template.find(VARIABLE_TAG) 1850 line += line_template[:idx] 1851 line_template = line_template[idx + len(VARIABLE_TAG) :] 1852 1853 key = (value.text, value.key) 1854 if value.key == "%": 1855 vars_dict = vars_seen 1856 else: 1857 vars_dict = global_vars_seen 1858 1859 if key in defs: 1860 line += vars_dict[key].get_def( 1861 mapping[value.name], value.prefix, value.suffix 1862 ) 1863 defs.remove(key) 1864 else: 1865 line += vars_dict[key].get_use( 1866 mapping[value.name], value.prefix, value.suffix 1867 ) 1868 1869 line += line_template 1870 1871 lines[i] = line 1872 1873 if ginfo.is_analyze(): 1874 for i, _ in enumerate(lines): 1875 # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. 1876 scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) 1877 lines[i] = scrubbed_line 1878 1879 return lines 1880 1881 1882def add_checks( 1883 output_lines, 1884 comment_marker, 1885 prefix_list, 1886 func_dict, 1887 func_name, 1888 check_label_format, 1889 ginfo, 1890 global_vars_seen_dict, 1891 is_filtered, 1892 preserve_names=False, 1893 original_check_lines: Mapping[str, List[str]] = {}, 1894): 1895 # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. 
1896 prefix_exclusions = set() 1897 printed_prefixes = [] 1898 for p in prefix_list: 1899 checkprefixes = p[0] 1900 # If not all checkprefixes of this run line produced the function we cannot check for it as it does not 1901 # exist for this run line. A subset of the check prefixes might know about the function but only because 1902 # other run lines created it. 1903 if any( 1904 map( 1905 lambda checkprefix: func_name not in func_dict[checkprefix], 1906 checkprefixes, 1907 ) 1908 ): 1909 prefix_exclusions |= set(checkprefixes) 1910 continue 1911 1912 # prefix_exclusions is constructed, we can now emit the output 1913 for p in prefix_list: 1914 global_vars_seen = {} 1915 checkprefixes = p[0] 1916 for checkprefix in checkprefixes: 1917 if checkprefix in global_vars_seen_dict: 1918 global_vars_seen.update(global_vars_seen_dict[checkprefix]) 1919 else: 1920 global_vars_seen_dict[checkprefix] = {} 1921 if checkprefix in printed_prefixes: 1922 break 1923 1924 # Check if the prefix is excluded. 1925 if checkprefix in prefix_exclusions: 1926 continue 1927 1928 # If we do not have output for this prefix we skip it. 1929 if not func_dict[checkprefix][func_name]: 1930 continue 1931 1932 # Add some space between different check prefixes, but not after the last 1933 # check line (before the test code). 
1934 if ginfo.is_asm(): 1935 if len(printed_prefixes) != 0: 1936 output_lines.append(comment_marker) 1937 1938 if checkprefix not in global_vars_seen_dict: 1939 global_vars_seen_dict[checkprefix] = {} 1940 1941 global_vars_seen_before = [key for key in global_vars_seen.keys()] 1942 1943 vars_seen = {} 1944 printed_prefixes.append(checkprefix) 1945 attrs = str(func_dict[checkprefix][func_name].attrs) 1946 attrs = "" if attrs == "None" else attrs 1947 if ginfo.get_version() > 1: 1948 funcdef_attrs_and_ret = func_dict[checkprefix][ 1949 func_name 1950 ].funcdef_attrs_and_ret 1951 else: 1952 funcdef_attrs_and_ret = "" 1953 1954 if attrs: 1955 output_lines.append( 1956 "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs) 1957 ) 1958 args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) 1959 if args_and_sig: 1960 args_and_sig = generalize_check_lines( 1961 [args_and_sig], 1962 ginfo, 1963 vars_seen, 1964 global_vars_seen, 1965 preserve_names, 1966 original_check_lines=[], 1967 )[0] 1968 func_name_separator = func_dict[checkprefix][func_name].func_name_separator 1969 if "[[" in args_and_sig: 1970 # Captures in label lines are not supported, thus split into a -LABEL 1971 # and a separate -SAME line that contains the arguments with captures. 1972 args_and_sig_prefix = "" 1973 if ginfo.get_version() >= 3 and args_and_sig.startswith("("): 1974 # Ensure the "(" separating function name and arguments is in the 1975 # label line. This is required in case of function names that are 1976 # prefixes of each other. Otherwise, the label line for "foo" might 1977 # incorrectly match on "foo.specialized". 1978 args_and_sig_prefix = args_and_sig[0] 1979 args_and_sig = args_and_sig[1:] 1980 1981 # Removing args_and_sig from the label match line requires 1982 # func_name_separator to be empty. Otherwise, the match will not work. 
1983 assert func_name_separator == "" 1984 output_lines.append( 1985 check_label_format 1986 % ( 1987 checkprefix, 1988 funcdef_attrs_and_ret, 1989 func_name, 1990 args_and_sig_prefix, 1991 func_name_separator, 1992 ) 1993 ) 1994 output_lines.append( 1995 "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig) 1996 ) 1997 else: 1998 output_lines.append( 1999 check_label_format 2000 % ( 2001 checkprefix, 2002 funcdef_attrs_and_ret, 2003 func_name, 2004 args_and_sig, 2005 func_name_separator, 2006 ) 2007 ) 2008 func_body = str(func_dict[checkprefix][func_name]).splitlines() 2009 if not func_body: 2010 # We have filtered everything. 2011 continue 2012 2013 # For ASM output, just emit the check lines. 2014 if ginfo.is_asm(): 2015 body_start = 1 2016 if is_filtered: 2017 # For filtered output we don't add "-NEXT" so don't add extra spaces 2018 # before the first line. 2019 body_start = 0 2020 else: 2021 output_lines.append( 2022 "%s %s: %s" % (comment_marker, checkprefix, func_body[0]) 2023 ) 2024 func_lines = generalize_check_lines( 2025 func_body[body_start:], ginfo, vars_seen, global_vars_seen 2026 ) 2027 for func_line in func_lines: 2028 if func_line.strip() == "": 2029 output_lines.append( 2030 "%s %s-EMPTY:" % (comment_marker, checkprefix) 2031 ) 2032 else: 2033 check_suffix = "-NEXT" if not is_filtered else "" 2034 output_lines.append( 2035 "%s %s%s: %s" 2036 % (comment_marker, checkprefix, check_suffix, func_line) 2037 ) 2038 # Remember new global variables we have not seen before 2039 for key in global_vars_seen: 2040 if key not in global_vars_seen_before: 2041 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] 2042 break 2043 # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well. 
2044 elif ginfo.is_analyze(): 2045 func_body = generalize_check_lines( 2046 func_body, ginfo, vars_seen, global_vars_seen 2047 ) 2048 for func_line in func_body: 2049 if func_line.strip() == "": 2050 output_lines.append( 2051 "{} {}-EMPTY:".format(comment_marker, checkprefix) 2052 ) 2053 else: 2054 check_suffix = "-NEXT" if not is_filtered else "" 2055 output_lines.append( 2056 "{} {}{}: {}".format( 2057 comment_marker, checkprefix, check_suffix, func_line 2058 ) 2059 ) 2060 2061 # Add space between different check prefixes and also before the first 2062 # line of code in the test function. 2063 output_lines.append(comment_marker) 2064 2065 # Remember new global variables we have not seen before 2066 for key in global_vars_seen: 2067 if key not in global_vars_seen_before: 2068 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] 2069 break 2070 # For IR output, change all defs to FileCheck variables, so we're immune 2071 # to variable naming fashions. 2072 else: 2073 func_body = generalize_check_lines( 2074 func_body, 2075 ginfo, 2076 vars_seen, 2077 global_vars_seen, 2078 preserve_names, 2079 original_check_lines=original_check_lines.get(checkprefix), 2080 ) 2081 2082 # This could be selectively enabled with an optional invocation argument. 2083 # Disabled for now: better to check everything. Be safe rather than sorry. 2084 2085 # Handle the first line of the function body as a special case because 2086 # it's often just noise (a useless asm comment or entry label). 2087 # if func_body[0].startswith("#") or func_body[0].startswith("entry:"): 2088 # is_blank_line = True 2089 # else: 2090 # output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0])) 2091 # is_blank_line = False 2092 2093 is_blank_line = False 2094 2095 for func_line in func_body: 2096 if func_line.strip() == "": 2097 is_blank_line = True 2098 continue 2099 # Do not waste time checking IR comments. 
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        # Previous line(s) were blank: use a plain CHECK (no
                        # -NEXT) so the gap is not pinned down.
                        output_lines.append(
                            "{} {}: {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        # Filtered output drops lines, so -NEXT would be wrong there.
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

            # Remember new global variables we have not seen before
            for key in global_vars_seen:
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
            break
    return printed_prefixes


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit FileCheck lines for one IR function into output_lines.

    Chooses the -LABEL line format for IR output (with or without the
    'define' signature, depending on function_sig and the UTC version)
    and delegates the actual check emission to add_checks().  Returns
    add_checks()'s list of printed prefixes.
    """
    assert ginfo.is_ir()
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        function_def_regex = "define %s"
    elif function_sig:
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )


def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit FileCheck lines for analysis-pass output of one function.

    Uses a quoted '-LABEL' format and a fresh (empty) global-vars
    dictionary, then delegates to add_checks().
    """
    assert ginfo.is_analyze()
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    global_vars_seen_dict = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
    )


def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Populate glob_val_dict[prefix][check_prefix] from raw tool output.

    For each nameless value that has a global RHS regexp, scan
    raw_tool_output for 'lhs = rhs' definitions and store them as
    (start_index, matched_text) tuples.  If two run lines under the same
    prefix produce different contents, the entry is invalidated (None),
    or a warning is issued for the last prefix.
    """
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        # Multiline match of an entire global definition line.
        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines
            # can be sorted properly even if they are matched by different nameless values.
            # This is relevant for GLOB and GLOBNAMED since they may appear interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    # Conflicting contents: mark this entry as unusable.
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines


def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select which global-value lines to keep per --check-globals setting.

    'none' keeps nothing, 'all' keeps everything.  'smart' keeps only
    globals transitively referenced from variables already recorded in
    global_vars_seen (attribute sets, check_key '#', are always dropped
    because --check-attributes covers them better).  Lines are
    (index, text) tuples and are returned in the same form.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def extract(line, nv):
        # Split a global definition line into (name, list-of-referenced-names).
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    transitively_visible = set()
    contains_refs_to = {}

    def add(var):
        # Depth-first closure over the reference graph.
        nonlocal transitively_visible
        nonlocal contains_refs_to
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        if not var in contains_refs_to:
            return
        for x in contains_refs_to[var]:
            add(x)

    for i, line in global_val_lines_w_index:
        (var, refs) = extract(line, nameless_value)
        contains_refs_to[var] = refs
    # Seed the closure with variables already seen under this check key.
    for var, check_key in global_vars_seen:
        if check_key != nameless_value.check_key:
            continue
        add(var)
    return [
        (i, line)
        for i, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in transitively_visible
    ]

# Regex/replacement pairs that scrub metadata strings which vary between
# builds (tool version strings, absolute file paths in !DIFile).
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Scrub build-dependent metadata (versions, paths) from a check line."""
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Sort pending (index, line) checks, emit them after a separator, and clear the pending list."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    # Sort by the original position in the tool output.
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Emit CHECK lines for global values (either before or after functions).

    Walks every nameless value with a global RHS, filters its recorded
    lines per global_check_setting and _global_value_regex, generalizes
    them into FileCheck captures, and appends the resulting check lines
    (buffered per prefix so GLOB/GLOBNAMED sort correctly).  Updates
    global_vars_seen_dict in place and returns the set of
    (checkprefix, check_prefix) pairs that were printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                # Only emit once per (prefix, kind) pair.
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # Honor the user-supplied --global-value-regex filter, if any.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    # Start a new sorted group: flush whatever is buffered.
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush the remaining buffered checks, one prefix per run line.
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes


def check_prefix(prefix):
    """Warn if prefix is not a valid FileCheck prefix (alphanumerics, '-', '_')."""
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )


def get_check_prefixes(filecheck_cmd):
    """Return all prefixes from --check-prefix(es) options in a FileCheck command, defaulting to ['CHECK']."""
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix in a FileCheck command line; warn on invalid or duplicated prefixes."""
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )


def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS suffix for the autogenerated note from all non-default options in args."""
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # The default depends on the UTC version (see changelog above).
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space and prepend the UTC_ARGS marker.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    """If line is a UTC_ARGS command, append its options to argv and re-parse args; return (args, argv)."""
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option in the test's top-level args, falling back (for global options) to any UTC_ARGS line in the test."""
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # NOTE(review): 'warn' here is the module-level warn() function,
                # which is always truthy, so this condition reduces to
                # 'saw_line' — confirm whether a flag was intended.
                if warn and saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into output_lines, dropping old check lines for prefixes in prefix_set and bare separator comments."""
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        # NOTE(review): 'args' is unused in this function.
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                # An old check line for a prefix we are regenerating: drop it.
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions grouped by prefix (instead of interleaved per function) and return the set of generated prefixes."""
    added = set()
    generated_prefixes = set()
    # NOTE(review): the inner 'for prefix in prefixes' shadows the outer
    # 'for prefix in prefix_list' variable; the outer value is not used
    # again after 'prefixes'/'tool_args' are extracted, so this is benign.
    for prefix in prefix_list:
        prefixes = prefix[0]
        tool_args = prefix[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element. They output
                # checks for each prefix in the list of prefixes. By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function. That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes