from __future__ import print_function

import argparse
import bisect
import collections
import copy
import glob
import itertools
import os
import re
import shlex
import subprocess
import sys

from typing import List, Mapping, Set

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
DEFAULT_VERSION = 5


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        result = copy.deepcopy(super(Filter, self), memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result


def parse_commandline_args(parser):
    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for the current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
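    # For example (hypothetical): with --global-hex-value-regex 'g_mask', a
    # declaration such as "@g_mask = global i32 240" is checked with its
    # literal encoded in hex, i.e. as "i32 [[#0xf0]]".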
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of the output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args


class InputLineInfo(object):
    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #     return False
    # And skip any CHECK lines. We're building our own.
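    # For example, with prefix_set == {"CHECK"}, an existing line
    # "; CHECK-NEXT: ret i32 %x" matches below and is dropped, because the
    # script regenerates all check lines itself (hypothetical input, shown
    # for illustration).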
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            is_global = global_ir_value_re.search(input_line)
            return not is_global
        return False

    return True


def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        current_function = None

    return result


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    for a, b in substitutions:
        s = s.replace(a, b)
    return s


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
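            # A caller might pass, e.g., preprocess_cmd="sed 's/;.*//' %s"
            # (hypothetical example); lit-like substitutions such as %s are
            # applied to the command below.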
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                pp = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
                ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Convert Windows CRLF line endings to Unix LF.
    return stdout.replace("\r\n", "\n")


##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"
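
# Illustrative examples of what the regexes above match (hypothetical inputs,
# for documentation only):
# - RUN_LINE_RE matches "; RUN: opt < %s -S | FileCheck %s" and captures
#   everything after "RUN:" in group 1.
# - CHECK_RE applied to "; CHECK-NEXT: ret i32 %x" captures the prefix
#   "CHECK" in group 1 and the check kind "NEXT" in group 2.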

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("ERROR: {}".format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("WARNING: {}".format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines


def get_triple_from_march(march):
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in triples.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    has_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_filter = True
        if f.search(line):
            return False if f.is_filter_out else True
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
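    # For example, given the filters [--filter-out 'nsw', --filter 'mul'] in
    # that order (hypothetical), "%t = mul nsw i32 %a, %b" hits the filter-out
    # first and is dropped, "%t = mul i32 %a, %b" matches 'mul' and is kept,
    # and a line matching neither is dropped because a positive --filter was
    # given.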
    return False if has_filter else True


def do_filter(body, filters):
    return (
        body
        if not filters
        else "\n".join(
            filter(lambda line: apply_filters(line, filters), body.splitlines())
        )
    )


def scrub_body(body):
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        arg_names = set()

        def drop_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


class FunctionTestBuilder:
    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for run_item in run_list:
            for prefix in run_item[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string placed right after the function
            # name at the beginning of an assembly function definition. In most
            # assemblies, that is just a colon: `foo:`. But, for example, in
            # nvptx it is a brace: `foo(`. If we are not in ASM mode, just
            # assume that the separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefix but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..


class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals
    def get_ir_regex(self):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
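    # For example (hypothetical inputs): with check_prefix "TMP", the IR name
    # "7" (from "%7") becomes "TMP7", while a named value such as "wide.load"
    # becomes "WIDE_LOAD".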
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
            var = var.replace(".", "_")
            var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix


class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
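    # For example (hypothetical input): matching "  %x, ..." yields
    # group(1) == "  " (the prefix), group(2) == "%x" (the whole value text),
    # and exactly one of the groups from 3 onwards -- one per NamelessValue
    # kind -- set to "x".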
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_match_info
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]


def make_ir_generalizer(version):
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #             check_prefix   check_key  ir_prefix  ir_regexp  global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    # values = [
    #     nameless_value
    #     for nameless_value in IR_NAMELESS_VALUES
    #     if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and
    #        not (unstable_ids_only and nameless_value.match_literally)
    # ]

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)


def make_asm_generalizer(version):
    values = [
        NamelessValue(
            r"MCINST",
            "Inst#",
            "<MCInst #",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
        NamelessValue(
            r"MCREG",
            "Reg:",
            "<MCOperand Reg:",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
    ]

    prefix = r"((?:#|//)\s*)"
    suffix = r"([>\s]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix)


def make_analyze_generalizer(version):
    values = [
        NamelessValue(
            r"GRP",
1240 "#", 1241 r"", 1242 r"0x[0-9a-f]+", 1243 None, 1244 replace_number_with_counter=True, 1245 ), 1246 ] 1247 1248 prefix = r"(\s*)" 1249 suffix = r"(\)?:)" 1250 1251 return GeneralizerInfo( 1252 version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix 1253 ) 1254 1255 1256# Return true if var clashes with the scripted FileCheck check_prefix. 1257def may_clash_with_default_check_prefix_name(check_prefix, var): 1258 return check_prefix and re.match( 1259 r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE 1260 ) 1261 1262 1263def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: 1264 """ 1265 Find a large ordered matching between strings in lhs and rhs. 1266 1267 Think of this as finding the *unchanged* lines in a diff, where the entries 1268 of lhs and rhs are lines of the files being diffed. 1269 1270 Returns a list of matched (lhs_idx, rhs_idx) pairs. 1271 """ 1272 1273 if not lhs or not rhs: 1274 return [] 1275 1276 # Collect matches in reverse order. 1277 matches = [] 1278 1279 # First, collect a set of candidate matching edges. We limit this to a 1280 # constant multiple of the input size to avoid quadratic runtime. 1281 patterns = collections.defaultdict(lambda: ([], [])) 1282 1283 for idx in range(len(lhs)): 1284 patterns[lhs[idx]][0].append(idx) 1285 for idx in range(len(rhs)): 1286 patterns[rhs[idx]][1].append(idx) 1287 1288 multiple_patterns = [] 1289 1290 candidates = [] 1291 for pattern in patterns.values(): 1292 if not pattern[0] or not pattern[1]: 1293 continue 1294 1295 if len(pattern[0]) == len(pattern[1]) == 1: 1296 candidates.append((pattern[0][0], pattern[1][0])) 1297 else: 1298 multiple_patterns.append(pattern) 1299 1300 multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) 1301 1302 for pattern in multiple_patterns: 1303 if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( 1304 len(lhs) + len(rhs) 1305 ): 1306 break 1307 for lhs_idx in pattern[0]: 1308 for rhs_idx in pattern[1]: 1309 candidates.append((lhs_idx, rhs_idx)) 1310 1311 if not candidates: 1312 # The LHS and RHS either share nothing in common, or lines are just too 1313 # identical. In that case, let's give up and not match anything. 1314 return [] 1315 1316 # Compute a maximal crossing-free matching via an algorithm that is 1317 # inspired by a mixture of dynamic programming and line-sweeping in 1318 # discrete geometry. 1319 # 1320 # I would be surprised if this algorithm didn't exist somewhere in the 1321 # literature, but I found it without consciously recalling any 1322 # references, so you'll have to make do with the explanation below. 1323 # Sorry. 1324 # 1325 # The underlying graph is bipartite: 1326 # - nodes on the LHS represent lines in the original check 1327 # - nodes on the RHS represent lines in the new (updated) check 1328 # 1329 # Nodes are implicitly sorted by the corresponding line number. 1330 # Edges (unique_matches) are sorted by the line number on the LHS. 1331 # 1332 # Here's the geometric intuition for the algorithm. 1333 # 1334 # * Plot the edges as points in the plane, with the original line 1335 # number on the X axis and the updated line number on the Y axis. 1336 # * The goal is to find a longest "chain" of points where each point 1337 # is strictly above and to the right of the previous point. 1338 # * The algorithm proceeds by sweeping a vertical line from left to 1339 # right. 
    # * The algorithm maintains a table where `table[N]` answers the
    #   question "What is currently the 'best' way to build a chain of N+1
    #   points to the left of the vertical line". Here, 'best' means
    #   that the last point of the chain is as low as possible (minimal
    #   Y coordinate).
    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #   the last point in the chain and `y` is its Y coordinate
    # * A key invariant is that the Y values in the table are
    #   monotonically increasing
    # * Thanks to these properties, the table can be used to answer the
    #   question "What is the longest chain that can be built to the left
    #   of the vertical line using only points below a certain Y value",
    #   using a binary search over the table.
    # * The algorithm also builds a backlink structure in which every point
    #   links back to the previous point on a best (longest) chain ending
    #   at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
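    #
    # For example (hypothetical inputs):
    #   find_diff_matching(["a", "b", "c"], ["b", "c", "d"])
    # returns [(1, 0), (2, 1)]: "b" and "c" are the unchanged lines, while
    # "a" was removed and "d" was added.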
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches


VARIABLE_TAG = "[[@@]]"
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")


class TestVar:
    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        if prefix != self._prefix:
            self._prefix = ""
        if suffix != self._suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        return self._nameless_value.get_value_name(
            text, self._nameless_value.check_prefix
        )

    def get_def(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"


class CheckValueInfo:
    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the value, e.g. '%'
        self.key = key

        # Text to be matched by the FileCheck variable (without any prefix or suffix)
        self.text = text

        # Name of the FileCheck variable
        self.name = name

        # Prefix and suffix that were captured by the NamelessValue regular expression
        self.prefix = prefix
        self.suffix = suffix


# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, values={self.values})"


def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Translate the matched indices from slice-relative back to absolute
        # positions; without this, recursive calls with non-zero begins would
        # index the wrong lines.
        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Apply commits greedily on a match-by-match basis
        matches = [(old_begin - 1, new_begin - 1)]
        committed_anything = False
        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            local_commits = {}

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in local_commits:
                    # Same, but for a possible commit happening on the same line
                    if local_commits[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if lhs_value.name in committed_names:
                    # We can't map this value because the name we would map it to has already been
                    # committed for something else. Give up on this line.
                    break

                local_commits[rhs_value.name] = lhs_value.name
            else:
                # No reason not to add any commitments for this line
                for rhs_var, lhs_var in local_commits.items():
                    new_mapping[rhs_var] = lhs_var
                    committed_names.add(lhs_var)
                    committed_anything = True

                    if (
                        lhs_var != rhs_var
                        and lhs_var in new_mapping
                        and new_mapping[lhs_var] == lhs_var
                    ):
                        new_mapping[lhs_var] = "conflict_" + lhs_var

                matches.append((lhs_idx, rhs_idx))

        matches.append((old_end, new_end))

        # Recursively handle sequences between matches
        if committed_anything:
            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping


def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        defs = set()

        # Collect information about new check lines, and generalize global references.
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines


def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    # prefix_exclusions are prefixes we cannot use to print the function because
    # it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).

    # prefix_exclusions is constructed; we can now emit the output.
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the
            # last check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
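
            # When the split happens, the emitted pair looks like, e.g.
            # (hypothetical output):
            #     ; CHECK-LABEL: define i32 @foo(
            #     ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]])
            # rather than a single -LABEL line carrying the captures.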
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT", so don't add extra
                    # spaces before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the
                # first line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #     is_blank_line = True
                # else:
                #     output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                #     is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # After a blank line, emit a plain CHECK rather than CHECK-NEXT
                    # so the blank line is skipped instead of checked.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(comment_marker, checkprefix, func_line)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False
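
                # The resulting IR checks look like, e.g. (hypothetical output):
                #     ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], [[B]]
                #     ; CHECK-NEXT:    ret i32 [[ADD]]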

                # Add space between different check prefixes and also before the
                # first line of code in the test function.
                output_lines.append(comment_marker)

            # Remember new global variables we have not seen before.
            for key in global_vars_seen:
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
            break
    return printed_prefixes


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    assert ginfo.is_ir()
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        function_def_regex = "define %s"
    elif function_sig:
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
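    # For example (illustrative): with comment_marker ";" and version > 1, the
    # template is "; %s-LABEL: define %s@%s%s%s", later filled in with the check
    # prefix, funcdef attrs/return type, function name, signature, and separator.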
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )


def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    assert ginfo.is_analyze()
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    global_vars_seen_dict = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
    )


def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=re.M)
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines can be
            # sorted properly even if they are matched by different nameless
            # values. This is relevant for GLOB and GLOBNAMED since they may
            # appear interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines
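

# The --check-globals modes implemented below: 'none' emits no global checks,
# 'all' keeps every matched global, and 'smart' keeps only globals that are
# transitively referenced from values already seen in the checked function
# bodies. For example (hypothetical IR): if a function body references @a and
# "@a = global ... @b ..." references @b, both @a and @b are kept under 'smart'.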
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # Attribute sets are usually better checked by --check-attributes.
        return []

    def extract(line, nv):
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    transitively_visible = set()
    contains_refs_to = {}

    def add(var):
        nonlocal transitively_visible
        nonlocal contains_refs_to
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        if var not in contains_refs_to:
            return
        for x in contains_refs_to[var]:
            add(x)

    for i, line in global_val_lines_w_index:
        (var, refs) = extract(line, nameless_value)
        contains_refs_to[var] = refs
    for var, check_key in global_vars_seen:
        if check_key != nameless_value.check_key:
            continue
        add(var)
    return [
        (i, line)
        for i, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in transitively_visible
    ]


METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly.
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if checkprefix not in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes


def check_prefix(prefix):
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )
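

# Extract the FileCheck prefixes from a FileCheck command line. For example
# (illustrative): "FileCheck --check-prefixes=CHECK,OPT %s" yields
# ["CHECK", "OPT"]; a command without an explicit prefix option defaults to
# ["CHECK"].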
def get_check_prefixes(filecheck_cmd):
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )


def get_autogennote_suffix(parser, args):
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args.
        # Ignore parameters such as paths to the binary or the list of tests.
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with
            # boolean --foo/--no-foo options).
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values.
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2.
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a
            # list of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
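

# Re-parse the update-script options when a test line carries a UTC_ARGS
# annotation. For example (illustrative): a line containing "UTC_ARGS: --disable"
# appends "--disable" to argv and re-runs argument parsing from that point on.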
def check_for_command(line, parser, args, argv, argparse_callback):
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                if saw_line:
                    # We saw the option after already reading some test input
                    # lines. Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    added = set()
    generated_prefixes = set()
    for p in prefix_list:
        prefixes = p[0]
        tool_args = p[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # a tool command args string as their second element. They output
                # checks for each prefix in the list of prefixes. By doing so,
                # they implicitly assume that for each function every run line
                # will generate something for that function. That is not the case
                # for generated functions, as some run lines might not generate
                # them (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
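

# Callers are expected to hand add_checks_at_end a closure over one of the
# add_*_checks routines above. A hypothetical sketch (variable names such as
# run_list, func_dict, and the surrounding context are illustrative only):
#
#     add_checks_at_end(
#         output_lines,
#         run_list,
#         func_order,
#         ";",
#         lambda out, run_list, func: add_ir_checks(
#             out, ";", run_list, func_dict, func, preserve_names, function_sig,
#             ginfo, global_vars_seen_dict, is_filtered
#         ),
#     )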