from __future__ import print_function

import argparse
import bisect
import collections
import copy
import glob
import itertools
import os
import re
import subprocess
import sys
import shlex

from typing import List, Mapping, Set

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
DEFAULT_VERSION = 5


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Compiled patterns are immutable, so share the underlying pattern
        # object instead of copying it.
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        result = copy.deepcopy(super(Filter, self), memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result


def parse_commandline_args(parser):
    """Register the options shared by all update_*_test_checks.py scripts on
    ``parser``, parse sys.argv with it, and return the parsed namespace.

    Also mirrors --verbose / --global-value-regex / --global-hex-value-regex
    into module-level globals that the helper functions consult.
    """

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # BUGFIX: this message previously referenced 'option_string',
                # which is not in scope in do_call (it is a parameter of
                # __call__ only), so an invalid regex raised a NameError
                # instead of the intended diagnostic. Use this action's
                # registered option strings instead.
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        "/".join(self.option_strings), error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # --filter-out excludes matching lines; plain --filter keeps them.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    """Parse ``argv`` with ``parser`` and apply version-dependent defaults."""
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args


class InputLineInfo(object):
    """A single input line plus the option state in effect when it was read."""

    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    """Per-test-file state: parsed options, input lines and extracted RUN lines."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # MIR tests use '#' comments; LLVM IR tests use ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Iterate over input lines read-only, tracking UTC_ARGS option state."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Iterate over input lines, echoing disabled regions into output_lines."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return catch-all FileCheck lines for prefixes that generated no checks."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching ``test_patterns``."""
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Return True if ``input_line`` from the old test should be kept in the
    regenerated output (i.e. it is not a check line we will re-emit)."""
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #  return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            is_global = global_ir_value_re.search(input_line)
            return not is_global
        return False

    return True


def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any non-check line breaks the run of check lines for a function.
        current_function = None

    return result


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return the (pattern, replacement) pairs for lit-style substitutions."""
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to ``s``, in order."""
    for a, b in substitutions:
        s = s.replace(a, b)
    return s


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run ``exe`` on input file ``ir`` and return its stdout as text with
    unix line endings. ``cmd_args`` may be a string (run via the shell) or a
    list (run directly)."""
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                pp = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
                ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")


##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    """Print an ERROR diagnostic to stderr, optionally tagged with a file."""
    full_msg = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(full_msg), file=sys.stderr)


def warn(msg, test_file=None):
    """Print a WARNING diagnostic to stderr, optionally tagged with a file."""
    full_msg = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(full_msg), file=sys.stderr)


def debug(*args, **kwargs):
    """Like print(), defaulting to stderr, but only when --verbose is active."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    """Extract RUN: command lines from ``lines``, joining '\\'-continuations."""
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = []
    for line in lines:
        m = RUN_LINE_RE.match(line)
        if m:
            raw_lines.append(m.group(1))
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation of the previous RUN line.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for run in run_lines:
        debug("  RUN: {}".format(run))
    return run_lines


def get_triple_from_march(march):
    """Map a -march value to a target triple, defaulting to 'x86'."""
    march_to_triple = (
        ("amdgcn", "amdgcn"),
        ("r600", "r600"),
        ("mips", "mips"),
        ("sparc", "sparc"),
        ("hexagon", "hexagon"),
        ("ve", "ve"),
    )
    for arch_prefix, triple in march_to_triple:
        if march.startswith(arch_prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    """Decide whether ``line`` survives the given ordered list of filters."""
    saw_keep_filter = False
    for flt in filters:
        saw_keep_filter = saw_keep_filter or not flt.is_filter_out
        if flt.search(line):
            # First matching filter decides the line's fate.
            return not flt.is_filter_out
    # If we only used filter-out, keep the line; otherwise discard it since
    # no keep-filter matched.
    return not saw_keep_filter


def do_filter(body, filters):
    """Apply the filter list to every line of ``body``."""
    if not filters:
        return body
    kept_lines = [ln for ln in body.splitlines() if apply_filters(ln, filters)]
    return "\n".join(kept_lines)


def scrub_body(body):
    """Normalize whitespace in an assembly/IR body while keeping indentation."""
    # Collapse runs of whitespace after each token, leaving the leading
    # whitespace of each line in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    """Run ``scrubber`` on ``body``, threading the ``extra`` flag through a
    deep-copied argument object when scrubber args are present."""
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    # Holds one scrubbed function body plus the signature pieces needed to
    # decide whether two RUN lines produced "the same" function.
    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if this body matches the given one modulo renamed
        function arguments (local '%' values collected in arg_names)."""
        arg_names = set()

        def drop_arg_names(match):
            # Strip local value names from the signature so two signatures
            # that differ only in argument names compare equal.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Drop uses of the collected argument names from the body text.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


class FunctionTestBuilder:
    # Accumulates, per check prefix, the scrubbed body of every function seen
    # across all RUN lines; conflicting bodies are recorded as None.
    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        """Warn about prefixes with no usable function body, then return the
        accumulated prefix -> {func -> function_body} dictionary."""
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """Parse ``raw_tool_output`` with ``function_re`` and record a scrubbed
        body for every matched function under each prefix in ``prefixes``."""
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocesser directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..


class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
    def get_ir_regex(self):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
        # FileCheck variable names may not contain '.' or '-'.
        var = var.replace(".", "_")
        var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        # Recover the literal prefix/suffix text around the matched value name.
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix


class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # One combined regexp over all value kinds, plus a globals-only
        # variant restricted to values whose numbering is unstable.
        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        # Build an alternation of one capture group per selected NamelessValue;
        # returns the compiled regexp and the values in group order.
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # NOTE(review): this comparison is a no-op statement; it looks
            # like it was intended to be an assertion that the match came
            # from the unstable-globals regexp. Behavior is unchanged.
            match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]


def make_ir_generalizer(version):
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        # check_prefix check_key ir_prefix ir_regexp global_ir_rhs_regexp
NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None), 1169 NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None), 1170 NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"), 1171 NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None), 1172 NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True), 1173 NamelessValue( 1174 r"GLOBNAMED", 1175 "@", 1176 r"@", 1177 r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*", 1178 r".+", 1179 is_before_functions=True, 1180 match_literally=True, 1181 interlaced_with_previous=True, 1182 ), 1183 NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None), 1184 NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None), 1185 NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None), 1186 NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None), 1187 NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None), 1188 NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None), 1189 NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None), 1190 NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"), 1191 NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None), 1192 NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None), 1193 NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None), 1194 ] 1195 1196 prefix = r"(\s*)" 1197 suffix = r"([,\s\(\)\}]|\Z)" 1198 1199 # values = [ 1200 # nameless_value 1201 # for nameless_value in IR_NAMELESS_VALUES 1202 # if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and 1203 # not (unstable_ids_only and nameless_value.match_literally) 1204 # ] 1205 1206 return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix) 1207 1208 1209def make_asm_generalizer(version): 1210 values = [ 1211 NamelessValue( 1212 r"MCINST", 1213 "Inst#", 1214 "<MCInst #", 1215 r"\d+", 1216 r".+", 1217 is_number=True, 1218 replace_number_with_counter=True, 1219 ), 1220 NamelessValue( 1221 r"MCREG", 1222 "Reg:", 
1223 "<MCOperand Reg:", 1224 r"\d+", 1225 r".+", 1226 is_number=True, 1227 replace_number_with_counter=True, 1228 ), 1229 ] 1230 1231 prefix = r"((?:#|//)\s*)" 1232 suffix = r"([>\s]|\Z)" 1233 1234 return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix) 1235 1236 1237def make_analyze_generalizer(version): 1238 values = [ 1239 NamelessValue( 1240 r"GRP", 1241 "#", 1242 r"", 1243 r"0x[0-9a-f]+", 1244 None, 1245 replace_number_with_counter=True, 1246 ), 1247 ] 1248 1249 prefix = r"(\s*)" 1250 suffix = r"(\)?:)" 1251 1252 return GeneralizerInfo( 1253 version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix 1254 ) 1255 1256 1257# Return true if var clashes with the scripted FileCheck check_prefix. 1258def may_clash_with_default_check_prefix_name(check_prefix, var): 1259 return check_prefix and re.match( 1260 r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE 1261 ) 1262 1263 1264def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: 1265 """ 1266 Find a large ordered matching between strings in lhs and rhs. 1267 1268 Think of this as finding the *unchanged* lines in a diff, where the entries 1269 of lhs and rhs are lines of the files being diffed. 1270 1271 Returns a list of matched (lhs_idx, rhs_idx) pairs. 1272 """ 1273 1274 if not lhs or not rhs: 1275 return [] 1276 1277 # Collect matches in reverse order. 1278 matches = [] 1279 1280 # First, collect a set of candidate matching edges. We limit this to a 1281 # constant multiple of the input size to avoid quadratic runtime. 
1282 patterns = collections.defaultdict(lambda: ([], [])) 1283 1284 for idx in range(len(lhs)): 1285 patterns[lhs[idx]][0].append(idx) 1286 for idx in range(len(rhs)): 1287 patterns[rhs[idx]][1].append(idx) 1288 1289 multiple_patterns = [] 1290 1291 candidates = [] 1292 for pattern in patterns.values(): 1293 if not pattern[0] or not pattern[1]: 1294 continue 1295 1296 if len(pattern[0]) == len(pattern[1]) == 1: 1297 candidates.append((pattern[0][0], pattern[1][0])) 1298 else: 1299 multiple_patterns.append(pattern) 1300 1301 multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) 1302 1303 for pattern in multiple_patterns: 1304 if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( 1305 len(lhs) + len(rhs) 1306 ): 1307 break 1308 for lhs_idx in pattern[0]: 1309 for rhs_idx in pattern[1]: 1310 candidates.append((lhs_idx, rhs_idx)) 1311 1312 if not candidates: 1313 # The LHS and RHS either share nothing in common, or lines are just too 1314 # identical. In that case, let's give up and not match anything. 1315 return [] 1316 1317 # Compute a maximal crossing-free matching via an algorithm that is 1318 # inspired by a mixture of dynamic programming and line-sweeping in 1319 # discrete geometry. 1320 # 1321 # I would be surprised if this algorithm didn't exist somewhere in the 1322 # literature, but I found it without consciously recalling any 1323 # references, so you'll have to make do with the explanation below. 1324 # Sorry. 1325 # 1326 # The underlying graph is bipartite: 1327 # - nodes on the LHS represent lines in the original check 1328 # - nodes on the RHS represent lines in the new (updated) check 1329 # 1330 # Nodes are implicitly sorted by the corresponding line number. 1331 # Edges (unique_matches) are sorted by the line number on the LHS. 1332 # 1333 # Here's the geometric intuition for the algorithm. 
1334 # 1335 # * Plot the edges as points in the plane, with the original line 1336 # number on the X axis and the updated line number on the Y axis. 1337 # * The goal is to find a longest "chain" of points where each point 1338 # is strictly above and to the right of the previous point. 1339 # * The algorithm proceeds by sweeping a vertical line from left to 1340 # right. 1341 # * The algorithm maintains a table where `table[N]` answers the 1342 # question "What is currently the 'best' way to build a chain of N+1 1343 # points to the left of the vertical line". Here, 'best' means 1344 # that the last point of the chain is a as low as possible (minimal 1345 # Y coordinate). 1346 # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of 1347 # the last point in the chain and `y` is its Y coordinate 1348 # * A key invariant is that the Y values in the table are 1349 # monotonically increasing 1350 # * Thanks to these properties, the table can be used to answer the 1351 # question "What is the longest chain that can be built to the left 1352 # of the vertical line using only points below a certain Y value", 1353 # using a binary search over the table. 1354 # * The algorithm also builds a backlink structure in which every point 1355 # links back to the previous point on a best (longest) chain ending 1356 # at that point 1357 # 1358 # The core loop of the algorithm sweeps the line and updates the table 1359 # and backlink structure for every point that we cross during the sweep. 1360 # Therefore, the algorithm is trivially O(M log M) in the number of 1361 # points. 1362 candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) 1363 1364 backlinks = [] 1365 table_rhs_idx = [] 1366 table_candidate_idx = [] 1367 for _, rhs_idx in candidates: 1368 candidate_idx = len(backlinks) 1369 ti = bisect.bisect_left(table_rhs_idx, rhs_idx) 1370 1371 # Update the table to record a best chain ending in the current point. 
1372 # There always is one, and if any of the previously visited points had 1373 # a higher Y coordinate, then there is always a previously recorded best 1374 # chain that can be improved upon by using the current point. 1375 # 1376 # There is only one case where there is some ambiguity. If the 1377 # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as 1378 # the current point (this can only happen if the same line appeared 1379 # multiple times on the LHS), then we could choose to keep the 1380 # previously recorded best chain instead. That would bias the algorithm 1381 # differently but should have no systematic impact on the quality of the 1382 # result. 1383 if ti < len(table_rhs_idx): 1384 table_rhs_idx[ti] = rhs_idx 1385 table_candidate_idx[ti] = candidate_idx 1386 else: 1387 table_rhs_idx.append(rhs_idx) 1388 table_candidate_idx.append(candidate_idx) 1389 if ti > 0: 1390 backlinks.append(table_candidate_idx[ti - 1]) 1391 else: 1392 backlinks.append(None) 1393 1394 # Commit to names in the matching by walking the backlinks. Recursively 1395 # attempt to fill in more matches in-betweem. 
1396 match_idx = table_candidate_idx[-1] 1397 while match_idx is not None: 1398 current = candidates[match_idx] 1399 matches.append(current) 1400 match_idx = backlinks[match_idx] 1401 1402 matches.reverse() 1403 return matches 1404 1405 1406VARIABLE_TAG = "[[@@]]" 1407METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") 1408NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") 1409 1410 1411class TestVar: 1412 def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str): 1413 self._nameless_value = nameless_value 1414 1415 self._prefix = prefix 1416 self._suffix = suffix 1417 1418 def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str): 1419 if prefix != self._prefix: 1420 self._prefix = "" 1421 if suffix != self._suffix: 1422 self._suffix = "" 1423 1424 def get_variable_name(self, text): 1425 return self._nameless_value.get_value_name( 1426 text, self._nameless_value.check_prefix 1427 ) 1428 1429 def get_def(self, name, prefix, suffix): 1430 if self._nameless_value.is_number: 1431 return f"{prefix}[[#{name}:]]{suffix}" 1432 if self._prefix: 1433 assert self._prefix == prefix 1434 prefix = "" 1435 if self._suffix: 1436 assert self._suffix == suffix 1437 suffix = "" 1438 return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}" 1439 1440 def get_use(self, name, prefix, suffix): 1441 if self._nameless_value.is_number: 1442 return f"{prefix}[[#{name}]]{suffix}" 1443 if self._prefix: 1444 assert self._prefix == prefix 1445 prefix = "" 1446 if self._suffix: 1447 assert self._suffix == suffix 1448 suffix = "" 1449 return f"{prefix}[[{name}]]{suffix}" 1450 1451 1452class CheckValueInfo: 1453 def __init__( 1454 self, 1455 key, 1456 text, 1457 name: str, 1458 prefix: str, 1459 suffix: str, 1460 ): 1461 # Key for the value, e.g. 
class CheckValueInfo:
    """Describes one FileCheck variable occurrence within a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the value, e.g. '%'
        self.key = key

        # Text to be matched by the FileCheck variable (without any prefix or suffix)
        self.text = text

        # Name of the FileCheck variable
        self.name = name

        # Prefix and suffix that were captured by the NamelessValue regular expression
        self.prefix = prefix
        self.suffix = suffix


# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrences in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"


def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one.
    # old_begin/old_end and new_begin/new_end delimit the half-open subranges
    # of old_line_infos and new_line_infos under consideration.
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # FIX: find_diff_matching returns indices relative to the slices we
        # just built, but the loop below (and the recursion seeds) indexes
        # old_line_infos/new_line_infos absolutely. The original code used the
        # relative indices directly with a fixed (-1, -1) sentinel, which is
        # only correct for the top-level call where both ranges start at 0;
        # nested recursive calls would read the wrong lines. Rebase the
        # matches into absolute indices and seed the sentinel accordingly.
        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Apply commits greedily on a match-by-match basis
        matches = [(old_begin - 1, new_begin - 1)]
        committed_anything = False
        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            local_commits = {}

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in local_commits:
                    # Same, but for a possible commit happening on the same line
                    if local_commits[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if lhs_value.name in committed_names:
                    # We can't map this value because the name we would map it to has already been
                    # committed for something else. Give up on this line.
                    break

                local_commits[rhs_value.name] = lhs_value.name
            else:
                # No reason not to add any commitments for this line
                for rhs_var, lhs_var in local_commits.items():
                    new_mapping[rhs_var] = lhs_var
                    committed_names.add(lhs_var)
                    committed_anything = True

                    if (
                        lhs_var != rhs_var
                        and lhs_var in new_mapping
                        and new_mapping[lhs_var] == lhs_var
                    ):
                        # The old name is itself a (still-identity-mapped) new
                        # name; mark it so it gets renamed away below.
                        new_mapping[lhs_var] = "conflict_" + lhs_var

                matches.append((lhs_idx, rhs_idx))

        matches.append((old_end, new_end))

        # Recursively handle sequences between matches
        if committed_anything:
            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        # Pick a fresh name by bumping the numeric suffix until it is unused.
        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """
    Rewrite |lines| in place, replacing IR value names with FileCheck
    variable definitions/uses, and return the list.

    vars_seen / global_vars_seen map (name, check_key) -> TestVar for local
    and global values respectively; both are updated as new values are seen.
    When original_check_lines is given, variable names are remapped to
    minimize the diff against those existing check lines. With
    unstable_globals_only, only globals with unstable ids are generalized.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # Rewrite integer constants of selected globals as hex numeric
            # substitutions (driven by the --global-hex-value-regex option).
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # Names already assigned to previously-seen local values must not be
        # reused by the remapping below.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                # Only locals fold their affixes into the variable pattern.
                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        # Global variable names seen so far are committed as well (after the
        # scan, so this pass's new globals are included).
        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping: re-expand each [[@@]] tag into a
        # FileCheck definition (first occurrence) or use (later occurrences).
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """
    Append the CHECK lines for one function to output_lines and return the
    list of prefixes that were actually printed.

    prefix_list holds one entry per RUN line, whose first element is that run
    line's list of check prefixes; func_dict maps prefix -> func_name -> the
    scrubbed function body object. check_label_format is the %-format string
    for the -LABEL line. global_vars_seen_dict accumulates, per prefix, the
    global variables already named by earlier functions.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        # For each run line, emit checks under the first usable prefix; the
        # `break` statements below stop once one prefix has been handled.
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Already printed under an earlier run line; nothing more to do
            # for this run line.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can tell which globals are first seen in this
            # function.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            # Function attributes / return type in the label line only exist
            # since check-line version 2.
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #     is_blank_line = True
                # else:
                #     output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                #     is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

            # Remember new global variables we have not seen before
            for key in global_vars_seen:
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
            break
    return printed_prefixes
2070 if function_sig and ginfo.get_version() > 1: 2071 function_def_regex = "define %s" 2072 elif function_sig: 2073 function_def_regex = "define {{[^@]+}}%s" 2074 else: 2075 function_def_regex = "%s" 2076 check_label_format = "{} %s-LABEL: {}@%s%s%s".format( 2077 comment_marker, function_def_regex 2078 ) 2079 return add_checks( 2080 output_lines, 2081 comment_marker, 2082 prefix_list, 2083 func_dict, 2084 func_name, 2085 check_label_format, 2086 ginfo, 2087 global_vars_seen_dict, 2088 is_filtered, 2089 preserve_names, 2090 original_check_lines=original_check_lines, 2091 ) 2092 2093 2094def add_analyze_checks( 2095 output_lines, 2096 comment_marker, 2097 prefix_list, 2098 func_dict, 2099 func_name, 2100 ginfo: GeneralizerInfo, 2101 is_filtered, 2102): 2103 assert ginfo.is_analyze() 2104 check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker) 2105 global_vars_seen_dict = {} 2106 return add_checks( 2107 output_lines, 2108 comment_marker, 2109 prefix_list, 2110 func_dict, 2111 func_name, 2112 check_label_format, 2113 ginfo, 2114 global_vars_seen_dict, 2115 is_filtered, 2116 ) 2117 2118 2119def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo): 2120 for nameless_value in ginfo.get_nameless_values(): 2121 if nameless_value.global_ir_rhs_regexp is None: 2122 continue 2123 2124 lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp 2125 rhs_re_str = nameless_value.global_ir_rhs_regexp 2126 2127 global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$" 2128 global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M)) 2129 lines = [] 2130 for m in global_ir_value_re.finditer(raw_tool_output): 2131 # Attach the substring's start index so that CHECK lines 2132 # can be sorted properly even if they are matched by different nameless values. 2133 # This is relevant for GLOB and GLOBNAMED since they may appear interlaced. 
2134 lines.append((m.start(), m.group(0))) 2135 2136 for prefix in prefixes: 2137 if glob_val_dict[prefix] is None: 2138 continue 2139 if nameless_value.check_prefix in glob_val_dict[prefix]: 2140 if lines == glob_val_dict[prefix][nameless_value.check_prefix]: 2141 continue 2142 if prefix == prefixes[-1]: 2143 warn("Found conflicting asm under the same prefix: %r!" % (prefix,)) 2144 else: 2145 glob_val_dict[prefix][nameless_value.check_prefix] = None 2146 continue 2147 glob_val_dict[prefix][nameless_value.check_prefix] = lines 2148 2149 2150def filter_globals_according_to_preference( 2151 global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting 2152): 2153 if global_check_setting == "none": 2154 return [] 2155 if global_check_setting == "all": 2156 return global_val_lines_w_index 2157 assert global_check_setting == "smart" 2158 2159 if nameless_value.check_key == "#": 2160 # attribute sets are usually better checked by --check-attributes 2161 return [] 2162 2163 def extract(line, nv): 2164 p = ( 2165 "^" 2166 + nv.ir_prefix 2167 + "(" 2168 + nv.ir_regexp 2169 + ") = (" 2170 + nv.global_ir_rhs_regexp 2171 + ")" 2172 ) 2173 match = re.match(p, line) 2174 return (match.group(1), re.findall(nv.ir_regexp, match.group(2))) 2175 2176 transitively_visible = set() 2177 contains_refs_to = {} 2178 2179 def add(var): 2180 nonlocal transitively_visible 2181 nonlocal contains_refs_to 2182 if var in transitively_visible: 2183 return 2184 transitively_visible.add(var) 2185 if not var in contains_refs_to: 2186 return 2187 for x in contains_refs_to[var]: 2188 add(x) 2189 2190 for i, line in global_val_lines_w_index: 2191 (var, refs) = extract(line, nameless_value) 2192 contains_refs_to[var] = refs 2193 for var, check_key in global_vars_seen: 2194 if check_key != nameless_value.check_key: 2195 continue 2196 add(var) 2197 return [ 2198 (i, line) 2199 for i, line in global_val_lines_w_index 2200 if extract(line, nameless_value)[0] in transitively_visible 2201 ] 
# Regex/replacement pairs used to scrub metadata string values that vary between
# builds: "<tool> version X.Y.Z" strings (optionally with a vendor suffix in
# parentheses) and !DIFile filename/directory paths are replaced with FileCheck
# {{.*}} globs so generated checks stay stable across environments.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
# Pre-compiled form of METADATA_FILTERS, built once at import time.
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Apply every METADATA_FILTERS_RE substitution to *line* and return the result."""
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Emit buffered global check lines into *output_lines* and clear the buffer.

    *new_lines_w_index* holds (source_index, check_line) tuples; they are sorted
    by source index so checks appear in the order the globals appeared in the
    tool output, preceded by a separator comment line. No-op when the buffer is
    empty. Mutates both *output_lines* and *new_lines_w_index* in place.
    """
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append CHECK lines for global values (e.g. @globals, !metadata) to *output_lines*.

    For each nameless-value kind that has a global RHS regexp and matches the
    requested *is_before_functions* placement, and for each check prefix in
    *prefix_list*, the captured global lines are filtered according to
    *global_check_setting* ('none'/'all'/'smart') and the --global-value-regex
    option, generalized into FileCheck patterns, scrubbed of unstable metadata,
    and buffered per prefix so interlaced kinds (GLOB/GLOBNAMED) sort correctly.

    Mutates *global_vars_seen_dict* with newly seen globals per prefix.
    Returns the set of (checkprefix, check_prefix) pairs actually printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                # Only emit each (prefix, value-kind) combination once.
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # Honor --global-value-regex: only keep globals whose name
                    # matches one of the user-supplied regexes.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                # Non-interlaced kinds start a new sorted group; flush what we
                # buffered so far for this prefix first.
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush the remaining buffered checks, once per run line (the inner
        # `break` emits only the first buffered prefix of each run line).
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes


def check_prefix(prefix):
    """Warn if *prefix* is not a valid FileCheck prefix (per PREFIX_RE).

    When the prefix contains a comma, hint that the user probably wanted
    --check-prefixes rather than a single comma-containing prefix.
    """
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )


def get_check_prefixes(filecheck_cmd):
    """Return all check prefixes requested by a FileCheck command line.

    Collects every comma-separated value of the prefix options matched by
    CHECK_PREFIX_RE in *filecheck_cmd*; defaults to ["CHECK"] when the command
    specifies none.
    """
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix mentioned in a FileCheck command string *fc_cmd*.

    Each value of check-prefix= is validated; for check-prefixes= each
    comma-separated prefix is validated and a warning is issued for prefixes
    that appear more than once in the same list.
    """
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )


def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS note suffix recording non-default command-line options.

    Walks the parser's actions and serializes every option in *args* whose value
    differs from its default, skipping per-invocation parameters (binary paths,
    test lists, verbosity, ...) and options implied by the requested --version.
    Filters are expanded into one --filter/--filter-out per element. Returns
    either "" or a string of the form " UTC_ARGS: <options>".
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # The default for --check-globals changed in version 4 ('none' ->
            # 'smart'), so the value to omit depends on the test's version.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse arguments if *line* contains a UTC_ARGS command.

    When UTC_ARGS_CMD matches, its options are shlex-split, appended to *argv*
    (with the test paths filtered out of the parse), and *args* is re-derived;
    *argparse_callback* is invoked on the new args when provided. Returns the
    (possibly updated) (args, argv) pair.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option value for a test, falling back to in-file UTC_ARGS.

    First checks the test-level args via *get_arg_to_check*. If absent and
    *is_global* is true, scans every line of the test for the option (a
    "global" option applies to the whole test wherever it appears) and warns
    when it is found only after non-comment test content has started. Returns
    the option value found, or the falsy test-level result.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            # NOTE(review): assumes ';' starts a comment line here — TODO
            # confirm for non-LLVM-IR comment markers.
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # `warn` is the module-level helper (always truthy), so this
                # effectively tests `saw_line`.
                if warn and saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into *output_lines*, dropping stale checks.

    Skips bare comment-marker lines, separator lines, and any comment line that
    is a check for one of the prefixes in *prefix_set* (those are regenerated);
    all other lines are appended with their trailing newline stripped.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions at the end of the file, grouped by prefix.

    For every (prefixes, tool_args) run-line entry and each function recorded in
    *func_order* for a prefix, invokes *check_generator* with a single-prefix
    run list, deduplicating (func, prefix) pairs that were already generated.
    Returns the set of prefixes for which checks were actually generated.
    """
    added = set()
    generated_prefixes = set()
    for prefix in prefix_list:
        prefixes = prefix[0]
        tool_args = prefix[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element. They output
                # checks for each prefix in the list of prefixes. By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function. That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes