1import argparse 2import bisect 3import collections 4import copy 5import glob 6import os 7import re 8import subprocess 9import sys 10import shlex 11 12from typing import List, Mapping, Set 13 14##### Common utilities for update_*test_checks.py 15 16 17_verbose = False 18_prefix_filecheck_ir_name = "" 19 20""" 21Version changelog: 22 231: Initial version, used by tests that don't specify --version explicitly. 242: --function-signature is now enabled by default and also checks return 25 type/attributes. 263: Opening parenthesis of function args is kept on the first LABEL line 27 in case arguments are split to a separate SAME line. 284: --check-globals now has a third option ('smart'). The others are now called 29 'none' and 'all'. 'smart' is the default. 305: Basic block labels are matched by FileCheck expressions 31""" 32DEFAULT_VERSION = 5 33 34 35SUPPORTED_ANALYSES = { 36 "Branch Probability Analysis", 37 "Cost Model Analysis", 38 "Loop Access Analysis", 39 "Scalar Evolution Analysis", 40} 41 42 43class Regex(object): 44 """Wrap a compiled regular expression object to allow deep copy of a regexp. 45 This is required for the deep copy done in do_scrub. 46 47 """ 48 49 def __init__(self, regex): 50 self.regex = regex 51 52 def __deepcopy__(self, memo): 53 result = copy.copy(self) 54 result.regex = self.regex 55 return result 56 57 def search(self, line): 58 return self.regex.search(line) 59 60 def sub(self, repl, line): 61 return self.regex.sub(repl, line) 62 63 def pattern(self): 64 return self.regex.pattern 65 66 def flags(self): 67 return self.regex.flags 68 69 70class Filter(Regex): 71 """Augment a Regex object with a flag indicating whether a match should be 72 added (!is_filter_out) or removed (is_filter_out) from the generated checks. 73 74 """ 75 76 def __init__(self, regex, is_filter_out): 77 super(Filter, self).__init__(regex) 78 self.is_filter_out = is_filter_out 79 80 def __deepcopy__(self, memo): 81 result = copy.deepcopy(super(Filter, self), memo) 82 result.is_filter_out = copy.deepcopy(self.is_filter_out, memo) 83 return result 84 85 86def parse_commandline_args(parser): 87 class RegexAction(argparse.Action): 88 """Add a regular expression option value to a list of regular expressions. 89 This compiles the expression, wraps it in a Regex and adds it to the option 90 value list.""" 91 92 def __init__(self, option_strings, dest, nargs=None, **kwargs): 93 if nargs is not None: 94 raise ValueError("nargs not allowed") 95 super(RegexAction, self).__init__(option_strings, dest, **kwargs) 96 97 def do_call(self, namespace, values, flags): 98 value_list = getattr(namespace, self.dest) 99 if value_list is None: 100 value_list = [] 101 102 try: 103 value_list.append(Regex(re.compile(values, flags))) 104 except re.error as error: 105 raise ValueError( 106 "{}: Invalid regular expression '{}' ({})".format( 107 option_string, error.pattern, error.msg 108 ) 109 ) 110 111 setattr(namespace, self.dest, value_list) 112 113 def __call__(self, parser, namespace, values, option_string=None): 114 self.do_call(namespace, values, 0) 115 116 class FilterAction(RegexAction): 117 """Add a filter to a list of filter option values.""" 118 119 def __init__(self, option_strings, dest, nargs=None, **kwargs): 120 super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs) 121 122 def __call__(self, parser, namespace, values, option_string=None): 123 super(FilterAction, self).__call__(parser, namespace, values, option_string) 124 125 value_list = getattr(namespace, self.dest) 126 127 is_filter_out = option_string == "--filter-out" 128 129 value_list[-1] = Filter(value_list[-1].regex, is_filter_out) 130 131 setattr(namespace, self.dest, value_list) 132 133 filter_group = parser.add_argument_group( 134 "filtering", 135 """Filters are applied to each output line according to the order given. The 136 first matching filter terminates filter processing for that current line.""", 137 ) 138 139 filter_group.add_argument( 140 "--filter", 141 action=FilterAction, 142 dest="filters", 143 metavar="REGEX", 144 help="Only include lines matching REGEX (may be specified multiple times)", 145 ) 146 filter_group.add_argument( 147 "--filter-out", 148 action=FilterAction, 149 dest="filters", 150 metavar="REGEX", 151 help="Exclude lines matching REGEX", 152 ) 153 154 parser.add_argument( 155 "--include-generated-funcs", 156 action="store_true", 157 help="Output checks for functions not in source", 158 ) 159 parser.add_argument( 160 "-v", "--verbose", action="store_true", help="Show verbose output" 161 ) 162 parser.add_argument( 163 "-u", 164 "--update-only", 165 action="store_true", 166 help="Only update test if it was already autogened", 167 ) 168 parser.add_argument( 169 "--force-update", 170 action="store_true", 171 help="Update test even if it was autogened by a different script", 172 ) 173 parser.add_argument( 174 "--enable", 175 action="store_true", 176 dest="enabled", 177 default=True, 178 help="Activate CHECK line generation from this point forward", 179 ) 180 parser.add_argument( 181 "--disable", 182 action="store_false", 183 dest="enabled", 184 help="Deactivate CHECK line generation from this point forward", 185 ) 186 parser.add_argument( 187 "--replace-value-regex", 188 nargs="+", 189 default=[], 190 help="List of regular expressions to replace matching value names", 191 ) 192 parser.add_argument( 193 "--prefix-filecheck-ir-name", 194 default="", 195 help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names", 196 ) 197 parser.add_argument( 198 "--global-value-regex", 199 nargs="+", 200 default=[], 201 help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)", 202 ) 203 parser.add_argument( 204 "--global-hex-value-regex", 205 nargs="+", 206 default=[], 207 help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives", 208 ) 209 # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point, 210 # we need to rename the flag to just -generate-body-for-unused-prefixes. 211 parser.add_argument( 212 "--no-generate-body-for-unused-prefixes", 213 action="store_false", 214 dest="gen_unused_prefix_body", 215 default=True, 216 help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.", 217 ) 218 # This is the default when regenerating existing tests. The default when 219 # generating new tests is determined by DEFAULT_VERSION. 220 parser.add_argument( 221 "--version", type=int, default=1, help="The version of output format" 222 ) 223 args = parser.parse_args() 224 # TODO: This should not be handled differently from the other options 225 global _verbose, _global_value_regex, _global_hex_value_regex 226 _verbose = args.verbose 227 _global_value_regex = args.global_value_regex 228 _global_hex_value_regex = args.global_hex_value_regex 229 return args 230 231 232def parse_args(parser, argv): 233 args = parser.parse_args(argv) 234 if args.version >= 2: 235 args.function_signature = True 236 # TODO: This should not be handled differently from the other options 237 global _verbose, _global_value_regex, _global_hex_value_regex 238 _verbose = args.verbose 239 _global_value_regex = args.global_value_regex 240 _global_hex_value_regex = args.global_hex_value_regex 241 if "check_globals" in args and args.check_globals == "default": 242 args.check_globals = "none" if args.version < 4 else "smart" 243 return args 244 245 246class InputLineInfo(object): 247 def __init__(self, line, line_number, args, argv): 248 self.line = line 249 self.line_number = line_number 250 self.args = args 251 self.argv = argv 252 253 254class TestInfo(object): 255 def __init__( 256 self, 257 test, 258 parser, 259 script_name, 260 input_lines, 261 args, 262 argv, 263 comment_prefix, 264 argparse_callback, 265 ): 266 self.parser = parser 267 self.argparse_callback = argparse_callback 268 self.path = test 269 self.args = args 270 if args.prefix_filecheck_ir_name: 271 global _prefix_filecheck_ir_name 272 _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name 273 self.argv = argv 274 self.input_lines = input_lines 275 self.run_lines = find_run_lines(test, self.input_lines) 276 self.comment_prefix = comment_prefix 277 if self.comment_prefix is None: 278 if self.path.endswith(".mir") or self.path.endswith(".txt"): 279 self.comment_prefix = "#" 280 elif self.path.endswith(".s"): 281 self.comment_prefix = "//" 282 else: 283 self.comment_prefix = ";" 284 self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT 285 self.test_autogenerated_note = self.autogenerated_note_prefix + script_name 286 self.test_autogenerated_note += get_autogennote_suffix(parser, self.args) 287 self.test_unused_note = ( 288 self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE 289 ) 290 291 def ro_iterlines(self): 292 for line_num, input_line in enumerate(self.input_lines): 293 args, argv = check_for_command( 294 input_line, self.parser, self.args, self.argv, self.argparse_callback 295 ) 296 yield InputLineInfo(input_line, line_num, args, argv) 297 298 def iterlines(self, output_lines): 299 output_lines.append(self.test_autogenerated_note) 300 for line_info in self.ro_iterlines(): 301 input_line = line_info.line 302 # Discard any previous script advertising. 303 if input_line.startswith(self.autogenerated_note_prefix): 304 continue 305 self.args = line_info.args 306 self.argv = line_info.argv 307 if not self.args.enabled: 308 output_lines.append(input_line) 309 continue 310 yield line_info 311 312 def get_checks_for_unused_prefixes( 313 self, run_list, used_prefixes: List[str] 314 ) -> List[str]: 315 run_list = [element for element in run_list if element[0] is not None] 316 unused_prefixes = set( 317 [prefix for sublist in run_list for prefix in sublist[0]] 318 ).difference(set(used_prefixes)) 319 320 ret = [] 321 if not unused_prefixes: 322 return ret 323 ret.append(self.test_unused_note) 324 for unused in sorted(unused_prefixes): 325 ret.append( 326 "{comment} {prefix}: {match_everything}".format( 327 comment=self.comment_prefix, 328 prefix=unused, 329 match_everything=r"""{{.*}}""", 330 ) 331 ) 332 return ret 333 334 335def itertests( 336 test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None 337): 338 for pattern in test_patterns: 339 # On Windows we must expand the patterns ourselves. 340 tests_list = glob.glob(pattern) 341 if not tests_list: 342 warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,)) 343 continue 344 for test in tests_list: 345 with open(test) as f: 346 input_lines = [l.rstrip() for l in f] 347 first_line = input_lines[0] if input_lines else "" 348 if UTC_AVOID in first_line: 349 warn("Skipping test that must not be autogenerated: " + test) 350 continue 351 is_regenerate = UTC_ADVERT in first_line 352 353 # If we're generating a new test, set the default version to the latest. 354 argv = sys.argv[:] 355 if not is_regenerate: 356 argv.insert(1, "--version=" + str(DEFAULT_VERSION)) 357 358 args = parse_args(parser, argv[1:]) 359 if argparse_callback is not None: 360 argparse_callback(args) 361 if is_regenerate: 362 if script_name not in first_line and not args.force_update: 363 warn( 364 "Skipping test which wasn't autogenerated by " + script_name, 365 test, 366 ) 367 continue 368 args, argv = check_for_command( 369 first_line, parser, args, argv, argparse_callback 370 ) 371 elif args.update_only: 372 assert UTC_ADVERT not in first_line 373 warn("Skipping test which isn't autogenerated: " + test) 374 continue 375 final_input_lines = [] 376 for l in input_lines: 377 if UNUSED_NOTE in l: 378 break 379 final_input_lines.append(l) 380 yield TestInfo( 381 test, 382 parser, 383 script_name, 384 final_input_lines, 385 args, 386 argv, 387 comment_prefix, 388 argparse_callback, 389 ) 390 391 392def should_add_line_to_output( 393 input_line, 394 prefix_set, 395 *, 396 skip_global_checks=False, 397 skip_same_checks=False, 398 comment_marker=";", 399): 400 # Skip any blank comment lines in the IR. 401 if not skip_global_checks and input_line.strip() == comment_marker: 402 return False 403 # Skip a special double comment line we use as a separator. 404 if input_line.strip() == comment_marker + SEPARATOR: 405 return False 406 # Skip any blank lines in the IR. 407 # if input_line.strip() == '': 408 # return False 409 # And skip any CHECK lines. We're building our own. 410 m = CHECK_RE.match(input_line) 411 if m and m.group(1) in prefix_set: 412 if skip_same_checks and CHECK_SAME_RE.match(input_line): 413 # The previous CHECK line was removed, so don't leave this dangling 414 return False 415 if skip_global_checks: 416 # Skip checks only if they are of global value definitions 417 global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M)) 418 is_global = global_ir_value_re.search(input_line) 419 return not is_global 420 return False 421 422 return True 423 424 425def collect_original_check_lines(ti: TestInfo, prefix_set: set): 426 """ 427 Collect pre-existing check lines into a dictionary `result` which is 428 returned. 429 430 result[func_name][prefix] is filled with a list of right-hand-sides of check 431 lines. 432 """ 433 result = collections.defaultdict(lambda: {}) 434 435 current_prefix = None 436 current_function = None 437 for input_line_info in ti.ro_iterlines(): 438 input_line = input_line_info.line 439 if input_line.lstrip().startswith(";"): 440 m = CHECK_RE.match(input_line) 441 if m is not None: 442 prefix = m.group(1) 443 check_kind = m.group(2) 444 line = input_line[m.end() :].strip() 445 446 if prefix != current_prefix: 447 current_function = None 448 current_prefix = None 449 450 if check_kind not in ["LABEL", "SAME"]: 451 if current_function is not None: 452 current_function.append(line) 453 continue 454 455 if check_kind == "SAME": 456 continue 457 458 if check_kind == "LABEL": 459 m = IR_FUNCTION_LABEL_RE.match(line) 460 if m is not None: 461 func_name = m.group(1) 462 if ( 463 ti.args.function is not None 464 and func_name != ti.args.function 465 ): 466 # When filtering on a specific function, skip all others. 467 continue 468 469 current_prefix = prefix 470 current_function = result[func_name][prefix] = [] 471 continue 472 473 current_function = None 474 475 return result 476 477 478# Perform lit-like substitutions 479def getSubstitutions(sourcepath): 480 sourcedir = os.path.dirname(sourcepath) 481 return [ 482 ("%s", sourcepath), 483 ("%S", sourcedir), 484 ("%p", sourcedir), 485 ("%{pathsep}", os.pathsep), 486 ] 487 488 489def applySubstitutions(s, substitutions): 490 for a, b in substitutions: 491 s = s.replace(a, b) 492 return s 493 494 495# Invoke the tool that is being tested. 496def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): 497 with open(ir) as ir_file: 498 substitutions = getSubstitutions(ir) 499 500 # TODO Remove the str form which is used by update_test_checks.py and 501 # update_llc_test_checks.py 502 # The safer list form is used by update_cc_test_checks.py 503 if preprocess_cmd: 504 # Allow pre-processing the IR file (e.g. using sed): 505 assert isinstance( 506 preprocess_cmd, str 507 ) # TODO: use a list instead of using shell 508 preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip() 509 if verbose: 510 print( 511 "Pre-processing input file: ", 512 ir, 513 " with command '", 514 preprocess_cmd, 515 "'", 516 sep="", 517 file=sys.stderr, 518 ) 519 pp = subprocess.Popen( 520 preprocess_cmd, 521 shell=True, 522 stdin=subprocess.DEVNULL, 523 stdout=subprocess.PIPE, 524 ) 525 ir_file = pp.stdout 526 527 if isinstance(cmd_args, list): 528 args = [applySubstitutions(a, substitutions) for a in cmd_args] 529 stdout = subprocess.check_output([exe] + args, stdin=ir_file) 530 else: 531 stdout = subprocess.check_output( 532 exe + " " + applySubstitutions(cmd_args, substitutions), 533 shell=True, 534 stdin=ir_file, 535 ) 536 if sys.version_info[0] > 2: 537 # FYI, if you crashed here with a decode error, your run line probably 538 # results in bitcode or other binary format being written to the pipe. 539 # For an opt test, you probably want to add -S or -disable-output. 540 stdout = stdout.decode() 541 # Fix line endings to unix CR style. 542 return stdout.replace("\r\n", "\n") 543 544 545##### LLVM IR parser 546RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$") 547CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") 548PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") 549CHECK_RE = re.compile( 550 r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" 551) 552CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") 553 554UTC_ARGS_KEY = "UTC_ARGS:" 555UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$") 556UTC_ADVERT = "NOTE: Assertions have been autogenerated by " 557UTC_AVOID = "NOTE: Do not autogenerate" 558UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:" 559 560DATA_LAYOUT_RE = re.compile( 561 r"target\s+datalayout\s+=\s+\"(?P<layout>.+)\"$", flags=(re.M | re.S) 562) 563 564OPT_FUNCTION_RE = re.compile( 565 r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*" 566 r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$", 567 flags=(re.M | re.S), 568) 569 570ANALYZE_FUNCTION_RE = re.compile( 571 r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':" 572 r"\s*\n(?P<body>.*)$", 573 flags=(re.X | re.S), 574) 575 576LOOP_PASS_DEBUG_RE = re.compile( 577 r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S) 578) 579 580IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(') 581IR_FUNCTION_LABEL_RE = re.compile( 582 r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\(' 583) 584TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$') 585TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)") 586MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)") 587DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") 588 589SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)") 590SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M) 591SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+") 592SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M) 593SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE 594SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile( 595 r"([ \t]|(#[0-9]+))+$", flags=re.M 596) 597SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n") 598SCRUB_LOOP_COMMENT_RE = re.compile( 599 r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M 600) 601SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M) 602 603SEPARATOR = "." 604 605 606def error(msg, test_file=None): 607 if test_file: 608 msg = "{}: {}".format(msg, test_file) 609 print("ERROR: {}".format(msg), file=sys.stderr) 610 611 612def warn(msg, test_file=None): 613 if test_file: 614 msg = "{}: {}".format(msg, test_file) 615 print("WARNING: {}".format(msg), file=sys.stderr) 616 617 618def debug(*args, **kwargs): 619 # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs): 620 if "file" not in kwargs: 621 kwargs["file"] = sys.stderr 622 if _verbose: 623 print(*args, **kwargs) 624 625 626def find_run_lines(test, lines): 627 debug("Scanning for RUN lines in test file:", test) 628 raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m] 629 run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] 630 for l in raw_lines[1:]: 631 if run_lines[-1].endswith("\\"): 632 run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l 633 else: 634 run_lines.append(l) 635 debug("Found {} RUN lines in {}:".format(len(run_lines), test)) 636 for l in run_lines: 637 debug(" RUN: {}".format(l)) 638 return run_lines 639 640 641def get_triple_from_march(march): 642 triples = { 643 "amdgcn": "amdgcn", 644 "r600": "r600", 645 "mips": "mips", 646 "nvptx64": "nvptx64", 647 "sparc": "sparc", 648 "hexagon": "hexagon", 649 "ve": "ve", 650 } 651 for prefix, triple in triples.items(): 652 if march.startswith(prefix): 653 return triple 654 print("Cannot find a triple. Assume 'x86'", file=sys.stderr) 655 return "x86" 656 657 658def get_globals_name_prefix(raw_tool_output): 659 m = DATA_LAYOUT_RE.search(raw_tool_output) 660 if not m: 661 return None 662 data_layout = m.group("layout") 663 idx = data_layout.find("m:") 664 if idx < 0: 665 return None 666 ch = data_layout[idx + 2] 667 return "_" if ch == "o" or ch == "x" else None 668 669 670def apply_filters(line, filters): 671 has_filter = False 672 for f in filters: 673 if not f.is_filter_out: 674 has_filter = True 675 if f.search(line): 676 return False if f.is_filter_out else True 677 # If we only used filter-out, keep the line, otherwise discard it since no 678 # filter matched. 679 return False if has_filter else True 680 681 682def do_filter(body, filters): 683 return ( 684 body 685 if not filters 686 else "\n".join( 687 filter(lambda line: apply_filters(line, filters), body.splitlines()) 688 ) 689 ) 690 691 692def scrub_body(body): 693 # Scrub runs of whitespace out of the assembly, but leave the leading 694 # whitespace in place. 695 body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body) 696 697 # Expand the tabs used for indentation. 698 body = str.expandtabs(body, 2) 699 # Strip trailing whitespace. 700 body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body) 701 return body 702 703 704def do_scrub(body, scrubber, scrubber_args, extra): 705 if scrubber_args: 706 local_args = copy.deepcopy(scrubber_args) 707 local_args[0].extra_scrub = extra 708 return scrubber(body, *local_args) 709 return scrubber(body, *scrubber_args) 710 711 712# Build up a dictionary of all the function bodies. 713class function_body(object): 714 def __init__( 715 self, 716 string, 717 extra, 718 funcdef_attrs_and_ret, 719 args_and_sig, 720 attrs, 721 func_name_separator, 722 ginfo, 723 ): 724 self.scrub = string 725 self.extrascrub = extra 726 self.funcdef_attrs_and_ret = funcdef_attrs_and_ret 727 self.args_and_sig = args_and_sig 728 self.attrs = attrs 729 self.func_name_separator = func_name_separator 730 self._ginfo = ginfo 731 732 def is_same_except_arg_names( 733 self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs 734 ): 735 arg_names = set() 736 737 def drop_arg_names(match): 738 nameless_value = self._ginfo.get_nameless_value_from_match(match) 739 if nameless_value.check_key == "%": 740 arg_names.add(self._ginfo.get_name_from_match(match)) 741 substitute = "" 742 else: 743 substitute = match.group(2) 744 return match.group(1) + substitute + match.group(match.lastindex) 745 746 def repl_arg_names(match): 747 nameless_value = self._ginfo.get_nameless_value_from_match(match) 748 if ( 749 nameless_value.check_key == "%" 750 and self._ginfo.get_name_from_match(match) in arg_names 751 ): 752 return match.group(1) + match.group(match.lastindex) 753 return match.group(1) + match.group(2) + match.group(match.lastindex) 754 755 if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret: 756 return False 757 if self.attrs != attrs: 758 return False 759 760 regexp = self._ginfo.get_regexp() 761 ans0 = regexp.sub(drop_arg_names, self.args_and_sig) 762 ans1 = regexp.sub(drop_arg_names, args_and_sig) 763 if ans0 != ans1: 764 return False 765 if self._ginfo.is_asm(): 766 # Check without replacements, the replacements are not applied to the 767 # body for backend checks. 768 return self.extrascrub == extrascrub 769 770 es0 = regexp.sub(repl_arg_names, self.extrascrub) 771 es1 = regexp.sub(repl_arg_names, extrascrub) 772 es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0) 773 es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1) 774 return es0 == es1 775 776 def __str__(self): 777 return self.scrub 778 779 780class FunctionTestBuilder: 781 def __init__(self, run_list, flags, scrubber_args, path, ginfo): 782 self._verbose = flags.verbose 783 self._record_args = flags.function_signature 784 self._check_attributes = flags.check_attributes 785 # Strip double-quotes if input was read by UTC_ARGS 786 self._filters = ( 787 list( 788 map( 789 lambda f: Filter( 790 re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out 791 ), 792 flags.filters, 793 ) 794 ) 795 if flags.filters 796 else [] 797 ) 798 self._scrubber_args = scrubber_args 799 self._path = path 800 self._ginfo = ginfo 801 # Strip double-quotes if input was read by UTC_ARGS 802 self._replace_value_regex = list( 803 map(lambda x: x.strip('"'), flags.replace_value_regex) 804 ) 805 self._func_dict = {} 806 self._func_order = {} 807 self._global_var_dict = {} 808 self._processed_prefixes = set() 809 for tuple in run_list: 810 for prefix in tuple[0]: 811 self._func_dict.update({prefix: dict()}) 812 self._func_order.update({prefix: []}) 813 self._global_var_dict.update({prefix: dict()}) 814 815 def finish_and_get_func_dict(self): 816 for prefix in self.get_failed_prefixes(): 817 warn( 818 "Prefix %s had conflicting output from different RUN lines for all functions in test %s" 819 % ( 820 prefix, 821 self._path, 822 ) 823 ) 824 return self._func_dict 825 826 def func_order(self): 827 return self._func_order 828 829 def global_var_dict(self): 830 return self._global_var_dict 831 832 def is_filtered(self): 833 return bool(self._filters) 834 835 def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes): 836 build_global_values_dictionary( 837 self._global_var_dict, raw_tool_output, prefixes, self._ginfo 838 ) 839 for m in function_re.finditer(raw_tool_output): 840 if not m: 841 continue 842 func = m.group("func") 843 body = m.group("body") 844 # func_name_separator is the string that is placed right after function name at the 845 # beginning of assembly function definition. In most assemblies, that is just a 846 # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is 847 # False, just assume that separator is an empty string. 848 if self._ginfo.is_asm(): 849 # Use ':' as default separator. 850 func_name_separator = ( 851 m.group("func_name_separator") 852 if "func_name_separator" in m.groupdict() 853 else ":" 854 ) 855 else: 856 func_name_separator = "" 857 attrs = m.group("attrs") if self._check_attributes else "" 858 funcdef_attrs_and_ret = ( 859 m.group("funcdef_attrs_and_ret") if self._record_args else "" 860 ) 861 # Determine if we print arguments, the opening brace, or nothing after the 862 # function name 863 if self._record_args and "args_and_sig" in m.groupdict(): 864 args_and_sig = scrub_body(m.group("args_and_sig").strip()) 865 elif "args_and_sig" in m.groupdict(): 866 args_and_sig = "(" 867 else: 868 args_and_sig = "" 869 filtered_body = do_filter(body, self._filters) 870 scrubbed_body = do_scrub( 871 filtered_body, scrubber, self._scrubber_args, extra=False 872 ) 873 scrubbed_extra = do_scrub( 874 filtered_body, scrubber, self._scrubber_args, extra=True 875 ) 876 if "analysis" in m.groupdict(): 877 analysis = m.group("analysis") 878 if analysis not in SUPPORTED_ANALYSES: 879 warn("Unsupported analysis mode: %r!" % (analysis,)) 880 if func.startswith("stress"): 881 # We only use the last line of the function body for stress tests. 882 scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:]) 883 if self._verbose: 884 print("Processing function: " + func, file=sys.stderr) 885 for l in scrubbed_body.splitlines(): 886 print(" " + l, file=sys.stderr) 887 for prefix in prefixes: 888 # Replace function names matching the regex. 889 for regex in self._replace_value_regex: 890 # Pattern that matches capture groups in the regex in leftmost order. 891 group_regex = re.compile(r"\(.*?\)") 892 # Replace function name with regex. 893 match = re.match(regex, func) 894 if match: 895 func_repl = regex 896 # Replace any capture groups with their matched strings. 897 for g in match.groups(): 898 func_repl = group_regex.sub( 899 re.escape(g), func_repl, count=1 900 ) 901 func = re.sub(func_repl, "{{" + func_repl + "}}", func) 902 903 # Replace all calls to regex matching functions. 904 matches = re.finditer(regex, scrubbed_body) 905 for match in matches: 906 func_repl = regex 907 # Replace any capture groups with their matched strings. 908 for g in match.groups(): 909 func_repl = group_regex.sub( 910 re.escape(g), func_repl, count=1 911 ) 912 # Substitute function call names that match the regex with the same 913 # capture groups set. 914 scrubbed_body = re.sub( 915 func_repl, "{{" + func_repl + "}}", scrubbed_body 916 ) 917 918 if func in self._func_dict[prefix]: 919 if self._func_dict[prefix][func] is not None and ( 920 str(self._func_dict[prefix][func]) != scrubbed_body 921 or self._func_dict[prefix][func].args_and_sig != args_and_sig 922 or self._func_dict[prefix][func].attrs != attrs 923 or self._func_dict[prefix][func].funcdef_attrs_and_ret 924 != funcdef_attrs_and_ret 925 ): 926 if self._func_dict[prefix][func].is_same_except_arg_names( 927 scrubbed_extra, 928 funcdef_attrs_and_ret, 929 args_and_sig, 930 attrs, 931 ): 932 self._func_dict[prefix][func].scrub = scrubbed_extra 933 self._func_dict[prefix][func].args_and_sig = args_and_sig 934 else: 935 # This means a previous RUN line produced a body for this function 936 # that is different from the one produced by this current RUN line, 937 # so the body can't be common across RUN lines. We use None to 938 # indicate that. 939 self._func_dict[prefix][func] = None 940 else: 941 if prefix not in self._processed_prefixes: 942 self._func_dict[prefix][func] = function_body( 943 scrubbed_body, 944 scrubbed_extra, 945 funcdef_attrs_and_ret, 946 args_and_sig, 947 attrs, 948 func_name_separator, 949 self._ginfo, 950 ) 951 self._func_order[prefix].append(func) 952 else: 953 # An earlier RUN line used this check prefixes but didn't produce 954 # a body for this function. This happens in Clang tests that use 955 # preprocesser directives to exclude individual functions from some 956 # RUN lines. 957 self._func_dict[prefix][func] = None 958 959 def processed_prefixes(self, prefixes): 960 """ 961 Mark a set of prefixes as having had at least one applicable RUN line fully 962 processed. This is used to filter out function bodies that don't have 963 outputs for all RUN lines. 964 """ 965 self._processed_prefixes.update(prefixes) 966 967 def get_failed_prefixes(self): 968 # This returns the list of those prefixes that failed to match any function, 969 # because there were conflicting bodies produced by different RUN lines, in 970 # all instances of the prefix. 971 for prefix in self._func_dict: 972 if self._func_dict[prefix] and ( 973 not [ 974 fct 975 for fct in self._func_dict[prefix] 976 if self._func_dict[prefix][fct] is not None 977 ] 978 ): 979 yield prefix 980 981 982##### Generator of LLVM IR CHECK lines 983 984SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*") 985 986# TODO: We should also derive check lines for global, debug, loop declarations, etc.. 987 988 989class NamelessValue: 990 """ 991 A NamelessValue object represents a type of value in the IR whose "name" we 992 generalize in the generated check lines; where the "name" could be an actual 993 name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12` 994 or `!4`). 995 """ 996 997 def __init__( 998 self, 999 check_prefix, 1000 check_key, 1001 ir_prefix, 1002 ir_regexp, 1003 global_ir_rhs_regexp, 1004 *, 1005 is_before_functions=False, 1006 is_number=False, 1007 replace_number_with_counter=False, 1008 match_literally=False, 1009 interlaced_with_previous=False, 1010 ir_suffix=r"", 1011 ): 1012 self.check_prefix = check_prefix 1013 self.check_key = check_key 1014 self.ir_prefix = ir_prefix 1015 self.ir_regexp = ir_regexp 1016 self.ir_suffix = ir_suffix 1017 self.global_ir_rhs_regexp = global_ir_rhs_regexp 1018 self.is_before_functions = is_before_functions 1019 self.is_number = is_number 1020 # Some variable numbers (e.g. MCINST1234) will change based on unrelated 1021 # modifications to LLVM, replace those with an incrementing counter. 1022 self.replace_number_with_counter = replace_number_with_counter 1023 self.match_literally = match_literally 1024 self.interlaced_with_previous = interlaced_with_previous 1025 self.variable_mapping = {} 1026 1027 # Return true if this kind of IR value is defined "locally" to functions, 1028 # which we assume is only the case precisely for LLVM IR local values. 1029 def is_local_def_ir_value(self): 1030 return self.check_key == "%" 1031 1032 # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals 1033 def get_ir_regex(self): 1034 # for backwards compatibility we check locals with '.*' 1035 if self.is_local_def_ir_value(): 1036 return ".*" 1037 return self.ir_regexp 1038 1039 # Create a FileCheck variable name based on an IR name. 1040 def get_value_name(self, var: str, check_prefix: str): 1041 var = var.replace("!", "") 1042 if self.replace_number_with_counter: 1043 assert var 1044 replacement = self.variable_mapping.get(var, None) 1045 if replacement is None: 1046 # Replace variable with an incrementing counter 1047 replacement = str(len(self.variable_mapping) + 1) 1048 self.variable_mapping[var] = replacement 1049 var = replacement 1050 # This is a nameless value, prepend check_prefix. 1051 if var.isdigit(): 1052 var = check_prefix + var 1053 else: 1054 # This is a named value that clashes with the check_prefix, prepend with 1055 # _prefix_filecheck_ir_name, if it has been defined. 1056 if ( 1057 may_clash_with_default_check_prefix_name(check_prefix, var) 1058 and _prefix_filecheck_ir_name 1059 ): 1060 var = _prefix_filecheck_ir_name + var 1061 var = var.replace(".", "_") 1062 var = var.replace("-", "_") 1063 return var.upper() 1064 1065 def get_affixes_from_match(self, match): 1066 prefix = re.match(self.ir_prefix, match.group(2)).group(0) 1067 suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0) 1068 return prefix, suffix 1069 1070 1071class GeneralizerInfo: 1072 """ 1073 A GeneralizerInfo object holds information about how check lines should be generalized 1074 (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file 1075 state (e.g. information about IR global variables). 1076 """ 1077 1078 MODE_IR = 0 1079 MODE_ASM = 1 1080 MODE_ANALYZE = 2 1081 1082 def __init__( 1083 self, 1084 version, 1085 mode, 1086 nameless_values: List[NamelessValue], 1087 regexp_prefix, 1088 regexp_suffix, 1089 ): 1090 self._version = version 1091 self._mode = mode 1092 self._nameless_values = nameless_values 1093 1094 self._regexp_prefix = regexp_prefix 1095 self._regexp_suffix = regexp_suffix 1096 1097 self._regexp, _ = self._build_regexp(False, False) 1098 ( 1099 self._unstable_globals_regexp, 1100 self._unstable_globals_values, 1101 ) = self._build_regexp(True, True) 1102 1103 def _build_regexp(self, globals_only, unstable_only): 1104 matches = [] 1105 values = [] 1106 for nameless_value in self._nameless_values: 1107 is_global = nameless_value.global_ir_rhs_regexp is not None 1108 if globals_only and not is_global: 1109 continue 1110 if unstable_only and nameless_value.match_literally: 1111 continue 1112 1113 match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})" 1114 if self.is_ir() and not globals_only and is_global: 1115 match = "^" + match 1116 matches.append(match) 1117 values.append(nameless_value) 1118 1119 regexp_string = r"|".join(matches) 1120 1121 return ( 1122 re.compile( 1123 self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix 1124 ), 1125 values, 1126 ) 1127 1128 def get_version(self): 1129 return self._version 1130 1131 def is_ir(self): 1132 return self._mode == GeneralizerInfo.MODE_IR 1133 1134 def is_asm(self): 1135 return self._mode == GeneralizerInfo.MODE_ASM 1136 1137 def is_analyze(self): 1138 return self._mode == GeneralizerInfo.MODE_ANALYZE 1139 1140 def get_nameless_values(self): 1141 return self._nameless_values 1142 1143 def get_regexp(self): 1144 return self._regexp 1145 1146 def get_unstable_globals_regexp(self): 1147 return self._unstable_globals_regexp 1148 1149 # The entire match is group 0, the prefix has one group (=1), the entire 1150 # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. 1151 FIRST_NAMELESS_GROUP_IN_MATCH = 3 1152 1153 def get_match_info(self, match): 1154 """ 1155 Returns (name, nameless_value) for the given match object 1156 """ 1157 if match.re == self._regexp: 1158 values = self._nameless_values 1159 else: 1160 match.re == self._unstable_globals_regexp 1161 values = self._unstable_globals_values 1162 for i in range(len(values)): 1163 g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH) 1164 if g is not None: 1165 return g, values[i] 1166 error("Unable to identify the kind of IR value from the match!") 1167 return None, None 1168 1169 # See get_idx_from_match 1170 def get_name_from_match(self, match): 1171 return self.get_match_info(match)[0] 1172 1173 def get_nameless_value_from_match(self, match) -> NamelessValue: 1174 return self.get_match_info(match)[1] 1175 1176 1177def make_ir_generalizer(version): 1178 values = [] 1179 1180 if version >= 5: 1181 values += [ 1182 NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None), 1183 NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"), 1184 ] 1185 1186 values += [ 1187 # check_prefix check_key ir_prefix ir_regexp global_ir_rhs_regexp 1188 NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None), 1189 NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None), 1190 NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"), 1191 NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None), 1192 NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True), 1193 NamelessValue( 1194 r"GLOBNAMED", 1195 "@", 1196 r"@", 1197 r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*", 1198 r".+", 1199 is_before_functions=True, 1200 match_literally=True, 1201 interlaced_with_previous=True, 1202 ), 1203 NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None), 1204 NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None), 1205 NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None), 1206 NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None), 1207 NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None), 1208 NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None), 1209 NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None), 1210 NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"), 1211 NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None), 1212 NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None), 1213 NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None), 1214 ] 1215 1216 prefix = r"(\s*)" 1217 suffix = r"([,\s\(\)\}]|\Z)" 1218 1219 # values = [ 1220 # nameless_value 1221 # for nameless_value in IR_NAMELESS_VALUES 1222 # if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and 1223 # not (unstable_ids_only and nameless_value.match_literally) 1224 # ] 1225 1226 return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix) 1227 1228 1229def make_asm_generalizer(version): 1230 values = [ 1231 NamelessValue( 1232 r"MCINST", 1233 "Inst#", 1234 "<MCInst #", 1235 r"\d+", 1236 r".+", 1237 is_number=True, 1238 replace_number_with_counter=True, 1239 ), 1240 NamelessValue( 1241 r"MCREG", 1242 "Reg:", 1243 "<MCOperand Reg:", 1244 r"\d+", 1245 r".+", 1246 is_number=True, 1247 replace_number_with_counter=True, 1248 ), 1249 ] 1250 1251 prefix = r"((?:#|//)\s*)" 1252 suffix = r"([>\s]|\Z)" 1253 1254 return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix) 1255 1256 1257def make_analyze_generalizer(version): 1258 values = [ 1259 NamelessValue( 1260 r"GRP", 1261 "#", 1262 r"", 1263 r"0x[0-9a-f]+", 1264 None, 1265 replace_number_with_counter=True, 1266 ), 1267 ] 1268 1269 prefix = r"(\s*)" 1270 suffix = r"(\)?:)" 1271 1272 return GeneralizerInfo( 1273 version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix 1274 ) 1275 1276 1277# Return true if var clashes with the scripted FileCheck check_prefix. 1278def may_clash_with_default_check_prefix_name(check_prefix, var): 1279 return check_prefix and re.match( 1280 r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE 1281 ) 1282 1283 1284def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: 1285 """ 1286 Find a large ordered matching between strings in lhs and rhs. 1287 1288 Think of this as finding the *unchanged* lines in a diff, where the entries 1289 of lhs and rhs are lines of the files being diffed. 1290 1291 Returns a list of matched (lhs_idx, rhs_idx) pairs. 1292 """ 1293 1294 if not lhs or not rhs: 1295 return [] 1296 1297 # Collect matches in reverse order. 1298 matches = [] 1299 1300 # First, collect a set of candidate matching edges. We limit this to a 1301 # constant multiple of the input size to avoid quadratic runtime. 1302 patterns = collections.defaultdict(lambda: ([], [])) 1303 1304 for idx in range(len(lhs)): 1305 patterns[lhs[idx]][0].append(idx) 1306 for idx in range(len(rhs)): 1307 patterns[rhs[idx]][1].append(idx) 1308 1309 multiple_patterns = [] 1310 1311 candidates = [] 1312 for pattern in patterns.values(): 1313 if not pattern[0] or not pattern[1]: 1314 continue 1315 1316 if len(pattern[0]) == len(pattern[1]) == 1: 1317 candidates.append((pattern[0][0], pattern[1][0])) 1318 else: 1319 multiple_patterns.append(pattern) 1320 1321 multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) 1322 1323 for pattern in multiple_patterns: 1324 if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( 1325 len(lhs) + len(rhs) 1326 ): 1327 break 1328 for lhs_idx in pattern[0]: 1329 for rhs_idx in pattern[1]: 1330 candidates.append((lhs_idx, rhs_idx)) 1331 1332 if not candidates: 1333 # The LHS and RHS either share nothing in common, or lines are just too 1334 # identical. In that case, let's give up and not match anything. 1335 return [] 1336 1337 # Compute a maximal crossing-free matching via an algorithm that is 1338 # inspired by a mixture of dynamic programming and line-sweeping in 1339 # discrete geometry. 1340 # 1341 # I would be surprised if this algorithm didn't exist somewhere in the 1342 # literature, but I found it without consciously recalling any 1343 # references, so you'll have to make do with the explanation below. 1344 # Sorry. 1345 # 1346 # The underlying graph is bipartite: 1347 # - nodes on the LHS represent lines in the original check 1348 # - nodes on the RHS represent lines in the new (updated) check 1349 # 1350 # Nodes are implicitly sorted by the corresponding line number. 1351 # Edges (unique_matches) are sorted by the line number on the LHS. 1352 # 1353 # Here's the geometric intuition for the algorithm. 1354 # 1355 # * Plot the edges as points in the plane, with the original line 1356 # number on the X axis and the updated line number on the Y axis. 1357 # * The goal is to find a longest "chain" of points where each point 1358 # is strictly above and to the right of the previous point. 1359 # * The algorithm proceeds by sweeping a vertical line from left to 1360 # right. 1361 # * The algorithm maintains a table where `table[N]` answers the 1362 # question "What is currently the 'best' way to build a chain of N+1 1363 # points to the left of the vertical line". Here, 'best' means 1364 # that the last point of the chain is a as low as possible (minimal 1365 # Y coordinate). 1366 # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of 1367 # the last point in the chain and `y` is its Y coordinate 1368 # * A key invariant is that the Y values in the table are 1369 # monotonically increasing 1370 # * Thanks to these properties, the table can be used to answer the 1371 # question "What is the longest chain that can be built to the left 1372 # of the vertical line using only points below a certain Y value", 1373 # using a binary search over the table. 1374 # * The algorithm also builds a backlink structure in which every point 1375 # links back to the previous point on a best (longest) chain ending 1376 # at that point 1377 # 1378 # The core loop of the algorithm sweeps the line and updates the table 1379 # and backlink structure for every point that we cross during the sweep. 1380 # Therefore, the algorithm is trivially O(M log M) in the number of 1381 # points. 1382 candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) 1383 1384 backlinks = [] 1385 table_rhs_idx = [] 1386 table_candidate_idx = [] 1387 for _, rhs_idx in candidates: 1388 candidate_idx = len(backlinks) 1389 ti = bisect.bisect_left(table_rhs_idx, rhs_idx) 1390 1391 # Update the table to record a best chain ending in the current point. 1392 # There always is one, and if any of the previously visited points had 1393 # a higher Y coordinate, then there is always a previously recorded best 1394 # chain that can be improved upon by using the current point. 1395 # 1396 # There is only one case where there is some ambiguity. If the 1397 # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as 1398 # the current point (this can only happen if the same line appeared 1399 # multiple times on the LHS), then we could choose to keep the 1400 # previously recorded best chain instead. That would bias the algorithm 1401 # differently but should have no systematic impact on the quality of the 1402 # result. 1403 if ti < len(table_rhs_idx): 1404 table_rhs_idx[ti] = rhs_idx 1405 table_candidate_idx[ti] = candidate_idx 1406 else: 1407 table_rhs_idx.append(rhs_idx) 1408 table_candidate_idx.append(candidate_idx) 1409 if ti > 0: 1410 backlinks.append(table_candidate_idx[ti - 1]) 1411 else: 1412 backlinks.append(None) 1413 1414 # Commit to names in the matching by walking the backlinks. Recursively 1415 # attempt to fill in more matches in-between. 1416 match_idx = table_candidate_idx[-1] 1417 while match_idx is not None: 1418 current = candidates[match_idx] 1419 matches.append(current) 1420 match_idx = backlinks[match_idx] 1421 1422 matches.reverse() 1423 return matches 1424 1425 1426VARIABLE_TAG = "[[@@]]" 1427METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") 1428NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") 1429 1430 1431class TestVar: 1432 def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str): 1433 self._nameless_value = nameless_value 1434 1435 self._prefix = prefix 1436 self._suffix = suffix 1437 1438 def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str): 1439 if prefix != self._prefix: 1440 self._prefix = "" 1441 if suffix != self._suffix: 1442 self._suffix = "" 1443 1444 def get_variable_name(self, text): 1445 return self._nameless_value.get_value_name( 1446 text, self._nameless_value.check_prefix 1447 ) 1448 1449 def get_def(self, name, prefix, suffix): 1450 if self._nameless_value.is_number: 1451 return f"{prefix}[[#{name}:]]{suffix}" 1452 if self._prefix: 1453 assert self._prefix == prefix 1454 prefix = "" 1455 if self._suffix: 1456 assert self._suffix == suffix 1457 suffix = "" 1458 return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}" 1459 1460 def get_use(self, name, prefix, suffix): 1461 if self._nameless_value.is_number: 1462 return f"{prefix}[[#{name}]]{suffix}" 1463 if self._prefix: 1464 assert self._prefix == prefix 1465 prefix = "" 1466 if self._suffix: 1467 assert self._suffix == suffix 1468 suffix = "" 1469 return f"{prefix}[[{name}]]{suffix}" 1470 1471 1472class CheckValueInfo: 1473 def __init__( 1474 self, 1475 key, 1476 text, 1477 name: str, 1478 prefix: str, 1479 suffix: str, 1480 ): 1481 # Key for the value, e.g. '%' 1482 self.key = key 1483 1484 # Text to be matched by the FileCheck variable (without any prefix or suffix) 1485 self.text = text 1486 1487 # Name of the FileCheck variable 1488 self.name = name 1489 1490 # Prefix and suffix that were captured by the NamelessValue regular expression 1491 self.prefix = prefix 1492 self.suffix = suffix 1493 1494 1495# Represent a check line in a way that allows us to compare check lines while 1496# ignoring some or all of the FileCheck variable names. 1497class CheckLineInfo: 1498 def __init__(self, line, values): 1499 # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG 1500 self.line: str = line 1501 1502 # Information on each FileCheck variable name occurrences in the line 1503 self.values: List[CheckValueInfo] = values 1504 1505 def __repr__(self): 1506 return f"CheckLineInfo(line={self.line}, self.values={self.values})" 1507 1508 1509def remap_metavar_names( 1510 old_line_infos: List[CheckLineInfo], 1511 new_line_infos: List[CheckLineInfo], 1512 committed_names: Set[str], 1513) -> Mapping[str, str]: 1514 """ 1515 Map all FileCheck variable names that appear in new_line_infos to new 1516 FileCheck variable names in an attempt to reduce the diff from old_line_infos 1517 to new_line_infos. 1518 1519 This is done by: 1520 * Matching old check lines and new check lines using a diffing algorithm 1521 applied after replacing names with wildcards. 1522 * Committing to variable names such that the matched lines become equal 1523 (without wildcards) if possible 1524 * This is done recursively to handle cases where many lines are equal 1525 after wildcard replacement 1526 """ 1527 # Initialize uncommitted identity mappings 1528 new_mapping = {} 1529 for line in new_line_infos: 1530 for value in line.values: 1531 new_mapping[value.name] = value.name 1532 1533 # Recursively commit to the identity mapping or find a better one 1534 def recurse(old_begin, old_end, new_begin, new_end): 1535 if old_begin == old_end or new_begin == new_end: 1536 return 1537 1538 # Find a matching of lines where uncommitted names are replaced 1539 # with a placeholder. 1540 def diffify_line(line, mapper): 1541 values = [] 1542 for value in line.values: 1543 mapped = mapper(value.name) 1544 values.append(mapped if mapped in committed_names else "?") 1545 return line.line.strip() + " @@@ " + " @ ".join(values) 1546 1547 lhs_lines = [ 1548 diffify_line(line, lambda x: x) 1549 for line in old_line_infos[old_begin:old_end] 1550 ] 1551 rhs_lines = [ 1552 diffify_line(line, lambda x: new_mapping[x]) 1553 for line in new_line_infos[new_begin:new_end] 1554 ] 1555 1556 candidate_matches = find_diff_matching(lhs_lines, rhs_lines) 1557 1558 candidate_matches = [ 1559 (old_begin + lhs_idx, new_begin + rhs_idx) 1560 for lhs_idx, rhs_idx in candidate_matches 1561 ] 1562 1563 # Candidate matches may conflict if they require conflicting mappings of 1564 # names. We want to determine a large set of compatible candidates, 1565 # because that leads to a small diff. 1566 # 1567 # We think of the candidates as vertices in a conflict graph. The 1568 # conflict graph has edges between incompatible candidates. We want to 1569 # find a large independent set in this graph. 1570 # 1571 # Greedily selecting candidates and removing incompatible ones has the 1572 # disadvantage that making few bad decisions early on can have huge 1573 # consequences. 1574 # 1575 # Instead, we implicitly compute multiple independent sets by greedily 1576 # assigning a *coloring* to the conflict graph. Then, we select the 1577 # largest color class (which is the largest independent set we found), 1578 # commit to all candidates in it, and recurse. 1579 # 1580 # Note that we don't actually materialize the conflict graph. Instead, 1581 # each color class tracks the information needed to decide implicitly 1582 # whether a vertex conflicts (has an edge to) any of the vertices added 1583 # to the color class so far. 1584 class Color: 1585 def __init__(self): 1586 # (lhs_idx, rhs_idx) of matches in this color 1587 self.matches = [] 1588 1589 # rhs_name -> lhs_name mappings required by this color 1590 self.mapping = {} 1591 1592 # lhs_names committed for this color 1593 self.committed = set() 1594 1595 colors = [] 1596 1597 for lhs_idx, rhs_idx in candidate_matches: 1598 lhs_line = old_line_infos[lhs_idx] 1599 rhs_line = new_line_infos[rhs_idx] 1600 1601 # We scan through the uncommitted names in the candidate line and 1602 # filter out the color classes to which the candidate could be 1603 # assigned. 1604 # 1605 # Simultaneously, we prepare a new color class in case the candidate 1606 # conflicts with all colors that have been established so far. 1607 compatible_colors = colors[:] 1608 new_color = Color() 1609 new_color.matches.append((lhs_idx, rhs_idx)) 1610 1611 for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): 1612 if new_mapping[rhs_value.name] in committed_names: 1613 # The new value has already been committed. If it was mapped 1614 # to the same name as the original value, we can consider 1615 # committing other values from this line. Otherwise, we 1616 # should ignore this line. 1617 if new_mapping[rhs_value.name] == lhs_value.name: 1618 continue 1619 else: 1620 break 1621 1622 if rhs_value.name in new_color.mapping: 1623 # Same, but for a possible commit happening on the same line 1624 if new_color.mapping[rhs_value.name] == lhs_value.name: 1625 continue 1626 else: 1627 break 1628 1629 if ( 1630 lhs_value.name in committed_names 1631 or lhs_value.name in new_color.committed 1632 ): 1633 # We can't map this value because the name we would map it 1634 # to has already been committed for something else. Give up 1635 # on this line. 1636 break 1637 1638 new_color.mapping[rhs_value.name] = lhs_value.name 1639 new_color.committed.add(lhs_value.name) 1640 1641 color_idx = 0 1642 while color_idx < len(compatible_colors): 1643 color = compatible_colors[color_idx] 1644 compatible = True 1645 if rhs_value.name in color.mapping: 1646 compatible = color.mapping[rhs_value.name] == lhs_value.name 1647 else: 1648 compatible = lhs_value.name not in color.committed 1649 if compatible: 1650 color_idx += 1 1651 else: 1652 del compatible_colors[color_idx] 1653 else: 1654 # We never broke out of the loop, which means that at a minimum, 1655 # this line is viable standalone 1656 if compatible_colors: 1657 color = max(compatible_colors, key=lambda color: len(color.matches)) 1658 color.mapping.update(new_color.mapping) 1659 color.committed.update(new_color.committed) 1660 color.matches.append((lhs_idx, rhs_idx)) 1661 else: 1662 colors.append(new_color) 1663 1664 if colors: 1665 # Pick the largest color class. This gives us a large independent 1666 # (non-conflicting) set of candidate matches. Assign all names 1667 # required by the independent set and recurse. 1668 max_color = max(colors, key=lambda color: len(color.matches)) 1669 1670 for rhs_var, lhs_var in max_color.mapping.items(): 1671 new_mapping[rhs_var] = lhs_var 1672 committed_names.add(lhs_var) 1673 1674 if ( 1675 lhs_var != rhs_var 1676 and lhs_var in new_mapping 1677 and new_mapping[lhs_var] == lhs_var 1678 ): 1679 new_mapping[lhs_var] = "conflict_" + lhs_var 1680 1681 matches = ( 1682 [(old_begin - 1, new_begin - 1)] 1683 + max_color.matches 1684 + [(old_end, new_end)] 1685 ) 1686 1687 for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): 1688 recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) 1689 1690 recurse(0, len(old_line_infos), 0, len(new_line_infos)) 1691 1692 # Commit to remaining names and resolve conflicts 1693 for new_name, mapped_name in new_mapping.items(): 1694 if mapped_name in committed_names: 1695 continue 1696 if not mapped_name.startswith("conflict_"): 1697 assert mapped_name == new_name 1698 committed_names.add(mapped_name) 1699 1700 for new_name, mapped_name in new_mapping.items(): 1701 if mapped_name in committed_names: 1702 continue 1703 assert mapped_name.startswith("conflict_") 1704 1705 m = NUMERIC_SUFFIX_RE.search(new_name) 1706 base_name = new_name[: m.start()] 1707 suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 1708 while True: 1709 candidate = f"{base_name}{suffix}" 1710 if candidate not in committed_names: 1711 new_mapping[new_name] = candidate 1712 committed_names.add(candidate) 1713 break 1714 suffix += 1 1715 1716 return new_mapping 1717 1718 1719def generalize_check_lines( 1720 lines, 1721 ginfo: GeneralizerInfo, 1722 vars_seen, 1723 global_vars_seen, 1724 preserve_names=False, 1725 original_check_lines=None, 1726 *, 1727 unstable_globals_only=False, 1728): 1729 if unstable_globals_only: 1730 regexp = ginfo.get_unstable_globals_regexp() 1731 else: 1732 regexp = ginfo.get_regexp() 1733 1734 multiple_braces_re = re.compile(r"({{+)|(}}+)") 1735 1736 def escape_braces(match_obj): 1737 return "{{" + re.escape(match_obj.group(0)) + "}}" 1738 1739 if ginfo.is_ir(): 1740 for i, line in enumerate(lines): 1741 # An IR variable named '%.' matches the FileCheck regex string. 1742 line = line.replace("%.", "%dot") 1743 for regex in _global_hex_value_regex: 1744 if re.match("^@" + regex + " = ", line): 1745 line = re.sub( 1746 r"\bi([0-9]+) ([0-9]+)", 1747 lambda m: "i" 1748 + m.group(1) 1749 + " [[#" 1750 + hex(int(m.group(2))) 1751 + "]]", 1752 line, 1753 ) 1754 break 1755 # Ignore any comments, since the check lines will too. 1756 scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) 1757 lines[i] = scrubbed_line 1758 1759 if not preserve_names: 1760 committed_names = set( 1761 test_var.get_variable_name(name) 1762 for (name, _), test_var in vars_seen.items() 1763 ) 1764 defs = set() 1765 1766 # Collect information about new check lines, and generalize global reference 1767 new_line_infos = [] 1768 for line in lines: 1769 filtered_line = "" 1770 values = [] 1771 while True: 1772 m = regexp.search(line) 1773 if m is None: 1774 filtered_line += line 1775 break 1776 1777 name = ginfo.get_name_from_match(m) 1778 nameless_value = ginfo.get_nameless_value_from_match(m) 1779 prefix, suffix = nameless_value.get_affixes_from_match(m) 1780 if may_clash_with_default_check_prefix_name( 1781 nameless_value.check_prefix, name 1782 ): 1783 warn( 1784 "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict" 1785 " with scripted FileCheck name." % (name,) 1786 ) 1787 1788 # Record the variable as seen and (for locals) accumulate 1789 # prefixes/suffixes 1790 is_local_def = nameless_value.is_local_def_ir_value() 1791 if is_local_def: 1792 vars_dict = vars_seen 1793 else: 1794 vars_dict = global_vars_seen 1795 1796 key = (name, nameless_value.check_key) 1797 1798 if is_local_def: 1799 test_prefix = prefix 1800 test_suffix = suffix 1801 else: 1802 test_prefix = "" 1803 test_suffix = "" 1804 1805 if key in vars_dict: 1806 vars_dict[key].seen(nameless_value, test_prefix, test_suffix) 1807 else: 1808 vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix) 1809 defs.add(key) 1810 1811 var = vars_dict[key].get_variable_name(name) 1812 1813 # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 1814 filtered_line += ( 1815 line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex) 1816 ) 1817 line = line[m.end() :] 1818 1819 values.append( 1820 CheckValueInfo( 1821 key=nameless_value.check_key, 1822 text=name, 1823 name=var, 1824 prefix=prefix, 1825 suffix=suffix, 1826 ) 1827 ) 1828 1829 new_line_infos.append(CheckLineInfo(filtered_line, values)) 1830 1831 committed_names.update( 1832 test_var.get_variable_name(name) 1833 for (name, _), test_var in global_vars_seen.items() 1834 ) 1835 1836 # Collect information about original check lines, if any. 1837 orig_line_infos = [] 1838 for line in original_check_lines or []: 1839 filtered_line = "" 1840 values = [] 1841 while True: 1842 m = METAVAR_RE.search(line) 1843 if m is None: 1844 filtered_line += line 1845 break 1846 1847 # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 1848 filtered_line += line[: m.start()] + VARIABLE_TAG 1849 line = line[m.end() :] 1850 values.append( 1851 CheckValueInfo( 1852 key=None, 1853 text=None, 1854 name=m.group(1), 1855 prefix="", 1856 suffix="", 1857 ) 1858 ) 1859 orig_line_infos.append(CheckLineInfo(filtered_line, values)) 1860 1861 # Compute the variable name mapping 1862 mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names) 1863 1864 # Apply the variable name mapping 1865 for i, line_info in enumerate(new_line_infos): 1866 line_template = line_info.line 1867 line = "" 1868 1869 for value in line_info.values: 1870 idx = line_template.find(VARIABLE_TAG) 1871 line += line_template[:idx] 1872 line_template = line_template[idx + len(VARIABLE_TAG) :] 1873 1874 key = (value.text, value.key) 1875 if value.key == "%": 1876 vars_dict = vars_seen 1877 else: 1878 vars_dict = global_vars_seen 1879 1880 if key in defs: 1881 line += vars_dict[key].get_def( 1882 mapping[value.name], value.prefix, value.suffix 1883 ) 1884 defs.remove(key) 1885 else: 1886 line += vars_dict[key].get_use( 1887 mapping[value.name], value.prefix, value.suffix 1888 ) 1889 1890 line += line_template 1891 1892 lines[i] = line 1893 1894 if ginfo.is_analyze(): 1895 for i, _ in enumerate(lines): 1896 # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. 1897 scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) 1898 lines[i] = scrubbed_line 1899 1900 return lines 1901 1902 1903def add_checks( 1904 output_lines, 1905 comment_marker, 1906 prefix_list, 1907 func_dict, 1908 func_name, 1909 check_label_format, 1910 ginfo, 1911 global_vars_seen_dict, 1912 is_filtered, 1913 preserve_names=False, 1914 original_check_lines: Mapping[str, List[str]] = {}, 1915): 1916 # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. 1917 prefix_exclusions = set() 1918 printed_prefixes = [] 1919 for p in prefix_list: 1920 checkprefixes = p[0] 1921 # If not all checkprefixes of this run line produced the function we cannot check for it as it does not 1922 # exist for this run line. A subset of the check prefixes might know about the function but only because 1923 # other run lines created it. 1924 if any( 1925 map( 1926 lambda checkprefix: func_name not in func_dict[checkprefix], 1927 checkprefixes, 1928 ) 1929 ): 1930 prefix_exclusions |= set(checkprefixes) 1931 continue 1932 1933 # prefix_exclusions is constructed, we can now emit the output 1934 for p in prefix_list: 1935 global_vars_seen = {} 1936 checkprefixes = p[0] 1937 for checkprefix in checkprefixes: 1938 if checkprefix in global_vars_seen_dict: 1939 global_vars_seen.update(global_vars_seen_dict[checkprefix]) 1940 else: 1941 global_vars_seen_dict[checkprefix] = {} 1942 if checkprefix in printed_prefixes: 1943 break 1944 1945 # Check if the prefix is excluded. 1946 if checkprefix in prefix_exclusions: 1947 continue 1948 1949 # If we do not have output for this prefix we skip it. 1950 if not func_dict[checkprefix][func_name]: 1951 continue 1952 1953 # Add some space between different check prefixes, but not after the last 1954 # check line (before the test code). 1955 if ginfo.is_asm(): 1956 if len(printed_prefixes) != 0: 1957 output_lines.append(comment_marker) 1958 1959 if checkprefix not in global_vars_seen_dict: 1960 global_vars_seen_dict[checkprefix] = {} 1961 1962 global_vars_seen_before = [key for key in global_vars_seen.keys()] 1963 1964 vars_seen = {} 1965 printed_prefixes.append(checkprefix) 1966 attrs = str(func_dict[checkprefix][func_name].attrs) 1967 attrs = "" if attrs == "None" else attrs 1968 if ginfo.get_version() > 1: 1969 funcdef_attrs_and_ret = func_dict[checkprefix][ 1970 func_name 1971 ].funcdef_attrs_and_ret 1972 else: 1973 funcdef_attrs_and_ret = "" 1974 1975 if attrs: 1976 output_lines.append( 1977 "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs) 1978 ) 1979 args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) 1980 if args_and_sig: 1981 args_and_sig = generalize_check_lines( 1982 [args_and_sig], 1983 ginfo, 1984 vars_seen, 1985 global_vars_seen, 1986 preserve_names, 1987 original_check_lines=[], 1988 )[0] 1989 func_name_separator = func_dict[checkprefix][func_name].func_name_separator 1990 if "[[" in args_and_sig: 1991 # Captures in label lines are not supported, thus split into a -LABEL 1992 # and a separate -SAME line that contains the arguments with captures. 1993 args_and_sig_prefix = "" 1994 if ginfo.get_version() >= 3 and args_and_sig.startswith("("): 1995 # Ensure the "(" separating function name and arguments is in the 1996 # label line. This is required in case of function names that are 1997 # prefixes of each other. Otherwise, the label line for "foo" might 1998 # incorrectly match on "foo.specialized". 1999 args_and_sig_prefix = args_and_sig[0] 2000 args_and_sig = args_and_sig[1:] 2001 2002 # Removing args_and_sig from the label match line requires 2003 # func_name_separator to be empty. Otherwise, the match will not work. 2004 assert func_name_separator == "" 2005 output_lines.append( 2006 check_label_format 2007 % ( 2008 checkprefix, 2009 funcdef_attrs_and_ret, 2010 func_name, 2011 args_and_sig_prefix, 2012 func_name_separator, 2013 ) 2014 ) 2015 output_lines.append( 2016 "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig) 2017 ) 2018 else: 2019 output_lines.append( 2020 check_label_format 2021 % ( 2022 checkprefix, 2023 funcdef_attrs_and_ret, 2024 func_name, 2025 args_and_sig, 2026 func_name_separator, 2027 ) 2028 ) 2029 func_body = str(func_dict[checkprefix][func_name]).splitlines() 2030 if not func_body: 2031 # We have filtered everything. 2032 continue 2033 2034 # For ASM output, just emit the check lines. 2035 if ginfo.is_asm(): 2036 body_start = 1 2037 if is_filtered: 2038 # For filtered output we don't add "-NEXT" so don't add extra spaces 2039 # before the first line. 2040 body_start = 0 2041 else: 2042 output_lines.append( 2043 "%s %s: %s" % (comment_marker, checkprefix, func_body[0]) 2044 ) 2045 func_lines = generalize_check_lines( 2046 func_body[body_start:], ginfo, vars_seen, global_vars_seen 2047 ) 2048 for func_line in func_lines: 2049 if func_line.strip() == "": 2050 output_lines.append( 2051 "%s %s-EMPTY:" % (comment_marker, checkprefix) 2052 ) 2053 else: 2054 check_suffix = "-NEXT" if not is_filtered else "" 2055 output_lines.append( 2056 "%s %s%s: %s" 2057 % (comment_marker, checkprefix, check_suffix, func_line) 2058 ) 2059 # Remember new global variables we have not seen before 2060 for key in global_vars_seen: 2061 if key not in global_vars_seen_before: 2062 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] 2063 break 2064 # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well. 2065 elif ginfo.is_analyze(): 2066 func_body = generalize_check_lines( 2067 func_body, ginfo, vars_seen, global_vars_seen 2068 ) 2069 for func_line in func_body: 2070 if func_line.strip() == "": 2071 output_lines.append( 2072 "{} {}-EMPTY:".format(comment_marker, checkprefix) 2073 ) 2074 else: 2075 check_suffix = "-NEXT" if not is_filtered else "" 2076 output_lines.append( 2077 "{} {}{}: {}".format( 2078 comment_marker, checkprefix, check_suffix, func_line 2079 ) 2080 ) 2081 2082 # Add space between different check prefixes and also before the first 2083 # line of code in the test function. 2084 output_lines.append(comment_marker) 2085 2086 # Remember new global variables we have not seen before 2087 for key in global_vars_seen: 2088 if key not in global_vars_seen_before: 2089 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] 2090 break 2091 # For IR output, change all defs to FileCheck variables, so we're immune 2092 # to variable naming fashions. 2093 else: 2094 func_body = generalize_check_lines( 2095 func_body, 2096 ginfo, 2097 vars_seen, 2098 global_vars_seen, 2099 preserve_names, 2100 original_check_lines=original_check_lines.get(checkprefix), 2101 ) 2102 2103 # This could be selectively enabled with an optional invocation argument. 2104 # Disabled for now: better to check everything. Be safe rather than sorry. 2105 2106 # Handle the first line of the function body as a special case because 2107 # it's often just noise (a useless asm comment or entry label). 2108 # if func_body[0].startswith("#") or func_body[0].startswith("entry:"): 2109 # is_blank_line = True 2110 # else: 2111 # output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0])) 2112 # is_blank_line = False 2113 2114 is_blank_line = False 2115 2116 for func_line in func_body: 2117 if func_line.strip() == "": 2118 is_blank_line = True 2119 continue 2120 # Do not waste time checking IR comments. 2121 func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line) 2122 2123 # Skip blank lines instead of checking them. 2124 if is_blank_line: 2125 output_lines.append( 2126 "{} {}: {}".format( 2127 comment_marker, checkprefix, func_line 2128 ) 2129 ) 2130 else: 2131 check_suffix = "-NEXT" if not is_filtered else "" 2132 output_lines.append( 2133 "{} {}{}: {}".format( 2134 comment_marker, checkprefix, check_suffix, func_line 2135 ) 2136 ) 2137 is_blank_line = False 2138 2139 # Add space between different check prefixes and also before the first 2140 # line of code in the test function. 2141 output_lines.append(comment_marker) 2142 2143 # Remember new global variables we have not seen before 2144 for key in global_vars_seen: 2145 if key not in global_vars_seen_before: 2146 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] 2147 break 2148 return printed_prefixes 2149 2150 2151def add_ir_checks( 2152 output_lines, 2153 comment_marker, 2154 prefix_list, 2155 func_dict, 2156 func_name, 2157 preserve_names, 2158 function_sig, 2159 ginfo: GeneralizerInfo, 2160 global_vars_seen_dict, 2161 is_filtered, 2162 original_check_lines={}, 2163): 2164 assert ginfo.is_ir() 2165 # Label format is based on IR string. 2166 if function_sig and ginfo.get_version() > 1: 2167 function_def_regex = "define %s" 2168 elif function_sig: 2169 function_def_regex = "define {{[^@]+}}%s" 2170 else: 2171 function_def_regex = "%s" 2172 check_label_format = "{} %s-LABEL: {}@%s%s%s".format( 2173 comment_marker, function_def_regex 2174 ) 2175 return add_checks( 2176 output_lines, 2177 comment_marker, 2178 prefix_list, 2179 func_dict, 2180 func_name, 2181 check_label_format, 2182 ginfo, 2183 global_vars_seen_dict, 2184 is_filtered, 2185 preserve_names, 2186 original_check_lines=original_check_lines, 2187 ) 2188 2189 2190def add_analyze_checks( 2191 output_lines, 2192 comment_marker, 2193 prefix_list, 2194 func_dict, 2195 func_name, 2196 ginfo: GeneralizerInfo, 2197 is_filtered, 2198): 2199 assert ginfo.is_analyze() 2200 check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker) 2201 global_vars_seen_dict = {} 2202 return add_checks( 2203 output_lines, 2204 comment_marker, 2205 prefix_list, 2206 func_dict, 2207 func_name, 2208 check_label_format, 2209 ginfo, 2210 global_vars_seen_dict, 2211 is_filtered, 2212 ) 2213 2214 2215def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo): 2216 for nameless_value in ginfo.get_nameless_values(): 2217 if nameless_value.global_ir_rhs_regexp is None: 2218 continue 2219 2220 lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp 2221 rhs_re_str = nameless_value.global_ir_rhs_regexp 2222 2223 global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$" 2224 global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M)) 2225 lines = [] 2226 for m in global_ir_value_re.finditer(raw_tool_output): 2227 # Attach the substring's start index so that CHECK lines 2228 # can be sorted properly even if they are matched by different nameless values. 2229 # This is relevant for GLOB and GLOBNAMED since they may appear interlaced. 2230 lines.append((m.start(), m.group(0))) 2231 2232 for prefix in prefixes: 2233 if glob_val_dict[prefix] is None: 2234 continue 2235 if nameless_value.check_prefix in glob_val_dict[prefix]: 2236 if lines == glob_val_dict[prefix][nameless_value.check_prefix]: 2237 continue 2238 if prefix == prefixes[-1]: 2239 warn("Found conflicting asm under the same prefix: %r!" % (prefix,)) 2240 else: 2241 glob_val_dict[prefix][nameless_value.check_prefix] = None 2242 continue 2243 glob_val_dict[prefix][nameless_value.check_prefix] = lines 2244 2245 2246def filter_globals_according_to_preference( 2247 global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting 2248): 2249 if global_check_setting == "none": 2250 return [] 2251 if global_check_setting == "all": 2252 return global_val_lines_w_index 2253 assert global_check_setting == "smart" 2254 2255 if nameless_value.check_key == "#": 2256 # attribute sets are usually better checked by --check-attributes 2257 return [] 2258 2259 def extract(line, nv): 2260 p = ( 2261 "^" 2262 + nv.ir_prefix 2263 + "(" 2264 + nv.ir_regexp 2265 + ") = (" 2266 + nv.global_ir_rhs_regexp 2267 + ")" 2268 ) 2269 match = re.match(p, line) 2270 return (match.group(1), re.findall(nv.ir_regexp, match.group(2))) 2271 2272 transitively_visible = set() 2273 contains_refs_to = {} 2274 2275 def add(var): 2276 nonlocal transitively_visible 2277 nonlocal contains_refs_to 2278 if var in transitively_visible: 2279 return 2280 transitively_visible.add(var) 2281 if not var in contains_refs_to: 2282 return 2283 for x in contains_refs_to[var]: 2284 add(x) 2285 2286 for i, line in global_val_lines_w_index: 2287 (var, refs) = extract(line, nameless_value) 2288 contains_refs_to[var] = refs 2289 for var, check_key in global_vars_seen: 2290 if check_key != nameless_value.check_key: 2291 continue 2292 add(var) 2293 return [ 2294 (i, line) 2295 for i, line in global_val_lines_w_index 2296 if extract(line, nameless_value)[0] in transitively_visible 2297 ] 2298 2299 2300METADATA_FILTERS = [ 2301 ( 2302 r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?", 2303 r"{{.*}}\2{{.*}}", 2304 ), # preface with glob also, to capture optional CLANG_VENDOR 2305 (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"), 2306] 2307METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS] 2308 2309 2310def filter_unstable_metadata(line): 2311 for f, replacement in METADATA_FILTERS_RE: 2312 line = f.sub(replacement, line) 2313 return line 2314 2315 2316def flush_current_checks(output_lines, new_lines_w_index, comment_marker): 2317 if not new_lines_w_index: 2318 return 2319 output_lines.append(comment_marker + SEPARATOR) 2320 new_lines_w_index.sort() 2321 for _, line in new_lines_w_index: 2322 output_lines.append(line) 2323 new_lines_w_index.clear() 2324 2325 2326def add_global_checks( 2327 glob_val_dict, 2328 comment_marker, 2329 prefix_list, 2330 output_lines, 2331 ginfo: GeneralizerInfo, 2332 global_vars_seen_dict, 2333 preserve_names, 2334 is_before_functions, 2335 global_check_setting, 2336): 2337 printed_prefixes = set() 2338 output_lines_loc = {} # Allows GLOB and GLOBNAMED to be sorted correctly 2339 for nameless_value in ginfo.get_nameless_values(): 2340 if nameless_value.global_ir_rhs_regexp is None: 2341 continue 2342 if nameless_value.is_before_functions != is_before_functions: 2343 continue 2344 for p in prefix_list: 2345 global_vars_seen = {} 2346 checkprefixes = p[0] 2347 if checkprefixes is None: 2348 continue 2349 for checkprefix in checkprefixes: 2350 if checkprefix in global_vars_seen_dict: 2351 global_vars_seen.update(global_vars_seen_dict[checkprefix]) 2352 else: 2353 global_vars_seen_dict[checkprefix] = {} 2354 if (checkprefix, nameless_value.check_prefix) in printed_prefixes: 2355 break 2356 if not glob_val_dict[checkprefix]: 2357 continue 2358 if nameless_value.check_prefix not in glob_val_dict[checkprefix]: 2359 continue 2360 if not glob_val_dict[checkprefix][nameless_value.check_prefix]: 2361 continue 2362 2363 check_lines = [] 2364 global_vars_seen_before = [key for key in global_vars_seen.keys()] 2365 lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix] 2366 lines_w_index = filter_globals_according_to_preference( 2367 lines_w_index, 2368 global_vars_seen_before, 2369 nameless_value, 2370 global_check_setting, 2371 ) 2372 for i, line in lines_w_index: 2373 if _global_value_regex: 2374 matched = False 2375 for regex in _global_value_regex: 2376 if re.match("^@" + regex + " = ", line) or re.match( 2377 "^!" + regex + " = ", line 2378 ): 2379 matched = True 2380 break 2381 if not matched: 2382 continue 2383 [new_line] = generalize_check_lines( 2384 [line], 2385 ginfo, 2386 {}, 2387 global_vars_seen, 2388 preserve_names, 2389 unstable_globals_only=True, 2390 ) 2391 new_line = filter_unstable_metadata(new_line) 2392 check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line) 2393 check_lines.append((i, check_line)) 2394 if not check_lines: 2395 continue 2396 2397 if not checkprefix in output_lines_loc: 2398 output_lines_loc[checkprefix] = [] 2399 if not nameless_value.interlaced_with_previous: 2400 flush_current_checks( 2401 output_lines, output_lines_loc[checkprefix], comment_marker 2402 ) 2403 for check_line in check_lines: 2404 output_lines_loc[checkprefix].append(check_line) 2405 2406 printed_prefixes.add((checkprefix, nameless_value.check_prefix)) 2407 2408 # Remembe new global variables we have not seen before 2409 for key in global_vars_seen: 2410 if key not in global_vars_seen_before: 2411 global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] 2412 break 2413 2414 if printed_prefixes: 2415 for p in prefix_list: 2416 if p[0] is None: 2417 continue 2418 for checkprefix in p[0]: 2419 if checkprefix not in output_lines_loc: 2420 continue 2421 flush_current_checks( 2422 output_lines, output_lines_loc[checkprefix], comment_marker 2423 ) 2424 break 2425 output_lines.append(comment_marker + SEPARATOR) 2426 return printed_prefixes 2427 2428 2429def check_prefix(prefix): 2430 if not PREFIX_RE.match(prefix): 2431 hint = "" 2432 if "," in prefix: 2433 hint = " Did you mean '--check-prefixes=" + prefix + "'?" 2434 warn( 2435 ( 2436 "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." 2437 + hint 2438 ) 2439 % (prefix) 2440 ) 2441 2442 2443def get_check_prefixes(filecheck_cmd): 2444 check_prefixes = [ 2445 item 2446 for m in CHECK_PREFIX_RE.finditer(filecheck_cmd) 2447 for item in m.group(1).split(",") 2448 ] 2449 if not check_prefixes: 2450 check_prefixes = ["CHECK"] 2451 return check_prefixes 2452 2453 2454def verify_filecheck_prefixes(fc_cmd): 2455 fc_cmd_parts = fc_cmd.split() 2456 for part in fc_cmd_parts: 2457 if "check-prefix=" in part: 2458 prefix = part.split("=", 1)[1] 2459 check_prefix(prefix) 2460 elif "check-prefixes=" in part: 2461 prefixes = part.split("=", 1)[1].split(",") 2462 for prefix in prefixes: 2463 check_prefix(prefix) 2464 if prefixes.count(prefix) > 1: 2465 warn( 2466 "Supplied prefix '%s' is not unique in the prefix list." 2467 % (prefix,) 2468 ) 2469 2470 2471def get_autogennote_suffix(parser, args): 2472 autogenerated_note_args = "" 2473 for action in parser._actions: 2474 if not hasattr(args, action.dest): 2475 continue # Ignore options such as --help that aren't included in args 2476 # Ignore parameters such as paths to the binary or the list of tests 2477 if action.dest in ( 2478 "tests", 2479 "update_only", 2480 "tool_binary", 2481 "opt_binary", 2482 "llc_binary", 2483 "clang", 2484 "opt", 2485 "llvm_bin", 2486 "verbose", 2487 "force_update", 2488 "reset_variable_names", 2489 "llvm_mc_binary", 2490 ): 2491 continue 2492 value = getattr(args, action.dest) 2493 if action.dest == "check_globals": 2494 default_value = "none" if args.version < 4 else "smart" 2495 if value == default_value: 2496 continue 2497 autogenerated_note_args += action.option_strings[0] + " " 2498 if args.version < 4 and value == "all": 2499 continue 2500 autogenerated_note_args += "%s " % value 2501 continue 2502 if action.const is not None: # action stores a constant (usually True/False) 2503 # Skip actions with different constant values (this happens with boolean 2504 # --foo/--no-foo options) 2505 if value != action.const: 2506 continue 2507 if parser.get_default(action.dest) == value: 2508 continue # Don't add default values 2509 if action.dest == "function_signature" and args.version >= 2: 2510 continue # Enabled by default in version 2 2511 if action.dest == "filters": 2512 # Create a separate option for each filter element. The value is a list 2513 # of Filter objects. 2514 for elem in value: 2515 opt_name = "filter-out" if elem.is_filter_out else "filter" 2516 opt_value = elem.pattern() 2517 new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"')) 2518 if new_arg not in autogenerated_note_args: 2519 autogenerated_note_args += new_arg 2520 else: 2521 autogenerated_note_args += action.option_strings[0] + " " 2522 if action.const is None: # action takes a parameter 2523 if action.nargs == "+": 2524 value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value)) 2525 autogenerated_note_args += "%s " % value 2526 if autogenerated_note_args: 2527 autogenerated_note_args = " %s %s" % ( 2528 UTC_ARGS_KEY, 2529 autogenerated_note_args[:-1], 2530 ) 2531 return autogenerated_note_args 2532 2533 2534def check_for_command(line, parser, args, argv, argparse_callback): 2535 cmd_m = UTC_ARGS_CMD.match(line) 2536 if cmd_m: 2537 for option in shlex.split(cmd_m.group("cmd").strip()): 2538 if option: 2539 argv.append(option) 2540 args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv)) 2541 if argparse_callback is not None: 2542 argparse_callback(args) 2543 return args, argv 2544 2545 2546def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global): 2547 result = get_arg_to_check(test_info.args) 2548 if not result and is_global: 2549 # See if this has been specified via UTC_ARGS. This is a "global" option 2550 # that affects the entire generation of test checks. If it exists anywhere 2551 # in the test, apply it to everything. 2552 saw_line = False 2553 for line_info in test_info.ro_iterlines(): 2554 line = line_info.line 2555 if not line.startswith(";") and line.strip() != "": 2556 saw_line = True 2557 result = get_arg_to_check(line_info.args) 2558 if result: 2559 if warn and saw_line: 2560 # We saw the option after already reading some test input lines. 2561 # Warn about it. 2562 print( 2563 "WARNING: Found {} in line following test start: ".format( 2564 arg_string 2565 ) 2566 + line, 2567 file=sys.stderr, 2568 ) 2569 print( 2570 "WARNING: Consider moving {} to top of file".format(arg_string), 2571 file=sys.stderr, 2572 ) 2573 break 2574 return result 2575 2576 2577def dump_input_lines(output_lines, test_info, prefix_set, comment_string): 2578 for input_line_info in test_info.iterlines(output_lines): 2579 line = input_line_info.line 2580 args = input_line_info.args 2581 if line.strip() == comment_string: 2582 continue 2583 if line.strip() == comment_string + SEPARATOR: 2584 continue 2585 if line.lstrip().startswith(comment_string): 2586 m = CHECK_RE.match(line) 2587 if m and m.group(1) in prefix_set: 2588 continue 2589 output_lines.append(line.rstrip("\n")) 2590 2591 2592def add_checks_at_end( 2593 output_lines, prefix_list, func_order, comment_string, check_generator 2594): 2595 added = set() 2596 generated_prefixes = set() 2597 for prefix in prefix_list: 2598 prefixes = prefix[0] 2599 tool_args = prefix[1] 2600 for prefix in prefixes: 2601 for func in func_order[prefix]: 2602 # The func order can contain the same functions multiple times. 2603 # If we see one again we are done. 2604 if (func, prefix) in added: 2605 continue 2606 if added: 2607 output_lines.append(comment_string) 2608 2609 # The add_*_checks routines expect a run list whose items are 2610 # tuples that have a list of prefixes as their first element and 2611 # tool command args string as their second element. They output 2612 # checks for each prefix in the list of prefixes. By doing so, it 2613 # implicitly assumes that for each function every run line will 2614 # generate something for that function. That is not the case for 2615 # generated functions as some run lines might not generate them 2616 # (e.g. -fopenmp vs. no -fopenmp). 2617 # 2618 # Therefore, pass just the prefix we're interested in. This has 2619 # the effect of generating all of the checks for functions of a 2620 # single prefix before moving on to the next prefix. So checks 2621 # are ordered by prefix instead of by function as in "normal" 2622 # mode. 2623 for generated_prefix in check_generator( 2624 output_lines, [([prefix], tool_args)], func 2625 ): 2626 added.add((func, generated_prefix)) 2627 generated_prefixes.add(generated_prefix) 2628 return generated_prefixes 2629