import argparse
import bisect
import collections
import copy
import glob
import os
import re
import subprocess
import sys
import shlex

from typing import List, Mapping, Set

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
DEFAULT_VERSION = 5


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Dependence Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        result = copy.deepcopy(super(Filter, self), memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result
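
# A doctest-style sketch (illustrative; not executed by the scripts): the
# wrapper above lets structures holding compiled patterns survive deep copies.
#
#   >>> r = Regex(re.compile(r"%\w+"))
#   >>> copy.deepcopy(r).search("  %x = add i32 %a, %b") is not None
#   True
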

def parse_commandline_args(parser):
    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for the current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )
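
    # For example (an illustrative invocation; the flags are the ones defined
    # above): passing
    #   --filter-out "^\s*tail call" --filter "call"
    # keeps only lines containing "call", except tail calls, because the first
    # filter that matches a line decides that line's fate.
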
    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in Python 3.9, we can use argparse.BooleanOptionalAction. At that
    # point, we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of the output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args


class InputLineInfo(object):
    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)
    def iterlines(self, output_lines):
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
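
# For instance (illustrative, using the ";" comment prefix), an unused prefix
# "CHECK-FOO" makes get_checks_for_unused_prefixes above emit:
#
#   ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
#   ; CHECK-FOO: {{.*}}
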

def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #    return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling.
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions.
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            is_global = global_ir_value_re.search(input_line)
            return not is_global
        return False

    return True


def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        current_function = None

    return result


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    for a, b in substitutions:
        s = s.replace(a, b)
    return s
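
# A doctest-style sketch (the path is hypothetical):
#
#   >>> subs = getSubstitutions("/tmp/tests/foo.ll")
#   >>> applySubstitutions("opt -S %s -o %S/out.ll", subs)
#   'opt -S /tmp/tests/foo.ll -o /tmp/tests/out.ll'
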

# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
    if sys.version_info[0] > 2:
        # FYI, if you crashed here with a decode error, your run line probably
        # results in bitcode or other binary format being written to the pipe.
        # For an opt test, you probably want to add -S or -disable-output.
        stdout = stdout.decode()
    # Normalize line endings to Unix-style LF.
    return stdout.replace("\r\n", "\n")


##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

DATA_LAYOUT_RE = re.compile(
    r"target\s+datalayout\s+=\s+\"(?P<layout>.+)\"$", flags=(re.M | re.S)
)

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("ERROR: {}".format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("WARNING: {}".format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    # Python 2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines
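
# Illustrative: find_run_lines joins backslash-continued RUN lines, so
#   ; RUN: opt < %s -passes=instcombine \
#   ; RUN:   -S | FileCheck %s
# is folded into one logical run line, roughly:
#   opt < %s -passes=instcombine -S | FileCheck %s
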


def get_triple_from_march(march):
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in triples.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def get_globals_name_prefix(raw_tool_output):
    m = DATA_LAYOUT_RE.search(raw_tool_output)
    if not m:
        return None
    data_layout = m.group("layout")
    idx = data_layout.find("m:")
    if idx < 0:
        return None
    ch = data_layout[idx + 2]
    return "_" if ch == "o" or ch == "x" else None


def apply_filters(line, filters):
    has_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_filter = True
        if f.search(line):
            return False if f.is_filter_out else True
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return False if has_filter else True


def do_filter(body, filters):
    return (
        body
        if not filters
        else "\n".join(
            filter(lambda line: apply_filters(line, filters), body.splitlines())
        )
    )
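
# A doctest-style sketch of the filter semantics above (illustrative):
#
#   >>> keep_adds = [Filter(re.compile(r"\badd\b"), is_filter_out=False)]
#   >>> apply_filters("%x = add i32 %a, %b", keep_adds)
#   True
#   >>> apply_filters("ret i32 %x", keep_adds)
#   False
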


def scrub_body(body):
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        arg_names = set()

        def drop_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


class FunctionTestBuilder:
    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        return self._func_order

    def global_var_dict(self):
        return self._global_var_dict

    def is_filtered(self):
        return bool(self._filters)

    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string placed right after the function
            # name at the beginning of an assembly function definition. In most
            # assemblies, that is just a colon: `foo:`. But, for example, in
            # nvptx it is a brace: `foo(`. If we are not processing assembly
            # (is_asm() is False), just assume the separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name.
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefix but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            if self._func_dict[prefix] and (
                not [
                    fct
                    for fct in self._func_dict[prefix]
                    if self._func_dict[prefix][fct] is not None
                ]
            ):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..


class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals
    def get_ir_regex(self):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value():
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
        var = var.replace(".", "_")
        var = var.replace("-", "_")
        return var.upper()

    def get_affixes_from_match(self, match):
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
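
# Doctest-style sketch of get_value_name (illustrative values):
#
#   >>> tmp = NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None)
#   >>> tmp.get_value_name("3", tmp.check_prefix)    # nameless: '%3'
#   'TMP3'
#   >>> tmp.get_value_name("x.y", tmp.check_prefix)  # named: '%x.y'
#   'X_Y'
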


class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]


def make_ir_generalizer(version):
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #             check_prefix  check_key  ir_prefix  ir_regexp  global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    # values = [
    #     nameless_value
    #     for nameless_value in IR_NAMELESS_VALUES
    #     if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and
    #        not (unstable_ids_only and nameless_value.match_literally)
    # ]

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
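
# Doctest-style sketch: the composite regexp finds a local value in a line and
# get_name_from_match recovers its IR name (illustrative):
#
#   >>> ginfo = make_ir_generalizer(DEFAULT_VERSION)
#   >>> m = ginfo.get_regexp().search("ret i32 %x")
#   >>> ginfo.get_name_from_match(m)
#   'x'
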


def make_asm_generalizer(version):
    values = [
        NamelessValue(
            r"MCINST",
            "Inst#",
            "<MCInst #",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
        NamelessValue(
            r"MCREG",
            "Reg:",
            "<MCOperand Reg:",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
    ]

    prefix = r"((?:#|//)\s*)"
    suffix = r"([>\s]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix)


def make_analyze_generalizer(version):
    values = [
        NamelessValue(
            r"GRP",
            "#",
            r"",
            r"0x[0-9a-f]+",
            None,
            replace_number_with_counter=True,
        ),
    ]

    prefix = r"(\s*)"
    suffix = r"(\)?:)"

    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix
    )


# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    return check_prefix and re.match(
        r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE
    )


def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or the lines they do
        # share are repeated too often to match unambiguously. In that case,
        # let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    # - nodes on the LHS represent lines in the original check
    # - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    # * Plot the edges as points in the plane, with the original line
    #   number on the X axis and the updated line number on the Y axis.
    # * The goal is to find a longest "chain" of points where each point
    #   is strictly above and to the right of the previous point.
    # * The algorithm proceeds by sweeping a vertical line from left to
    #   right.
    # * The algorithm maintains a table where `table[N]` answers the
    #   question "What is currently the 'best' way to build a chain of N+1
    #   points to the left of the vertical line". Here, 'best' means
    #   that the last point of the chain is as low as possible (minimal
    #   Y coordinate).
    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #   the last point in the chain and `y` is its Y coordinate
    # * A key invariant is that the Y values in the table are
    #   monotonically increasing
    # * Thanks to these properties, the table can be used to answer the
    #   question "What is the longest chain that can be built to the left
    #   of the vertical line using only points below a certain Y value",
    #   using a binary search over the table.
    # * The algorithm also builds a backlink structure in which every point
    #   links back to the previous point on a best (longest) chain ending
    #   at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
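
# Worked doctest-style example (illustrative): "b" and "c" are the unchanged
# lines, so they form the crossing-free matching.
#
#   >>> find_diff_matching(["a", "b", "c"], ["b", "c", "d"])
#   [(1, 0), (2, 1)]
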


VARIABLE_TAG = "[[@@]]"
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")


class TestVar:
    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        if prefix != self._prefix:
            self._prefix = ""
        if suffix != self._suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        return self._nameless_value.get_value_name(
            text, self._nameless_value.check_prefix
        )

    def get_def(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"


class CheckValueInfo:
    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the value, e.g. '%'
        self.key = key

        # Text to be matched by the FileCheck variable (without any prefix or suffix)
        self.text = text

        # Name of the FileCheck variable
        self.name = name

        # Prefix and suffix that were captured by the NamelessValue regular expression
        self.prefix = prefix
        self.suffix = suffix


# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"


def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        candidate_matches = [
            (old_begin + lhs_idx, new_begin + rhs_idx)
            for lhs_idx, rhs_idx in candidate_matches
        ]

        # Candidate matches may conflict if they require conflicting mappings of
        # names. We want to determine a large set of compatible candidates,
        # because that leads to a small diff.
        #
        # We think of the candidates as vertices in a conflict graph. The
        # conflict graph has edges between incompatible candidates. We want to
        # find a large independent set in this graph.
        #
        # Greedily selecting candidates and removing incompatible ones has the
        # disadvantage that making few bad decisions early on can have huge
        # consequences.
        #
        # Instead, we implicitly compute multiple independent sets by greedily
        # assigning a *coloring* to the conflict graph. Then, we select the
        # largest color class (which is the largest independent set we found),
        # commit to all candidates in it, and recurse.
        #
        # Note that we don't actually materialize the conflict graph. Instead,
        # each color class tracks the information needed to decide implicitly
        # whether a vertex conflicts (has an edge to) any of the vertices added
        # to the color class so far.
        class Color:
            def __init__(self):
                # (lhs_idx, rhs_idx) of matches in this color
                self.matches = []

                # rhs_name -> lhs_name mappings required by this color
                self.mapping = {}

                # lhs_names committed for this color
                self.committed = set()

        colors = []

        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            # We scan through the uncommitted names in the candidate line and
            # filter out the color classes to which the candidate could be
            # assigned.
            #
            # Simultaneously, we prepare a new color class in case the candidate
            # conflicts with all colors that have been established so far.
            compatible_colors = colors[:]
            new_color = Color()
            new_color.matches.append((lhs_idx, rhs_idx))

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in new_color.mapping:
                    # Same, but for a possible commit happening on the same line
                    if new_color.mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if (
                    lhs_value.name in committed_names
                    or lhs_value.name in new_color.committed
                ):
                    # We can't map this value because the name we would map it
                    # to has already been committed for something else. Give up
                    # on this line.
                    break

                new_color.mapping[rhs_value.name] = lhs_value.name
                new_color.committed.add(lhs_value.name)

                color_idx = 0
                while color_idx < len(compatible_colors):
                    color = compatible_colors[color_idx]
                    compatible = True
                    if rhs_value.name in color.mapping:
                        compatible = color.mapping[rhs_value.name] == lhs_value.name
                    else:
                        compatible = lhs_value.name not in color.committed
                    if compatible:
                        color_idx += 1
                    else:
                        del compatible_colors[color_idx]
            else:
                # We never broke out of the loop, which means that at a minimum,
                # this line is viable standalone
                if compatible_colors:
                    color = max(compatible_colors, key=lambda color: len(color.matches))
                    color.mapping.update(new_color.mapping)
                    color.committed.update(new_color.committed)
                    color.matches.append((lhs_idx, rhs_idx))
                else:
                    colors.append(new_color)

        if colors:
            # Pick the largest color class. This gives us a large independent
            # (non-conflicting) set of candidate matches. Assign all names
            # required by the independent set and recurse.
            max_color = max(colors, key=lambda color: len(color.matches))

            for rhs_var, lhs_var in max_color.mapping.items():
                new_mapping[rhs_var] = lhs_var
                committed_names.add(lhs_var)

                if (
                    lhs_var != rhs_var
                    and lhs_var in new_mapping
                    and new_mapping[lhs_var] == lhs_var
                ):
                    new_mapping[lhs_var] = "conflict_" + lhs_var

            matches = (
                [(old_begin - 1, new_begin - 1)]
                + max_color.matches
                + [(old_end, new_end)]
            )

            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
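
# Minimal doctest-style sketch (hypothetical names; empty committed set): a
# new line whose shape matches an old line adopts the old variable name.
#
#   >>> old = [CheckLineInfo("ret i32 " + VARIABLE_TAG,
#   ...                      [CheckValueInfo(None, None, "TMP0", "", "")])]
#   >>> new = [CheckLineInfo("ret i32 " + VARIABLE_TAG,
#   ...                      [CheckValueInfo("%", "x", "TMP1", "", "")])]
#   >>> remap_metavar_names(old, new, set())
#   {'TMP1': 'TMP0'}
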


def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    # prefix_exclusions are prefixes we cannot use to check for the function,
    # because the function does not exist in the output of every run line that
    # uses these prefixes.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all check prefixes of this run line produced the function, we
        # cannot check for it, as it does not exist for this run line. A subset
        # of the check prefixes might know about the function, but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # Now that prefix_exclusions is constructed, we can emit the output.
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #   is_blank_line = True
                # else:
                #   output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                #   is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
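

# Note on the blank-line handling above: a blank line inside the function body
# is skipped rather than checked, so the first line after it is matched with a
# plain "CHECK:" instead of "CHECK-NEXT:"; unfiltered output otherwise gets
# "-NEXT" checks throughout.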


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    assert ginfo.is_ir()
    # Label format is based on IR string.
    if function_sig and ginfo.get_version() > 1:
        function_def_regex = "define %s"
    elif function_sig:
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
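

# Illustration (hypothetical prefix and function): with comment_marker ";" and
# version > 1, check_label_format becomes "; %s-LABEL: define %s@%s%s%s",
# which add_checks later expands to something like:
#   ; CHECK-LABEL: define i32 @foo(
#   ; CHECK-SAME: i32 [[X:%.*]])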


def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    assert ginfo.is_analyze()
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    global_vars_seen_dict = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
    )


def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines can be
            # sorted properly even if they are matched by different nameless
            # values. This is relevant for GLOB and GLOBNAMED since they may
            # appear interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines
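

# Illustrative note (hypothetical output): a matched line such as
#   @g = global i32 0
# is stored as (match_start_offset, "@g = global i32 0") under the matching
# nameless value's check prefix (e.g. GLOB), so that GLOB and GLOBNAMED check
# lines can later be emitted in their original textual order.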


def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # Attribute sets are usually better checked by --check-attributes.
        return []

    def extract(line, nv):
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    transitively_visible = set()
    contains_refs_to = {}

    def add(var):
        nonlocal transitively_visible
        nonlocal contains_refs_to
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        if var not in contains_refs_to:
            return
        for x in contains_refs_to[var]:
            add(x)

    for i, line in global_val_lines_w_index:
        (var, refs) = extract(line, nameless_value)
        contains_refs_to[var] = refs
    for var, check_key in global_vars_seen:
        if check_key != nameless_value.check_key:
            continue
        add(var)
    return [
        (i, line)
        for i, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in transitively_visible
    ]
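

# Sketch of the "smart" setting (hypothetical globals): given
#   @a = global ptr @b
#   @b = global i32 0
# where only @a appears in global_vars_seen, @b becomes transitively visible
# via contains_refs_to and both lines are kept; an unreferenced @c would be
# filtered out.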


METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()
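

# For instance, the DIFile filter above rewrites
#   !1 = !DIFile(filename: "a.c", directory: "/home/user")
# into
#   !1 = !DIFile(filename: "a.c", directory: {{.*}})
# and the version filter globs over strings like "clang version 18.0.0 (...)"
# so that checks do not depend on the build directory or toolchain version.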


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if checkprefix not in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before.
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
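

# The emitted block, assuming comment_marker ";" and SEPARATOR ".", looks
# roughly like (hypothetical globals):
#   ;.
#   ; CHECK: @g = global i32 0
#   ;.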


def check_prefix(prefix):
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                + hint
            )
            % (prefix)
        )


def get_check_prefixes(filecheck_cmd):
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
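

# For example, assuming CHECK_PREFIX_RE matches the --check-prefix(es) options
# of a RUN line, a command tail like
#   FileCheck %s --check-prefixes=CHECK,AVX2
# yields ["CHECK", "AVX2"], while a bare "FileCheck %s" falls back to
# ["CHECK"].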


def get_autogennote_suffix(parser, args):
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a
            # list of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv
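

# Sketch of a resulting note suffix (hypothetical non-default options),
# assuming UTC_ARGS_KEY is the "UTC_ARGS:" marker:
#   " UTC_ARGS: --check-globals all"
# check_for_command() performs the inverse: it re-parses such options when it
# encounters a UTC_ARGS comment line in an existing test.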


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                if saw_line:
                    # We saw the option after already reading some test input
                    # lines. Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        args = input_line_info.args
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    added = set()
    generated_prefixes = set()
    for prefix in prefix_list:
        prefixes = prefix[0]
        tool_args = prefix[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # a tool command args string as their second element. They
                # output checks for each prefix in the list of prefixes. In
                # doing so, they implicitly assume that for each function every
                # run line will generate something for that function. That is
                # not the case for generated functions as some run lines might
                # not generate them (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
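

# Usage sketch (hypothetical arguments): check_generator is typically a
# closure over add_ir_checks or add_checks, so a call like
#   check_generator(output_lines, [(["CHECK"], tool_args)], "foo")
# emits only the CHECK-prefixed checks for function "foo", giving the
# prefix-major ordering described above.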