from __future__ import print_function

import argparse
import copy
import glob
import itertools
import os
import re
import subprocess
import sys
import shlex

from typing import List

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
"""
DEFAULT_VERSION = 4


SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}


class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.

    This is required for the deep copy done in do_scrub.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # The compiled pattern is shared between the copies rather than being
        # copied itself.
        result = copy.copy(self)
        result.regex = self.regex
        return result

    def search(self, line):
        """Return the result of searching `line` with the wrapped pattern."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Return `line` with matches of the wrapped pattern replaced by `repl`."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source string of the wrapped pattern."""
        return self.regex.pattern

    def flags(self):
        """Return the flags of the wrapped pattern."""
        return self.regex.flags


class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Regex.__deepcopy__ shallow-copies `self`, so the result is already a
        # Filter; only the flag still needs its own deep copy.
        result = super(Filter, self).__deepcopy__(memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result


def parse_commandline_args(parser):
    """Register the common options on `parser` and parse the command line.

    The options are added to `parser`, `parser.parse_args()` is called (so the
    arguments come from sys.argv), and the verbose / global-value-regex settings
    are published to module-level globals.

    Returns the parsed argparse namespace.
    Raises ValueError if a --filter/--filter-out value is not a valid regular
    expression.
    """

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            # `option_string` is only used to name the offending option in the
            # error message below.
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            # The parent action appended a plain Regex; replace it with a Filter
            # that remembers which of the two options produced it.
            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args


def parse_args(parser, argv):
    """Parse `argv` with `parser` and apply the version-dependent defaults.

    For version >= 2, function_signature is forced on. A check_globals value of
    "default" is resolved to "none" before version 4 and to "smart" from
    version 4 on. The verbose / global-value-regex settings are published to
    module-level globals.
    """
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "none" if args.version < 4 else "smart"
    return args


class InputLineInfo(object):
    """One input line together with the args/argv in effect for that line."""

    def __init__(self, line, line_number, args, argv):
        self.line = line
        self.line_number = line_number
        self.args = args
        self.argv = argv


class TestInfo(object):
    """State for one test file being (re)generated: its lines, RUN lines,
    current options and the note strings written into the output."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # No explicit comment prefix: pick one from the file extension.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without modifying this object.

        Each line is passed through check_for_command, and the args/argv it
        returns are attached to the yielded info.
        """
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines that need processing, updating self.args/self.argv.

        The autogenerated note is appended to `output_lines` first. Lines seen
        while generation is disabled are appended to `output_lines` unchanged
        and not yielded.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the lines to emit for prefixes in `run_list` that are unused.

        Entries of `run_list` whose first element is None are ignored. The
        result is empty when every prefix appears in `used_prefixes`; otherwise
        it is the unused-prefix note followed by one match-everything line per
        unused prefix, in sorted order.
        """
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = {
            prefix for sublist in run_list for prefix in sublist[0]
        }.difference(used_prefixes)

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret


def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matched by `test_patterns`.

    Files are skipped (with a warning) when their first line asks not to be
    autogenerated, when they were autogenerated by a different script and
    --force-update is not given, or when --update-only is given and they are
    not autogenerated. Input lines from the unused-prefixes note onwards are
    dropped.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )


def should_add_line_to_output(
    input_line, prefix_set, skip_global_checks=False, comment_marker=";"
):
    """Return whether `input_line` should be copied to the regenerated output.

    Blank comment lines (unless `skip_global_checks`), the separator comment
    line and existing check lines for a prefix in `prefix_set` are dropped.
    With `skip_global_checks`, such a check line is dropped only if it contains
    "[[" or "@".
    """
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #  return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_global_checks:
            global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
            return not global_ir_value_re.search(input_line)
        return False

    return True


# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return the (pattern, replacement) pairs derived from `sourcepath`."""
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]


def applySubstitutions(s, substitutions):
    """Return `s` with every (pattern, replacement) pair applied in order."""
    for a, b in substitutions:
        s = s.replace(a, b)
    return s


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run `exe` with `cmd_args` on the file `ir` and return its stdout.

    `cmd_args` is either a list (run without a shell) or a string (appended to
    `exe` and run through the shell). If `preprocess_cmd` is given it is run
    through the shell and its output is fed to the tool instead of the file.
    Substitutions from getSubstitutions(ir) are applied to all of them.

    Returns the tool output as text with "\\r\\n" normalized to "\\n".
    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)
        preprocess_proc = None

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                preprocess_proc = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
                ir_file = preprocess_proc.stdout

        try:
            if isinstance(cmd_args, list):
                args = [applySubstitutions(a, substitutions) for a in cmd_args]
                stdout = subprocess.check_output([exe] + args, stdin=ir_file)
            else:
                stdout = subprocess.check_output(
                    exe + " " + applySubstitutions(cmd_args, substitutions),
                    shell=True,
                    stdin=ir_file,
                )
        finally:
            # Release the pipe and reap the preprocessing child so that it is
            # not left behind, even when the tool itself failed.
            if preprocess_proc is not None:
                preprocess_proc.stdout.close()
                preprocess_proc.wait()
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix LF style.
    return stdout.replace("\r\n", "\n")


##### LLVM IR parser
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
)

UTC_ARGS_KEY = "UTC_ARGS:"
# Both halves are raw strings so that "\s" reaches the regex engine without
# relying on Python passing through an unrecognized string escape.
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

SEPARATOR = "."


def error(msg, test_file=None):
    """Print `msg` (suffixed with `test_file` if given) as an ERROR on stderr."""
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("ERROR: {}".format(msg), file=sys.stderr)


def warn(msg, test_file=None):
    """Print `msg` (suffixed with `test_file` if given) as a WARNING on stderr."""
    if test_file:
        msg = "{}: {}".format(msg, test_file)
    print("WARNING: {}".format(msg), file=sys.stderr)


def debug(*args, **kwargs):
    """Print like print() when verbose output is on; defaults to stderr."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    if _verbose:
        print(*args, **kwargs)


def find_run_lines(test, lines):
    """Return the RUN commands found in `lines`.

    A RUN line ending in a backslash is joined with the following RUN line.
    `test` is only used in debug output.
    """
    debug("Scanning for RUN lines in test file:", test)
    raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug(" RUN: {}".format(l))
    return run_lines


def get_triple_from_march(march):
    """Return the triple whose name is a prefix of `march`, or "x86" (with a
    message on stderr) if none matches."""
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix, triple in triples.items():
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"


def apply_filters(line, filters):
    """Return whether `line` is kept by `filters`.

    The first filter that matches decides: the line is kept unless that filter
    is a filter-out. If nothing matches, the line is kept only when no
    include filter was given.
    """
    has_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_filter = True
        if f.search(line):
            return not f.is_filter_out
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return not has_filter


def do_filter(body, filters):
    """Return `body` restricted to the lines kept by `filters`.

    With no filters `body` is returned unchanged; otherwise the kept lines are
    re-joined with "\\n".
    """
    if not filters:
        return body
    return "\n".join(
        line for line in body.splitlines() if apply_filters(line, filters)
    )


def scrub_body(body):
    """Return `body` with whitespace runs collapsed, tabs expanded and trailing
    whitespace removed."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body


def do_scrub(body, scrubber, scrubber_args, extra):
    """Call `scrubber(body, *scrubber_args)` and return its result.

    When `scrubber_args` is non-empty, the scrubber receives a deep copy whose
    first element has `extra_scrub` set to `extra`, so the caller's arguments
    are not modified.
    """
    if scrubber_args:
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    return scrubber(body, *scrubber_args)


# Build up a dictionary of all the function bodies.
class function_body(object):
    """The scrubbed body of one function plus the signature pieces recorded
    alongside it."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
    ):
        """Return whether the given function data equals this one once argument
        names are ignored.

        For backend checks (`is_backend`) the extra-scrubbed bodies are
        compared as they are; otherwise argument names and IR comments are
        removed from both bodies before comparing.
        """
        arg_names = set()

        def drop_arg_names(match):
            arg_names.add(match.group(variable_group_in_ir_value_match))
            if match.group(attribute_group_in_ir_value_match):
                attr = match.group(attribute_group_in_ir_value_match)
            else:
                attr = ""
            return match.group(1) + attr + match.group(match.lastindex)

        def repl_arg_names(match):
            if (
                match.group(variable_group_in_ir_value_match) is not None
                and match.group(variable_group_in_ir_value_match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False
        ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
        ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if is_backend:
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
        es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub


class FunctionTestBuilder:
    """Collect, per check prefix, the function bodies produced by the RUN
    lines of one test."""

    def __init__(self, run_list, flags, scrubber_args, path):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = [
            Filter(re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out)
            for f in (flags.filters or [])
        ]
        self._scrubber_args = scrubber_args
        self._path = path
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = [x.strip('"') for x in flags.replace_value_regex]
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        # The first element of every run_list entry is its list of prefixes.
        for run_entry in run_list:
            for prefix in run_entry[0]:
                self._func_dict[prefix] = {}
                self._func_order[prefix] = []
                self._global_var_dict[prefix] = {}

    def finish_and_get_func_dict(self):
        """Warn about every failed prefix and return the prefix -> functions
        dictionary."""
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        """Return the prefix -> ordered function names dictionary."""
        return self._func_order

    def global_var_dict(self):
        """Return the prefix -> global values dictionary."""
        return self._global_var_dict

    def is_filtered(self):
        """Return whether any filter is active."""
        return bool(self._filters)

    def process_run_line(
        self, function_re, scrubber, raw_tool_output, prefixes, is_backend
    ):
        """Record, for each of `prefixes`, the functions that `function_re`
        finds in `raw_tool_output`.

        A function whose recorded data differs from what this RUN line
        produced (beyond argument names) is recorded as None for that prefix,
        as is a function first seen after the prefix was marked as processed.
        """
        build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
        # Pattern that matches capture groups in the regex in leftmost order.
        group_regex = re.compile(r"\(.*?\)")
        for m in function_re.finditer(raw_tool_output):
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if is_backend:
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print(" " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                func_bodies = self._func_dict[prefix]
                if func in func_bodies:
                    existing = func_bodies[func]
                    if existing is not None and (
                        str(existing) != scrubbed_body
                        or existing.args_and_sig != args_and_sig
                        or existing.attrs != attrs
                        or existing.funcdef_attrs_and_ret != funcdef_attrs_and_ret
                    ):
                        if existing.is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            is_backend,
                        ):
                            existing.scrub = scrubbed_extra
                            existing.args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            func_bodies[func] = None
                elif prefix not in self._processed_prefixes:
                    func_bodies[func] = function_body(
                        scrubbed_body,
                        scrubbed_extra,
                        funcdef_attrs_and_ret,
                        args_and_sig,
                        attrs,
                        func_name_separator,
                    )
                    self._func_order[prefix].append(func)
                else:
                    # An earlier RUN line used this check prefixes but didn't produce
                    # a body for this function. This happens in Clang tests that use
                    # preprocesser directives to exclude individual functions from some
                    # RUN lines.
                    func_bodies[func] = None

    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        self._processed_prefixes.update(prefixes)

    def get_failed_prefixes(self):
        """Yield the prefixes for which every recorded function is None."""
        # This returns the list of those prefixes that failed to match any function,
        # because there were conflicting bodies produced by different RUN lines, in
        # all instances of the prefix.
        for prefix in self._func_dict:
            bodies = self._func_dict[prefix]
            if bodies and all(body is None for body in bodies.values()):
                yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..


class NamelessValue:
    """Description of one kind of IR value and of how it is turned into a
    FileCheck variable definition or use."""

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
    def is_local_def_ir_value_match(self, match):
        return self.ir_prefix == "%"

    # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
    def is_global_scope_ir_value_match(self, match):
        return self.global_ir_rhs_regexp is not None

    # Return the IR prefix and check prefix we use for this kind of IR value,
    # e.g., (%, TMP) for locals. If the IR prefix is a regex, return the prefix
    # used in the IR output
    def get_ir_prefix_from_ir_value_match(self, match):
        return re.search(self.ir_prefix, match[0])[0], self.check_prefix

    # Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals
    def get_ir_regex_from_ir_value_re_match(self, match):
        # for backwards compatibility we check locals with '.*'
        if self.is_local_def_ir_value_match(match):
            return ".*"
        return self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            replacement = self.variable_mapping.get(var, None)
            if replacement is None:
                # Replace variable with an incrementing counter
                replacement = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = replacement
            var = replacement
        # This is a nameless value, prepend check_prefix.
        if var.isdigit():
            var = check_prefix + var
        else:
            # This is a named value that clashes with the check_prefix, prepend with
            # _prefix_filecheck_ir_name, if it has been defined.
            if (
                may_clash_with_default_check_prefix_name(check_prefix, var)
                and _prefix_filecheck_ir_name
            ):
                var = _prefix_filecheck_ir_name + var
        var = var.replace(".", "_")
        var = var.replace("-", "_")
        return var.upper()

    # Create a FileCheck variable from regex.
    def get_value_definition(self, var, match):
        varname = self.get_value_name(var, self.check_prefix)
        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        if self.is_number:
            regex = ""  # always capture a number in the default format
            capture_start = "[[#"
        else:
            # for backwards compatibility we check locals with '.*'
            regex = self.get_ir_regex_from_ir_value_re_match(match)
            capture_start = "[["
        # For locals the IR prefix goes inside the capture; for everything else
        # it stays in front of it.
        if self.is_local_def_ir_value_match(match):
            return capture_start + varname + ":" + prefix + regex + "]]"
        return prefix + capture_start + varname + ":" + regex + "]]"

    # Use a FileCheck variable.
    def get_value_use(self, var, match, var_prefix=None):
        if var_prefix is None:
            var_prefix = self.check_prefix
        capture_start = "[[#" if self.is_number else "[["
        if self.is_local_def_ir_value_match(match):
            return capture_start + self.get_value_name(var, var_prefix) + "]]"
        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        return prefix + capture_start + self.get_value_name(var, var_prefix) + "]]"


# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
ir_nameless_values = [
    # check_prefix check_key ir_prefix ir_regexp global_ir_rhs_regexp
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
    NamelessValue(
        r"GLOBNAMED",
        "@",
        r"@",
        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
        r".+",
        is_before_functions=True,
        match_literally=True,
        interlaced_with_previous=True,
    ),
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
]

# The subset of IR value kinds that have a module-level definition
# (they were declared with a right-hand-side regexp).
global_nameless_values = [
    value for value in ir_nameless_values if value.global_ir_rhs_regexp is not None
]
# global variable names should be matched literally
global_nameless_values_w_unstable_ids = [
    value for value in global_nameless_values if not value.match_literally
]

asm_nameless_values = [
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]

analyze_nameless_values = [
    NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    ),
]


def createOrRegexp(old, new):
    """Join two regexp fragments into an alternation.

    When either fragment is empty the other one is returned unchanged, so the
    function can be used to accumulate alternatives starting from "".
    """
    if old and new:
        return old + "|" + new
    return old if old else new


def createPrefixMatch(prefix_str, prefix_re):
    """Return a regexp that matches prefix_str followed by a captured prefix_re.

    Only the part matched by prefix_re is captured; the surrounding group is
    non-capturing so that every value kind contributes exactly one group.
    """
    return "".join(["(?:", prefix_str, "(", prefix_re, "))"])


# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
IR_VALUE_REGEXP_STRING = r""
for nameless_value in ir_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    if nameless_value.global_ir_rhs_regexp is not None:
        # Kinds with a module-level definition are anchored to the line start.
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
IR_VALUE_RE = re.compile(
    "".join(
        [
            IR_VALUE_REGEXP_PREFIX,
            r"(",
            IR_VALUE_REGEXP_STRING,
            r")",
            IR_VALUE_REGEXP_SUFFIX,
        ]
    )
)

# Same construction, restricted to the global kinds whose ids are unstable
# (not matched literally); no line-start anchor is added here.
GLOBAL_VALUE_REGEXP_STRING = r""
for nameless_value in global_nameless_values_w_unstable_ids:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
GLOBAL_VALUE_RE = re.compile(
    "".join(
        [
            IR_VALUE_REGEXP_PREFIX,
            r"(",
            GLOBAL_VALUE_REGEXP_STRING,
            r")",
            IR_VALUE_REGEXP_SUFFIX,
        ]
    )
)

# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
ASM_VALUE_RE = re.compile(
    "".join(
        [
            r"((?:#|//)\s*)",
            "(",
            ASM_VALUE_REGEXP_STRING,
            ")",
            ASM_VALUE_REGEXP_SUFFIX,
        ]
    )
)

# Build the regexp that matches the values generalized in analysis output.
ANALYZE_VALUE_REGEXP_PREFIX = r"(\s*)"
ANALYZE_VALUE_REGEXP_STRING = r""
for nameless_value in analyze_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ANALYZE_VALUE_REGEXP_STRING = createOrRegexp(ANALYZE_VALUE_REGEXP_STRING, match)
ANALYZE_VALUE_REGEXP_SUFFIX = r"(\)?:)"
ANALYZE_VALUE_RE = re.compile(
    "".join(
        [
            ANALYZE_VALUE_REGEXP_PREFIX,
            r"(",
            ANALYZE_VALUE_REGEXP_STRING,
            r")",
            ANALYZE_VALUE_REGEXP_SUFFIX,
        ]
    )
)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4


# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
def get_idx_from_ir_value_match(match):
    """Return the index (into the nameless-value table) of the kind that matched.

    The value regexps contain one capture group per nameless-value kind,
    starting at group `first_nameless_group_in_ir_value_match`. The last group
    of the match (`match.lastindex`) is excluded from the scan. Reports an error
    through `error` and falls back to index 0 if no group matched.
    """
    for i in range(first_nameless_group_in_ir_value_match, match.lastindex):
        if match.group(i) is not None:
            return i - first_nameless_group_in_ir_value_match
    error("Unable to identify the kind of IR value from the match!")
    return 0


# See get_idx_from_ir_value_match
def get_name_from_ir_value_match(match):
    """Return the text captured by the group of the value kind that matched."""
    return match.group(
        get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match
    )


def get_nameless_value_from_match(match, nameless_values) -> "NamelessValue":
    """Return the entry of `nameless_values` describing the kind that matched.

    `nameless_values` must be the table the matching regexp was built from, so
    that group order and table order agree.
    """
    return nameless_values[get_idx_from_ir_value_match(match)]


# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Tell whether `var` looks like a generated name `<check_prefix><digits>`.

    The comparison is case-insensitive. The result is only meant to be used
    for its truthiness (it is a match object, None, or the falsy check_prefix).
    """
    return check_prefix and re.match(
        r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE
    )


def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
    preserve_names,
):
    """Replace value names in `lines` by FileCheck variable defs and uses.

    `lines` is modified in place and also returned.

    lines: list of output lines to generalize.
    is_analyze: lines are analysis output; runs of braces get escaped.
    vars_seen: set of (name, check_key) for locals already defined; updated.
    global_vars_seen: dict (name, check_key) -> check prefix for non-local
        values already defined; updated.
    nameless_values: table of value kinds matching `nameless_value_regex`.
    nameless_value_regex: compiled regexp built from `nameless_values`.
    is_asm: lines are assembly; the IR-specific scrubbing is skipped.
    preserve_names: if true, no value names are replaced.
    """

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = get_name_from_ir_value_match(match)
        nameless_value = get_nameless_value_from_match(match, nameless_values)
        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (var,)
            )
        key = (var, nameless_value.check_key)
        is_local_def = nameless_value.is_local_def_ir_value_match(match)
        if is_local_def and key in vars_seen:
            rv = nameless_value.get_value_use(var, match)
        elif not is_local_def and key in global_vars_seen:
            # We could have seen a different prefix for the global variables first,
            # ensure we use that one instead of the prefix for the current match.
            rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
        else:
            if is_local_def:
                vars_seen.add(key)
            else:
                global_vars_seen[key] = nameless_value.check_prefix
            rv = nameless_value.get_value_definition(var, match)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(match.lastindex)

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        return '{{' + re.escape(match_obj.group(0)) + '}}'

    for i, line in enumerate(lines):
        if not is_asm and not is_analyze:
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    # Rewrite decimal integer constants as FileCheck numeric
                    # expressions holding the hexadecimal form of the value.
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line
        if not preserve_names:
            # It can happen that two matches are back-to-back and for some reason sub
            # will not replace both of them. For now we work around this by
            # substituting until there is no more match.
            changed = True
            while changed:
                (lines[i], changed) = nameless_value_regex.subn(
                    transform_line_vars, lines[i], count=1
                )
        if is_analyze:
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line
    return lines


# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(
    lines, is_analyze, vars_seen, global_vars_seen, preserve_names
):
    """Generalize IR `lines` using the IR value table and IR_VALUE_RE."""
    return generalize_check_lines_common(
        lines,
        is_analyze,
        vars_seen,
        global_vars_seen,
        ir_nameless_values,
        IR_VALUE_RE,
        False,
        preserve_names,
    )


def generalize_global_check_line(line, preserve_names, global_vars_seen):
    """Generalize a single module-level definition line and return it.

    Only global kinds with unstable ids are replaced; locals seen while doing
    so are tracked in a throw-away set.
    """
    [new_line] = generalize_check_lines_common(
        [line],
        False,
        set(),
        global_vars_seen,
        global_nameless_values_w_unstable_ids,
        GLOBAL_VALUE_RE,
        False,
        preserve_names,
    )
    return new_line


def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    """Generalize assembly `lines` using the asm value table and ASM_VALUE_RE."""
    return generalize_check_lines_common(
        lines,
        False,
        vars_seen,
        global_vars_seen,
        asm_nameless_values,
        ASM_VALUE_RE,
        True,
        False,
    )


def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen):
    """Generalize analysis `lines` using the analyze table and ANALYZE_VALUE_RE."""
    return generalize_check_lines_common(
        lines,
        True,
        vars_seen,
        global_vars_seen,
        analyze_nameless_values,
        ANALYZE_VALUE_RE,
        False,
        False,
    )


def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
):
    """Append the check lines for `func_name` to `output_lines`.

    output_lines: list of strings the generated lines are appended to.
    comment_marker: comment leader put in front of every generated line.
    prefix_list: run list; the first element of each item is the list of
        check prefixes of one run line.
    func_dict: check prefix -> function name -> recorded function output.
    func_name: name of the function to emit checks for.
    check_label_format: %-format for the label line; receives (check prefix,
        function attrs/return, function name, args/signature, separator).
    is_backend: emit assembly-style checks.
    is_analyze: emit analysis-style checks (only consulted if not is_backend).
    version: output format version, see the changelog at the top of the file.
    global_vars_seen_dict: check prefix -> globals already defined under that
        prefix; updated with the globals defined here.
    is_filtered: output was filtered, so plain checks are emitted, not -NEXT.
    preserve_names: keep IR value names instead of FileCheck variables.

    Returns the list of check prefixes that check lines were printed for.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because
    # it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if version > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    is_analyze,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if version >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s: %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif is_analyze:
                func_body = generalize_analyze_check_lines(
                    func_body, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body, False, vars_seen, global_vars_seen, preserve_names
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}: {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}: {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes


def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Append IR check lines for `func_name`; see add_checks for the arguments.

    `function_sig` selects whether the label line also checks the "define"
    part of the function definition. Returns the printed check prefixes.
    """
    # Label format is based on IR string.
    if function_sig and version > 1:
        function_def_regex = "define %s"
    elif function_sig:
        function_def_regex = "define {{[^@]+}}%s"
    else:
        function_def_regex = "%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        False,
        False,
        version,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
    )


def add_analyze_checks(
    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
):
    """Append analysis check lines for `func_name`; see add_checks.

    Uses format version 1 and a fresh, private globals-seen dictionary.
    Returns the printed check prefixes.
    """
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    global_vars_seen_dict = {}
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        False,
        True,
        1,
        global_vars_seen_dict,
        is_filtered,
    )


def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
    """Record the module-level definitions found in `raw_tool_output`.

    For every value kind that has a right-hand-side regexp, all definition
    lines in `raw_tool_output` are collected as (start offset, line) pairs and
    stored in glob_val_dict[prefix][check_prefix] for each prefix in
    `prefixes`. If a prefix already holds different lines for that kind, the
    entry is set to None, except for the last prefix, where a warning is
    issued and the entry is overwritten.
    """
    for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values):
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines
            # can be sorted properly even if they are matched by different nameless values.
            # This is relevant for GLOB and GLOBNAMED since they may appear interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines


def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select the global definition lines to check for one value kind.

    global_val_lines_w_index: list of (index, line) pairs, one per definition.
    global_vars_seen: iterable of (name, check_key) pairs already referenced.
    nameless_value: the value kind the lines belong to.
    global_check_setting: "none" (keep nothing), "all" (keep everything) or
        "smart" (keep only definitions reachable from `global_vars_seen` by
        following the references found in the definitions' right-hand sides;
        attribute sets, check_key "#", are never kept).

    Returns the selected (index, line) pairs in their original order.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    definition_re = (
        "^"
        + nameless_value.ir_prefix
        + "("
        + nameless_value.ir_regexp
        + ") = ("
        + nameless_value.global_ir_rhs_regexp
        + ")"
    )

    # Parse every definition exactly once: remember the defined name per line
    # and which other names its right-hand side refers to.
    defined_vars = []
    contains_refs_to = {}
    for _, line in global_val_lines_w_index:
        match = re.match(definition_re, line)
        var = match.group(1)
        defined_vars.append(var)
        contains_refs_to[var] = re.findall(nameless_value.ir_regexp, match.group(2))

    # Compute everything reachable from the values seen so far. An explicit
    # worklist is used instead of recursion so that long reference chains
    # cannot exceed the interpreter's recursion limit.
    transitively_visible = set()
    worklist = [
        var
        for var, check_key in global_vars_seen
        if check_key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in transitively_visible:
            continue
        transitively_visible.add(var)
        worklist.extend(contains_refs_to.get(var, ()))

    return [
        (i, line)
        for (i, line), var in zip(global_val_lines_w_index, defined_vars)
        if var in transitively_visible
    ]


# Pairs of (regexp, replacement) applied to metadata lines to hide details
# that differ between builds and machines.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Return `line` with every METADATA_FILTERS replacement applied in order."""
    for f, replacement in METADATA_FILTERS_RE:
        line = f.sub(replacement, line)
    return line


def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Move the pending (index, line) pairs to `output_lines`, sorted by index.

    A separator line is emitted first. `new_lines_w_index` is sorted in place
    and emptied; nothing happens if it is already empty.
    """
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    new_lines_w_index.sort()
    for _, line in new_lines_w_index:
        output_lines.append(line)
    new_lines_w_index.clear()


def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append check lines for module-level definitions to `output_lines`.

    glob_val_dict: check prefix -> value-kind check prefix -> (index, line)
        pairs, as filled in by build_global_values_dictionary.
    comment_marker: comment leader put in front of every generated line.
    prefix_list: run list; the first element of each item is the list of
        check prefixes of one run line (or None).
    global_vars_seen_dict: check prefix -> globals already defined under that
        prefix; updated with the globals defined here.
    preserve_names: keep IR value names instead of FileCheck variables.
    is_before_functions: only value kinds with the same flag are handled.
    global_check_setting: "none", "all" or "smart", see
        filter_globals_according_to_preference.

    Returns the set of (check prefix, value-kind check prefix) pairs printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in global_nameless_values:
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # If explicit global value patterns were requested, only
                    # keep definitions whose name matches one of them.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    new_line = generalize_global_check_line(
                        line, preserve_names, global_vars_seen
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if checkprefix not in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                # Kinds that may be interlaced with the previous kind keep
                # accumulating so that both are sorted together when flushed.
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes


def check_prefix(prefix):
    """Warn if `prefix` is not a valid FileCheck prefix (per PREFIX_RE)."""
    if not PREFIX_RE.match(prefix):
        hint = ""
        if "," in prefix:
            hint = " Did you mean '--check-prefixes=" + prefix + "'?"
        # Format the message before appending the hint: the hint embeds the
        # user-supplied prefix, which must not be interpreted as a format string.
        warn(
            (
                "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
                % (prefix,)
            )
            + hint
        )


def get_check_prefixes(filecheck_cmd):
    """Return the check prefixes named in `filecheck_cmd`.

    Every CHECK_PREFIX_RE match contributes its comma-separated items;
    ["CHECK"] is returned when the command names no prefix.
    """
    check_prefixes = [
        item
        for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
        for item in m.group(1).split(",")
    ]
    if not check_prefixes:
        check_prefixes = ["CHECK"]
    return check_prefixes


def verify_filecheck_prefixes(fc_cmd):
    """Validate the prefixes given to a FileCheck command line.

    Each prefix is passed to check_prefix; for check-prefixes lists a warning
    is additionally issued for prefixes that occur more than once.
    """
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )


def get_autogennote_suffix(parser, args):
    """Return the suffix of the autogenerated note listing non-default options.

    The result is " <UTC_ARGS_KEY> <options>" built from the parser actions
    whose value in `args` differs from the default, or "" if there are none.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # The default and the spelling of this option depend on the version.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element. The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args


def check_for_command(line, parser, args, argv, argparse_callback):
    """Apply the options of a UTC_ARGS command found in `line`, if any.

    The options are appended to `argv` (modified in place) and the arguments
    are re-parsed without the test paths; `argparse_callback`, if given, is
    called with the new arguments. Returns the (possibly new) args and argv.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m:
        for option in shlex.split(cmd_m.group("cmd").strip()):
            if option:
                argv.append(option)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv


def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Return the value `get_arg_to_check` extracts for this test.

    The test-wide arguments are consulted first. For a global option that is
    not set there, the per-line arguments are scanned and the first truthy
    value wins; a warning is printed to stderr if it is found after a line
    that is neither blank nor starting with ";". `arg_string` is only used
    in that warning.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS. This is a "global" option
        # that affects the entire generation of test checks. If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result


def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the input lines of the test to `output_lines`.

    Bare comment lines, separator lines and check lines using one of the
    prefixes in `prefix_set` are dropped; trailing newlines are stripped.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))


def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Append the checks of all functions, grouped by prefix, to `output_lines`.

    prefix_list: run list of (check prefixes, tool args, ...) items.
    func_order: check prefix -> function names in output order.
    comment_string: line emitted between the checks of two functions.
    check_generator: callable(output_lines, run_list, func) that appends the
        checks and returns the prefixes it generated checks for.

    Returns the set of all prefixes checks were generated for.
    """
    added = set()
    generated_prefixes = set()
    for run in prefix_list:
        prefixes = run[0]
        tool_args = run[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we skip it.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element. They output
                # checks for each prefix in the list of prefixes. By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function. That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in. This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix. So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes