xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 46f9cddfd7e40998422d1e34a3f1193210ee2fb8)
1import argparse
2import bisect
3import collections
4import copy
5import glob
6import os
7import re
8import subprocess
9import sys
10import shlex
11
12from typing import List, Mapping, Set
13
##### Common utilities for update_*test_checks.py


# Module-level state mirrored from parsed options by parse_commandline_args(),
# parse_args() and TestInfo.__init__ below (TODO comments there note this
# should be handled like the other options).
_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Output-format version assigned to newly generated tests (see itertests);
# existing tests keep the version recorded on their banner line.
DEFAULT_VERSION = 5


# Analysis printout headers the scripts know how to turn into CHECK lines;
# any other analysis triggers a warning in FunctionTestBuilder.process_run_line.
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Dependence Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
43
class Regex(object):
    """A deep-copyable wrapper around a compiled regular expression.

    do_scrub() deep-copies its scrubber arguments; this wrapper makes the
    compiled pattern survive that copy by sharing it instead of copying it.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Copy only the wrapper; the underlying compiled pattern is shared.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        return self.regex.search(line)

    def sub(self, repl, line):
        return self.regex.sub(repl, line)

    def pattern(self):
        return self.regex.pattern

    def flags(self):
        return self.regex.flags
69
70
class Filter(Regex):
    """A Regex plus a direction flag: lines that match are kept when
    is_filter_out is False (--filter) and dropped when it is True
    (--filter-out)."""

    def __init__(self, regex, is_filter_out):
        Regex.__init__(self, regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
86
def parse_commandline_args(parser):
    """Register the options shared by all update_*test_checks.py scripts on
    *parser*, parse sys.argv, mirror a few values into module globals, and
    return the parsed namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # BUGFIX: 'option_string' used to be an undefined name in
                # this scope, so an invalid regex raised NameError instead of
                # this ValueError; it is now threaded through from __call__.
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # Rewrap the Regex just appended by RegexAction as a Filter that
            # remembers whether it keeps (--filter) or drops (--filter-out).
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
231
232
def parse_args(parser, argv):
    """Parse *argv* with *parser*, apply version-dependent defaults, and
    mirror a few option values into module globals.  Returns the namespace."""
    args = parser.parse_args(argv)
    # Since format version 2, --function-signature is implied.
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    # 'smart' global checking only exists from format version 4 onwards.
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
245
246
class InputLineInfo(object):
    """One test-input line plus the argument state in effect when it was
    read (produced by TestInfo.ro_iterlines)."""

    def __init__(self, line, line_number, args, argv):
        self.line = line  # raw text of the input line
        self.line_number = line_number  # zero-based position in the file
        self.args = args  # parsed option namespace at this point
        self.argv = argv  # raw argument list at this point
253
254
class TestInfo(object):
    """Everything needed to regenerate one test file: parsed options, the
    input lines, the extracted RUN lines, and the banner/note strings."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        # Mirror --prefix-filecheck-ir-name into the module-level global.
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        # Pick a comment leader from the file extension when not supplied.
        if self.comment_prefix is None:
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        # First-line banner advertising which script generated the checks,
        # plus any UTC_ARGS suffix reconstructed from non-default options.
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introducing the unused-prefix block.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without mutating self;
        in-test UTC_ARGS commands update the args/argv that get yielded."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines check generation should process, appending
        passthrough lines to *output_lines* as a side effect: the old banner
        is dropped, and lines in --disable'd regions are copied verbatim.
        Updates self.args/self.argv as UTC_ARGS commands are seen."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            # While generation is disabled, copy the line through unchanged.
            if not self.args.enabled:
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the unused-prefix note plus a '{{.*}}' catch-all check line
        for every prefix in *run_list* that never got used, sorted by prefix;
        empty list when every prefix was used."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
334
335
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Expand the glob patterns in *test_patterns* and yield a TestInfo for
    every test file that should be (re)generated.

    Skips files whose first line carries the UTC_AVOID note, files generated
    by a different script (unless --force-update), and — under --update-only
    — files that were never autogenerated.  The previously generated
    unused-prefix block is stripped from the input lines.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                # Only regenerate tests this script produced, unless forced.
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Honor UTC_ARGS recorded on the existing banner line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Drop everything from the UNUSED_NOTE line onwards; that block
            # is regenerated from scratch.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
391
392
def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Return True when *input_line* from the original test should be copied
    into the regenerated output; False for lines the script owns (blank
    comment lines, the separator line, and existing CHECK lines whose prefix
    is in *prefix_set*)."""
    stripped = input_line.strip()
    # Blank comment lines in the IR are dropped (kept in global-check mode).
    if stripped == comment_marker and not skip_global_checks:
        return False
    # The special double-comment separator line is always dropped.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Existing CHECK lines for our prefixes are dropped; we rebuild them.
    m = CHECK_RE.match(input_line)
    if m is None or m.group(1) not in prefix_set:
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling
        return False
    if skip_global_checks:
        # Keep the check unless it refers to a global value ([[...]] or @...).
        global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
        return global_ir_value_re.search(input_line) is None
    return False
424
425
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # Simple state machine: a CHECK-LABEL naming a function opens a list;
    # subsequent checks with the same prefix append to it; any other line
    # (or a prefix change) resets the state.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A different prefix interrupts the current function's run.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Plain/NEXT/NOT/DAG/EMPTY checks: record the right-hand side.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # SAME merely continues the LABEL line; nothing to record.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any non-check line ends collection for the current function.
        current_function = None

    return result
477
478
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return (pattern, replacement) pairs mimicking lit's %s, %S, %p and
    %{pathsep} substitutions for the test file at *sourcepath*."""
    sourcedir = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),  # the test file itself
        ("%S", sourcedir),  # its directory
        ("%p", sourcedir),  # same as %S in lit
        ("%{pathsep}", os.pathsep),
    ]
488
489
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to *s* in order and return the
    result; later pairs see the output of earlier ones."""
    for pattern, replacement in substitutions:
        s = s.replace(pattern, replacement)
    return s
494
495
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run *exe* on the test input file *ir* and return its stdout as text
    with Unix line endings.

    cmd_args may be a string (run through the shell) or a list (run
    directly); lit-style %-substitutions are applied either way.  If
    preprocess_cmd is given (string only), its shell pipeline's stdout
    replaces the raw file as the tool's stdin.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # NOTE(review): the preprocess child is never wait()ed on; its
            # stdout pipe simply replaces the input file handle below.
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
544
545
##### LLVM IR parser

# A RUN: line in any supported comment style (//, ;, #); captures the command.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# FileCheck's --check-prefix/--check-prefixes option and its value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A single well-formed check prefix name.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# An existing check directive; group(1) is the prefix, group(2) the optional
# kind (NEXT/NOT/DAG/LABEL/SAME/EMPTY).
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# A check line whose kind is (at most) -SAME; used by
# should_add_line_to_output to drop dangling -SAME lines.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# In-test UTC_ARGS commands and the banner/note strings written to tests.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# 'target datalayout = "..."' in tool output; captures the layout string.
DATA_LAYOUT_RE = re.compile(
    r"target\s+datalayout\s+=\s+\"(?P<layout>.+)\"$", flags=(re.M | re.S)
)

# An IR function definition in tool output: optional '; Function Attrs:'
# comment, attrs/return type, name, signature, and the body up to '^}'.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# Header of an analysis printout: '<analysis>' for function '<func>':
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Loop-pass debug output: a quoted function name, then the body.
LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# 'define' lines in the original test source.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# Function name on an existing CHECK-LABEL right-hand side ('define' optional).
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
# Target triple, from IR text or from RUN-line arguments.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace/comment scrubbers applied to tool output before emitting checks.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to the comment marker to form the function-separator line.
SEPARATOR = "."
605
606
def error(msg, test_file=None):
    """Report an ERROR-level diagnostic on stderr, tagged with *test_file*
    when one is given."""
    location = ": {}".format(test_file) if test_file else ""
    print("ERROR: {}{}".format(msg, location), file=sys.stderr)
611
612
def warn(msg, test_file=None):
    """Emit a non-fatal WARNING on stderr, tagged with *test_file* when one
    is given."""
    location = ": {}".format(test_file) if test_file else ""
    print("WARNING: {}{}".format(msg, location), file=sys.stderr)
617
618
def debug(*args, **kwargs):
    """print()-compatible helper that only emits when --verbose was given;
    output defaults to stderr (Python 2 disallowed a 'file=' keyword-only
    default, hence the kwargs dance)."""
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
625
626
def find_run_lines(test, lines):
    """Extract the RUN: commands from *lines*, merging backslash-continued
    lines into single logical commands."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        # Fold a continuation onto the previous command.
        if run_lines and run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for run in run_lines:
        debug("  RUN: {}".format(run))
    return run_lines
640
641
def get_triple_from_march(march):
    """Map a -march value to its default target triple; fall back to 'x86'
    (with a note on stderr) for unknown architectures."""
    march_to_triple = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    matching = (
        triple
        for prefix, triple in march_to_triple.items()
        if march.startswith(prefix)
    )
    triple = next(matching, None)
    if triple is not None:
        return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
657
658
def get_globals_name_prefix(raw_tool_output):
    """Return '_' when the module's data layout mangles symbols with a
    leading underscore (mangling modes 'm:o' and 'm:x' per the LLVM data
    layout spec), otherwise None (also when no data layout is present)."""
    m = DATA_LAYOUT_RE.search(raw_tool_output)
    if not m:
        return None
    data_layout = m.group("layout")
    idx = data_layout.find("m:")
    if idx < 0:
        return None
    # Slice rather than index so a (malformed) layout ending right at "m:"
    # yields '' and falls through to None instead of raising IndexError.
    mangling = data_layout[idx + 2 : idx + 3]
    return "_" if mangling in ("o", "x") else None
669
670
def apply_filters(line, filters):
    """Decide whether *line* survives the --filter/--filter-out list.

    Filters are tried in order and the first whose regex matches decides:
    keep for --filter, drop for --filter-out.  When nothing matches, the
    line is kept only if every filter was a --filter-out (i.e. there was no
    positive filter left unsatisfied).
    """
    has_positive_filter = False
    for f in filters:
        if not f.is_filter_out:
            has_positive_filter = True
        if f.search(line):
            # Direct boolean instead of the old 'False if ... else True'.
            return not f.is_filter_out
    # No filter matched: keep the line unless a positive filter went unmet.
    return not has_positive_filter
681
682
def do_filter(body, filters):
    """Apply the --filter/--filter-out regexes to *body* line by line; a
    no-op when no filters were given."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
691
692
def scrub_body(body):
    """Normalize whitespace in a function body: collapse interior runs of
    spaces/tabs (keeping leading indentation), expand tabs to two spaces,
    and strip trailing whitespace."""
    # Collapse whitespace runs but leave leading indentation in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
703
704
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run *scrubber* on *body*.  When scrubber args are present they are
    deep-copied first, so setting extra_scrub for this call does not leak
    into the caller's argument list."""
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
711
712
# Build up a dictionary of all the function bodies.
class function_body(object):
    """One scrubbed function body captured from tool output, plus the pieces
    (attributes, signature, name separator) needed to emit checks and to
    compare bodies across RUN lines."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string  # scrubbed body used for the emitted checks
        self.extrascrub = extra  # extra-scrubbed body used for comparisons
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        # Helper object supplying the value-matching regexp and match
        # accessors — presumably a GeneralizerInfo; confirm in callers.
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True when this body and the given one differ at most in the
        names of '%' argument values (IR mode); in asm mode the extra-scrubbed
        bodies must match exactly."""
        arg_names = set()

        def drop_arg_names(match):
            # Strip the names of '%' (argument/local) values from a signature
            # match, recording them; leave other value kinds intact.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Drop body uses of the names recorded above so the comparison
            # ignores argument renames.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        # Ignore IR comments when comparing bodies.
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
779
780
781class FunctionTestBuilder:
782    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
783        self._verbose = flags.verbose
784        self._record_args = flags.function_signature
785        self._check_attributes = flags.check_attributes
786        # Strip double-quotes if input was read by UTC_ARGS
787        self._filters = (
788            list(
789                map(
790                    lambda f: Filter(
791                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
792                    ),
793                    flags.filters,
794                )
795            )
796            if flags.filters
797            else []
798        )
799        self._scrubber_args = scrubber_args
800        self._path = path
801        self._ginfo = ginfo
802        # Strip double-quotes if input was read by UTC_ARGS
803        self._replace_value_regex = list(
804            map(lambda x: x.strip('"'), flags.replace_value_regex)
805        )
806        self._func_dict = {}
807        self._func_order = {}
808        self._global_var_dict = {}
809        self._processed_prefixes = set()
810        for tuple in run_list:
811            for prefix in tuple[0]:
812                self._func_dict.update({prefix: dict()})
813                self._func_order.update({prefix: []})
814                self._global_var_dict.update({prefix: dict()})
815
816    def finish_and_get_func_dict(self):
817        for prefix in self.get_failed_prefixes():
818            warn(
819                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
820                % (
821                    prefix,
822                    self._path,
823                )
824            )
825        return self._func_dict
826
    def func_order(self):
        # Accessor: prefix -> list of function names, pre-seeded empty in
        # __init__ (presumably appended to during run-line processing).
        return self._func_order
829
    def global_var_dict(self):
        # Accessor: prefix -> global-value map, pre-seeded empty in __init__
        # and filled by build_global_values_dictionary in process_run_line.
        return self._global_var_dict
832
    def is_filtered(self):
        # True when any --filter/--filter-out expressions are active.
        return bool(self._filters)
835
836    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
837        build_global_values_dictionary(
838            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
839        )
840        for m in function_re.finditer(raw_tool_output):
841            if not m:
842                continue
843            func = m.group("func")
844            body = m.group("body")
845            # func_name_separator is the string that is placed right after function name at the
846            # beginning of assembly function definition. In most assemblies, that is just a
847            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
848            # False, just assume that separator is an empty string.
849            if self._ginfo.is_asm():
850                # Use ':' as default separator.
851                func_name_separator = (
852                    m.group("func_name_separator")
853                    if "func_name_separator" in m.groupdict()
854                    else ":"
855                )
856            else:
857                func_name_separator = ""
858            attrs = m.group("attrs") if self._check_attributes else ""
859            funcdef_attrs_and_ret = (
860                m.group("funcdef_attrs_and_ret") if self._record_args else ""
861            )
862            # Determine if we print arguments, the opening brace, or nothing after the
863            # function name
864            if self._record_args and "args_and_sig" in m.groupdict():
865                args_and_sig = scrub_body(m.group("args_and_sig").strip())
866            elif "args_and_sig" in m.groupdict():
867                args_and_sig = "("
868            else:
869                args_and_sig = ""
870            filtered_body = do_filter(body, self._filters)
871            scrubbed_body = do_scrub(
872                filtered_body, scrubber, self._scrubber_args, extra=False
873            )
874            scrubbed_extra = do_scrub(
875                filtered_body, scrubber, self._scrubber_args, extra=True
876            )
877            if "analysis" in m.groupdict():
878                analysis = m.group("analysis")
879                if analysis not in SUPPORTED_ANALYSES:
880                    warn("Unsupported analysis mode: %r!" % (analysis,))
881            if func.startswith("stress"):
882                # We only use the last line of the function body for stress tests.
883                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
884            if self._verbose:
885                print("Processing function: " + func, file=sys.stderr)
886                for l in scrubbed_body.splitlines():
887                    print("  " + l, file=sys.stderr)
888            for prefix in prefixes:
889                # Replace function names matching the regex.
890                for regex in self._replace_value_regex:
891                    # Pattern that matches capture groups in the regex in leftmost order.
892                    group_regex = re.compile(r"\(.*?\)")
893                    # Replace function name with regex.
894                    match = re.match(regex, func)
895                    if match:
896                        func_repl = regex
897                        # Replace any capture groups with their matched strings.
898                        for g in match.groups():
899                            func_repl = group_regex.sub(
900                                re.escape(g), func_repl, count=1
901                            )
902                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)
903
904                    # Replace all calls to regex matching functions.
905                    matches = re.finditer(regex, scrubbed_body)
906                    for match in matches:
907                        func_repl = regex
908                        # Replace any capture groups with their matched strings.
909                        for g in match.groups():
910                            func_repl = group_regex.sub(
911                                re.escape(g), func_repl, count=1
912                            )
913                        # Substitute function call names that match the regex with the same
914                        # capture groups set.
915                        scrubbed_body = re.sub(
916                            func_repl, "{{" + func_repl + "}}", scrubbed_body
917                        )
918
919                if func in self._func_dict[prefix]:
920                    if self._func_dict[prefix][func] is not None and (
921                        str(self._func_dict[prefix][func]) != scrubbed_body
922                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
923                        or self._func_dict[prefix][func].attrs != attrs
924                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
925                        != funcdef_attrs_and_ret
926                    ):
927                        if self._func_dict[prefix][func].is_same_except_arg_names(
928                            scrubbed_extra,
929                            funcdef_attrs_and_ret,
930                            args_and_sig,
931                            attrs,
932                        ):
933                            self._func_dict[prefix][func].scrub = scrubbed_extra
934                            self._func_dict[prefix][func].args_and_sig = args_and_sig
935                        else:
936                            # This means a previous RUN line produced a body for this function
937                            # that is different from the one produced by this current RUN line,
938                            # so the body can't be common across RUN lines. We use None to
939                            # indicate that.
940                            self._func_dict[prefix][func] = None
941                else:
942                    if prefix not in self._processed_prefixes:
943                        self._func_dict[prefix][func] = function_body(
944                            scrubbed_body,
945                            scrubbed_extra,
946                            funcdef_attrs_and_ret,
947                            args_and_sig,
948                            attrs,
949                            func_name_separator,
950                            self._ginfo,
951                        )
952                        self._func_order[prefix].append(func)
953                    else:
954                        # An earlier RUN line used this check prefixes but didn't produce
955                        # a body for this function. This happens in Clang tests that use
956                        # preprocesser directives to exclude individual functions from some
957                        # RUN lines.
958                        self._func_dict[prefix][func] = None
959
960    def processed_prefixes(self, prefixes):
961        """
962        Mark a set of prefixes as having had at least one applicable RUN line fully
963        processed. This is used to filter out function bodies that don't have
964        outputs for all RUN lines.
965        """
966        self._processed_prefixes.update(prefixes)
967
968    def get_failed_prefixes(self):
969        # This returns the list of those prefixes that failed to match any function,
970        # because there were conflicting bodies produced by different RUN lines, in
971        # all instances of the prefix.
972        for prefix in self._func_dict:
973            if self._func_dict[prefix] and (
974                not [
975                    fct
976                    for fct in self._func_dict[prefix]
977                    if self._func_dict[prefix][fct] is not None
978                ]
979            ):
980                yield prefix
981
982
983##### Generator of LLVM IR CHECK lines
984
985SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
986
987# TODO: We should also derive check lines for global, debug, loop declarations, etc..
988
989
class NamelessValue:
    """
    Describes one category of IR value whose "name" gets generalized into a
    FileCheck meta variable in generated check lines.  The "name" may be a
    textual identifier (as in `@some_global` or `%x`) or a bare number (as in
    `%12` or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # Prefix used when inventing FileCheck variable names (e.g. "TMP").
        self.check_prefix = check_prefix
        # Key identifying the kind of IR entity (e.g. "%", "@", "!").
        self.check_key = check_key
        # Regex fragments that bracket the value's name in the IR text.
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        # Right-hand-side pattern for global definitions, or None otherwise.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) change with unrelated LLVM
        # modifications; map those onto a stable incrementing counter instead.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """True iff this kind of value is defined locally to a function, which
        we assume holds precisely for LLVM IR local values ('%')."""
        return self.check_key == "%"

    def get_ir_regex(self):
        r"""Regex used to match this kind of IR value, e.g. [\w.-]+? for locals."""
        # For backwards compatibility, locals are matched with '.*'.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Derive a FileCheck variable name from the IR name ``var``."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            counter = self.variable_mapping.get(var, None)
            if counter is None:
                # First time we see this number: assign the next counter value.
                counter = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = counter
            var = counter
        if var.isdigit():
            # Nameless value: prepend the check prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # Named value that collides with check_prefix: disambiguate it with
            # _prefix_filecheck_ir_name when one has been configured.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        """Return the (prefix, suffix) text surrounding the matched name."""
        found_prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        found_suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return found_prefix, found_suffix
1069        return prefix, suffix
1070
1071
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be
    generalized (e.g., variable names replaced by FileCheck meta variables) as
    well as per-test-file state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # One regexp covering all value kinds, and one restricted to globals
        # whose numeric ids are unstable (not matched literally).
        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Build one alternation regexp over the selected nameless values.

        Returns (compiled_regexp, values) where ``values`` lists the
        NamelessValue objects in the same order their alternatives (and hence
        their capture groups) appear in the regexp.
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # In IR mode, global definitions are only matched at line start.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # The only other regexp callers may have matched against.
            # (This was previously a no-op comparison expression; it was
            # clearly intended to be an assertion.)
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
1176
1177
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used for LLVM IR output.

    ``version`` selects which value kinds are recognized; from version 5 on,
    basic block labels are also generalized (see the version changelog at the
    top of this file).
    """
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    # (A stale commented-out filtering loop was removed here; the filtering it
    # described lives in GeneralizerInfo._build_regexp.)

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1228
1229
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used for backend assembly output."""
    mcinst = NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )
    mcreg = NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )

    # Assembly comments start with '#' or '//'.
    prefix = r"((?:#|//)\s*)"
    suffix = r"([>\s]|\Z)"

    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_ASM, [mcinst, mcreg], prefix, suffix
    )
1256
1257
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used for analysis-pass output."""
    group_value = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ANALYZE,
        [group_value],
        r"(\s*)",
        r"(\)?:)",
    )
1276
1277
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value iff ``var`` looks like ``check_prefix`` followed
    by digits (case-insensitively), i.e. it could collide with a scripted
    FileCheck variable name."""
    if not check_prefix:
        return check_prefix
    return re.match(r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE)
1283
1284
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Compute a large, crossing-free matching between the strings of lhs and rhs.

    Intuitively, this identifies the *unchanged* lines of a diff, where lhs
    and rhs hold the lines of the two file versions being compared.

    The result is a list of matched (lhs_idx, rhs_idx) index pairs.
    """
    if not lhs or not rhs:
        return []

    # Bucket line indices by text: text -> ([lhs indices], [rhs indices]).
    buckets = collections.defaultdict(lambda: ([], []))
    for lhs_idx, text in enumerate(lhs):
        buckets[text][0].append(lhs_idx)
    for rhs_idx, text in enumerate(rhs):
        buckets[text][1].append(rhs_idx)

    # Collect candidate matching edges. Their total number is capped at a
    # constant multiple of the input size to avoid quadratic runtime: unique
    # text pairings are always taken, ambiguous ones smallest-product first
    # until the budget runs out.
    ambiguous_buckets = []
    edges = []
    for bucket in buckets.values():
        if not bucket[0] or not bucket[1]:
            continue
        if len(bucket[0]) == len(bucket[1]) == 1:
            edges.append((bucket[0][0], bucket[1][0]))
        else:
            ambiguous_buckets.append(bucket)

    ambiguous_buckets.sort(key=lambda bucket: len(bucket[0]) * len(bucket[1]))

    budget = 2 * (len(lhs) + len(rhs))
    for bucket in ambiguous_buckets:
        if len(edges) + len(bucket[0]) * len(bucket[1]) > budget:
            break
        for lhs_idx in bucket[0]:
            for rhs_idx in bucket[1]:
                edges.append((lhs_idx, rhs_idx))

    if not edges:
        # Nothing in common, or every shared line was too ambiguous.
        # Give up and match nothing.
        return []

    # Compute a maximal crossing-free matching. Think of each edge as a point
    # in the plane, with x = lhs line number and y = rhs line number. The goal
    # is then the longest chain of points in which each point lies strictly
    # above and to the right of its predecessor.
    #
    # We sweep a vertical line from left to right. While sweeping we maintain:
    #
    #  * a table whose entry N records the "best" chain of N+1 points found so
    #    far to the left of the sweep line, where "best" means its last point
    #    has the minimal y coordinate. Entries are (y, edge index) pairs, and
    #    the y values are strictly increasing down the table, so a binary
    #    search answers "what is the longest chain ending below a given y";
    #
    #  * backlinks, in which every point links back to the previous point of
    #    some longest chain ending at that point.
    #
    # Each point is processed once with one binary search, so the sweep runs
    # in O(M log M) in the number of points.
    edges.sort(key=lambda edge: (edge[0], -edge[1]))

    backlinks = []
    table_rhs_idx = []
    table_edge_idx = []
    for _, rhs_idx in edges:
        edge_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Record a best chain ending in the current point. If a table entry
        # already holds this exact y (which can only happen when the same line
        # appears several times on the LHS), we simply overwrite it; either
        # choice is valid and only biases which of several equally good chains
        # is picked, with no systematic effect on the result's quality.
        if ti == len(table_rhs_idx):
            table_rhs_idx.append(rhs_idx)
            table_edge_idx.append(edge_idx)
        else:
            table_rhs_idx[ti] = rhs_idx
            table_edge_idx[ti] = edge_idx
        backlinks.append(table_edge_idx[ti - 1] if ti > 0 else None)

    # Recover the longest chain by walking the backlinks from the last table
    # entry, then flip it into left-to-right order.
    matches = []
    cursor = table_edge_idx[-1]
    while cursor is not None:
        matches.append(edges[cursor])
        cursor = backlinks[cursor]
    matches.reverse()
    return matches
1425
1426
1427VARIABLE_TAG = "[[@@]]"
1428METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
1429NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1430
1431
class TestVar:
    """Tracks one FileCheck variable together with the prefix/suffix context
    it has been seen with, so its definitions and uses can be rendered
    compactly."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        """Record another occurrence; drop the remembered prefix/suffix as
        soon as an occurrence disagrees with it."""
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        """Map the matched text onto a FileCheck variable name."""
        value = self._nameless_value
        return value.get_value_name(text, value.check_prefix)

    def get_def(self, name, prefix, suffix):
        """Render the defining occurrence of the variable."""
        value = self._nameless_value
        if value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        # A prefix/suffix consistently seen with this variable is folded into
        # the capture pattern itself rather than emitted outside it.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        """Render a use of the variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"
1471
1472
class CheckValueInfo:
    """Describes a single FileCheck variable occurrence within a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Kind key of the value, e.g. '%'.
        self.key = key
        # Raw text the FileCheck variable stands for (without prefix/suffix).
        self.text = text
        # The FileCheck variable's name.
        self.name = name
        # Surrounding context captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
1494
1495
# Representation of a check line that lets check lines be compared while
# ignoring some or all of the FileCheck variable names they contain.
class CheckLineInfo:
    def __init__(self, line, values):
        # Line text with every FileCheck variable occurrence replaced by
        # VARIABLE_TAG.
        self.line: str = line

        # One CheckValueInfo per FileCheck variable occurrence in the line.
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
1508
1509
1510def remap_metavar_names(
1511    old_line_infos: List[CheckLineInfo],
1512    new_line_infos: List[CheckLineInfo],
1513    committed_names: Set[str],
1514) -> Mapping[str, str]:
1515    """
1516    Map all FileCheck variable names that appear in new_line_infos to new
1517    FileCheck variable names in an attempt to reduce the diff from old_line_infos
1518    to new_line_infos.
1519
1520    This is done by:
1521    * Matching old check lines and new check lines using a diffing algorithm
1522      applied after replacing names with wildcards.
1523    * Committing to variable names such that the matched lines become equal
1524      (without wildcards) if possible
1525    * This is done recursively to handle cases where many lines are equal
1526      after wildcard replacement
1527    """
1528    # Initialize uncommitted identity mappings
1529    new_mapping = {}
1530    for line in new_line_infos:
1531        for value in line.values:
1532            new_mapping[value.name] = value.name
1533
1534    # Recursively commit to the identity mapping or find a better one
1535    def recurse(old_begin, old_end, new_begin, new_end):
1536        if old_begin == old_end or new_begin == new_end:
1537            return
1538
1539        # Find a matching of lines where uncommitted names are replaced
1540        # with a placeholder.
1541        def diffify_line(line, mapper):
1542            values = []
1543            for value in line.values:
1544                mapped = mapper(value.name)
1545                values.append(mapped if mapped in committed_names else "?")
1546            return line.line.strip() + " @@@ " + " @ ".join(values)
1547
1548        lhs_lines = [
1549            diffify_line(line, lambda x: x)
1550            for line in old_line_infos[old_begin:old_end]
1551        ]
1552        rhs_lines = [
1553            diffify_line(line, lambda x: new_mapping[x])
1554            for line in new_line_infos[new_begin:new_end]
1555        ]
1556
1557        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
1558
1559        candidate_matches = [
1560            (old_begin + lhs_idx, new_begin + rhs_idx)
1561            for lhs_idx, rhs_idx in candidate_matches
1562        ]
1563
1564        # Candidate matches may conflict if they require conflicting mappings of
1565        # names. We want to determine a large set of compatible candidates,
1566        # because that leads to a small diff.
1567        #
1568        # We think of the candidates as vertices in a conflict graph. The
1569        # conflict graph has edges between incompatible candidates. We want to
1570        # find a large independent set in this graph.
1571        #
1572        # Greedily selecting candidates and removing incompatible ones has the
1573        # disadvantage that making few bad decisions early on can have huge
1574        # consequences.
1575        #
1576        # Instead, we implicitly compute multiple independent sets by greedily
1577        # assigning a *coloring* to the conflict graph. Then, we select the
1578        # largest color class (which is the largest independent set we found),
1579        # commit to all candidates in it, and recurse.
1580        #
1581        # Note that we don't actually materialize the conflict graph. Instead,
1582        # each color class tracks the information needed to decide implicitly
1583        # whether a vertex conflicts (has an edge to) any of the vertices added
1584        # to the color class so far.
1585        class Color:
1586            def __init__(self):
1587                # (lhs_idx, rhs_idx) of matches in this color
1588                self.matches = []
1589
1590                # rhs_name -> lhs_name mappings required by this color
1591                self.mapping = {}
1592
1593                # lhs_names committed for this color
1594                self.committed = set()
1595
1596        colors = []
1597
1598        for lhs_idx, rhs_idx in candidate_matches:
1599            lhs_line = old_line_infos[lhs_idx]
1600            rhs_line = new_line_infos[rhs_idx]
1601
1602            # We scan through the uncommitted names in the candidate line and
1603            # filter out the color classes to which the candidate could be
1604            # assigned.
1605            #
1606            # Simultaneously, we prepare a new color class in case the candidate
1607            # conflicts with all colors that have been established so far.
1608            compatible_colors = colors[:]
1609            new_color = Color()
1610            new_color.matches.append((lhs_idx, rhs_idx))
1611
1612            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
1613                if new_mapping[rhs_value.name] in committed_names:
1614                    # The new value has already been committed. If it was mapped
1615                    # to the same name as the original value, we can consider
1616                    # committing other values from this line. Otherwise, we
1617                    # should ignore this line.
1618                    if new_mapping[rhs_value.name] == lhs_value.name:
1619                        continue
1620                    else:
1621                        break
1622
1623                if rhs_value.name in new_color.mapping:
1624                    # Same, but for a possible commit happening on the same line
1625                    if new_color.mapping[rhs_value.name] == lhs_value.name:
1626                        continue
1627                    else:
1628                        break
1629
1630                if (
1631                    lhs_value.name in committed_names
1632                    or lhs_value.name in new_color.committed
1633                ):
1634                    # We can't map this value because the name we would map it
1635                    # to has already been committed for something else. Give up
1636                    # on this line.
1637                    break
1638
1639                new_color.mapping[rhs_value.name] = lhs_value.name
1640                new_color.committed.add(lhs_value.name)
1641
1642                color_idx = 0
1643                while color_idx < len(compatible_colors):
1644                    color = compatible_colors[color_idx]
1645                    compatible = True
1646                    if rhs_value.name in color.mapping:
1647                        compatible = color.mapping[rhs_value.name] == lhs_value.name
1648                    else:
1649                        compatible = lhs_value.name not in color.committed
1650                    if compatible:
1651                        color_idx += 1
1652                    else:
1653                        del compatible_colors[color_idx]
1654            else:
1655                # We never broke out of the loop, which means that at a minimum,
1656                # this line is viable standalone
1657                if compatible_colors:
1658                    color = max(compatible_colors, key=lambda color: len(color.matches))
1659                    color.mapping.update(new_color.mapping)
1660                    color.committed.update(new_color.committed)
1661                    color.matches.append((lhs_idx, rhs_idx))
1662                else:
1663                    colors.append(new_color)
1664
1665        if colors:
1666            # Pick the largest color class. This gives us a large independent
1667            # (non-conflicting) set of candidate matches. Assign all names
1668            # required by the independent set and recurse.
1669            max_color = max(colors, key=lambda color: len(color.matches))
1670
1671            for rhs_var, lhs_var in max_color.mapping.items():
1672                new_mapping[rhs_var] = lhs_var
1673                committed_names.add(lhs_var)
1674
1675                if (
1676                    lhs_var != rhs_var
1677                    and lhs_var in new_mapping
1678                    and new_mapping[lhs_var] == lhs_var
1679                ):
1680                    new_mapping[lhs_var] = "conflict_" + lhs_var
1681
1682            matches = (
1683                [(old_begin - 1, new_begin - 1)]
1684                + max_color.matches
1685                + [(old_end, new_end)]
1686            )
1687
1688            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
1689                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
1690
1691    recurse(0, len(old_line_infos), 0, len(new_line_infos))
1692
1693    # Commit to remaining names and resolve conflicts
1694    for new_name, mapped_name in new_mapping.items():
1695        if mapped_name in committed_names:
1696            continue
1697        if not mapped_name.startswith("conflict_"):
1698            assert mapped_name == new_name
1699            committed_names.add(mapped_name)
1700
1701    for new_name, mapped_name in new_mapping.items():
1702        if mapped_name in committed_names:
1703            continue
1704        assert mapped_name.startswith("conflict_")
1705
1706        m = NUMERIC_SUFFIX_RE.search(new_name)
1707        base_name = new_name[: m.start()]
1708        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
1709        while True:
1710            candidate = f"{base_name}{suffix}"
1711            if candidate not in committed_names:
1712                new_mapping[new_name] = candidate
1713                committed_names.add(candidate)
1714                break
1715            suffix += 1
1716
1717    return new_mapping
1718
1719
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Replace value names in check lines with FileCheck variable captures.

    Args:
      lines: raw check lines for one function (or one global definition).
        Transformed in place and also returned.
      ginfo: GeneralizerInfo providing the output mode (IR/asm/analysis) and
        the regexps used to locate values in a line.
      vars_seen: dict keyed by (name, check_key) of local values captured so
        far; updated as new values are encountered.
      global_vars_seen: same, for global values shared across functions.
      preserve_names: if True, skip the name-to-capture rewriting entirely.
      original_check_lines: previously generated check lines for the same
        function; used to keep FileCheck variable names stable across
        regenerations of the test.
      unstable_globals_only: restrict matching to ginfo's "unstable globals"
        regexp instead of the full value regexp.

    Returns:
      The transformed `lines` list (the same object that was passed in).
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # For globals selected by --global-hex-value-regex, rewrite integer
            # initializers as hex [[#0x...]] numeric checks.
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # FileCheck variable names already claimed by values seen in earlier
        # calls for this function; new names must not collide with these.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose defining (first) occurrence is within this batch of
        # lines; their first emission below uses get_def instead of get_use.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            # Consume all value matches in the line left to right, replacing
            # each with VARIABLE_TAG and recording it in `values`.
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            # Re-expand each VARIABLE_TAG, in order, into a FileCheck def or
            # use of the (possibly remapped) variable name.
            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                # Pick the dict the value was recorded in above ('%' is the
                # check key used for local values).
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1902
1903
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Append FileCheck lines for `func_name` for every applicable prefix.

    Args:
      output_lines: list the generated check lines are appended to.
      comment_marker: comment leader of the test file (e.g. ';' or '//').
      prefix_list: per-RUN-line tuples; element 0 is that RUN line's list of
        check prefixes.
      func_dict: check prefix -> function name -> parsed function record.
      func_name: the function to emit checks for.
      check_label_format: format string used to build the -LABEL line.
      ginfo: GeneralizerInfo selecting IR, asm, or analysis behavior.
      global_vars_seen_dict: check prefix -> globals captured so far; updated
        with globals first seen while printing this function.
      is_filtered: when True, emit plain CHECK lines instead of CHECK-NEXT.
      preserve_names: forwarded to generalize_check_lines.
      original_check_lines: existing check lines per prefix, used to keep
        FileCheck variable names stable (IR mode only).

    Returns:
      The list of prefixes for which checks were actually printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # A previous RUN line already emitted checks under this prefix;
            # nothing further to do for this RUN line.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            # Defensive: the entry was already ensured at the top of the loop.
            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2150
2151
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit IR check lines for one function by delegating to add_checks.

    Builds the -LABEL format string appropriate for the requested signature
    matching and script version, then forwards all bookkeeping to add_checks.
    """
    assert ginfo.is_ir()
    # Label format is based on IR string. Decide how much of the "define"
    # line the label must match.
    if not function_sig:
        function_def_regex = "%s"
    elif ginfo.get_version() > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
2189
2190
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit analysis-output check lines for one function via add_checks.

    Analysis labels quote the function description, and globals are not
    tracked across functions, so a fresh (empty) tracking dict is passed.
    """
    assert ginfo.is_analyze()
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        "{} %s-LABEL: '%s%s%s%s'".format(comment_marker),
        ginfo,
        {},
        is_filtered,
    )
2214
2215
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Populate glob_val_dict[prefix][check_prefix] from raw tool output.

    For every global nameless value, scans raw_tool_output for lines of the
    form "<lhs> = <rhs>" and stores them as (start_offset, line) pairs so the
    emitted checks can later be sorted back into source order (relevant for
    GLOB and GLOBNAMED, which may appear interlaced). A prefix whose existing
    entry disagrees with the newly collected lines is poisoned with None,
    except for the last prefix of a run line, where a warning is emitted and
    the entry is overwritten.
    """
    for nameless_value in ginfo.get_nameless_values():
        rhs_re_str = nameless_value.global_ir_rhs_regexp
        if rhs_re_str is None:
            continue

        value_re = re.compile(
            r"^"
            + nameless_value.ir_prefix
            + nameless_value.ir_regexp
            + r"\s=\s"
            + rhs_re_str
            + r"$",
            flags=(re.M),
        )
        # Keep each match's start offset alongside the matched text so later
        # passes can order check lines correctly.
        lines = [(m.start(), m.group(0)) for m in value_re.finditer(raw_tool_output)]

        key = nameless_value.check_prefix
        for prefix in prefixes:
            prefix_dict = glob_val_dict[prefix]
            if prefix_dict is None:
                continue
            if key in prefix_dict:
                if prefix_dict[key] == lines:
                    # Identical to what we already recorded; nothing to do.
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under a shared prefix: poison it.
                    prefix_dict[key] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            prefix_dict[key] = lines
2245
2246
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Filter global-value check lines according to the --check-globals mode.

    Args:
      global_val_lines_w_index: list of (start_index, line) pairs, each line
        a global definition of the form "<name> = <rhs>".
      global_vars_seen: iterable of (var, check_key) pairs for globals that
        are already referenced by emitted check lines.
      nameless_value: the nameless value describing this kind of global
        (provides ir_prefix/ir_regexp/global_ir_rhs_regexp/check_key).
      global_check_setting: one of 'none', 'all', or 'smart'.

    Returns:
      The (start_index, line) pairs to keep: nothing for 'none', everything
      for 'all', and for 'smart' only the globals transitively referenced by
      the ones already seen.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    # Compile the extraction regexes once. The previous version rebuilt the
    # pattern on every call of extract() and ran the extraction twice per
    # line (once for the reference graph, again in the final filter).
    def_re = re.compile(
        "^"
        + nameless_value.ir_prefix
        + "("
        + nameless_value.ir_regexp
        + ") = ("
        + nameless_value.global_ir_rhs_regexp
        + ")"
    )
    ref_re = re.compile(nameless_value.ir_regexp)

    # Map each defined global to the globals referenced by its right-hand
    # side; remember the defined name per input line for the filter pass.
    contains_refs_to = {}
    defined_vars = []
    for _, line in global_val_lines_w_index:
        match = def_re.match(line)
        var = match.group(1)
        defined_vars.append(var)
        contains_refs_to[var] = ref_re.findall(match.group(2))

    # Everything transitively reachable from the globals the check lines
    # already mention.
    transitively_visible = set()

    def add(var):
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        for ref in contains_refs_to.get(var, ()):
            add(ref)

    for var, check_key in global_vars_seen:
        if check_key == nameless_value.check_key:
            add(var)

    return [
        entry
        for entry, var in zip(global_val_lines_w_index, defined_vars)
        if var in transitively_visible
    ]
2299
2300
# Substitutions that scrub build-dependent strings (tool version numbers,
# absolute source directories) out of metadata lines.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [
    (re.compile(pattern), repl) for pattern, repl in METADATA_FILTERS
]


def filter_unstable_metadata(line):
    """Apply every metadata scrubbing substitution to `line` and return it."""
    for regex, replacement in METADATA_FILTERS_RE:
        line = regex.sub(replacement, line)
    return line
2315
2316
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append the buffered (index, check-line) pairs to output_lines.

    Lines are emitted in ascending index order after a separator comment,
    and the buffer is emptied. An empty buffer is a no-op.
    """
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    output_lines.extend(check_line for _, check_line in sorted(new_lines_w_index))
    new_lines_w_index.clear()
2325
2326
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append FileCheck lines for global IR values (e.g. @globals / !metadata).

    For every nameless-value kind known to *ginfo* that describes a global
    definition and matches *is_before_functions*, take the scrubbed lines
    recorded in *glob_val_dict* (keyed by check prefix, then by value check
    prefix) and emit "<comment_marker> <prefix>: <line>" checks into
    *output_lines*.  *global_vars_seen_dict* is updated in place so that
    globals already captured under a prefix are reused rather than
    re-captured.

    Returns the set of (check prefix, value check prefix) pairs for which
    checks were actually printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        # Only value kinds that correspond to global IR definitions apply here.
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        # Globals are emitted either before or after the function bodies; only
        # handle the half the caller asked for.
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                # Seed the per-prefix variable map with globals already seen
                # under this prefix so repeated values reuse their names.
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                # This prefix was already covered for this value kind by an
                # earlier run line; move on to the next prefix tuple.
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                # Apply the configured global-check preference filtering
                # (global_check_setting, e.g. from --check-globals).
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # If explicit global-value regexes were configured, only
                    # keep @name/!name definitions matching one of them.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    # Replace unstable names (only) with FileCheck captures.
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                # Value kinds not interlaced with the previous kind start a
                # fresh group: flush what has accumulated so far.
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush the pending checks of the first covered prefix on each run
        # line, then terminate the section with a separator comment.
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
2428
2429
def check_prefix(prefix):
    """Warn if *prefix* is not a valid FileCheck check prefix.

    A valid prefix contains only alphanumerics, hyphens and underscores
    (per PREFIX_RE).  When the prefix contains a comma, the user most
    likely meant --check-prefixes, so the warning includes that hint.
    """
    if PREFIX_RE.match(prefix):
        return
    hint = (
        " Did you mean '--check-prefixes=" + prefix + "'?" if "," in prefix else ""
    )
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + hint
        )
        % (prefix)
    )
2441        )
2442
2443
def get_check_prefixes(filecheck_cmd):
    """Extract all check prefixes from a FileCheck command line.

    Collects every comma-separated prefix from each CHECK_PREFIX_RE match
    in *filecheck_cmd*; defaults to ["CHECK"] when none are specified.
    """
    prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(match.group(1).split(","))
    return prefixes if prefixes else ["CHECK"]
2453
2454
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix mentioned on a FileCheck command line.

    Warns (via check_prefix) about syntactically invalid prefixes, and
    additionally warns when --check-prefixes lists the same prefix twice.
    """
    for token in fc_cmd.split():
        if "check-prefix=" in token:
            check_prefix(token.split("=", 1)[1])
        elif "check-prefixes=" in token:
            prefixes = token.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2469                    )
2470
2471
def get_autogennote_suffix(parser, args):
    """Reconstruct the non-default options to record in the autogenerated
    note (UTC_ARGS) line.

    Walks *parser*'s registered actions, and for each option whose value in
    *args* differs from its default (and which affects check generation),
    appends the flag and value to the returned string.  Returns
    " <UTC_ARGS_KEY> <flags...>" or "" when everything is default.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # check_globals has a version-dependent default and a legacy
            # spelling: before version 4 the bare flag meant "all".
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Multi-valued options are re-quoted individually.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Strip the trailing space and prepend the UTC_ARGS marker.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2533
2534
def check_for_command(line, parser, args, argv, argparse_callback):
    """Process an in-test UTC_ARGS command line, if *line* is one.

    On a match, the command's options are appended to *argv* (mutated in
    place), the full argv (minus test paths) is re-parsed, and the optional
    *argparse_callback* is invoked with the new namespace.  Returns the
    (possibly updated) (args, argv) pair.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if cmd_m is None:
        return args, argv
    argv.extend(
        option for option in shlex.split(cmd_m.group("cmd").strip()) if option
    )
    args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
2545
2546
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option value for a test.

    First consults the test's top-level parsed arguments
    (``test_info.args``).  If no value is found there and *is_global* is
    set, every line of the test is scanned for a UTC_ARGS-style override:
    a "global" option affects the entire generation of test checks, so a
    match anywhere in the test applies to everything.

    Prints a warning to stderr when the option is only found after
    non-comment test input has already started, since such late options
    are easy to overlook.  Returns the value from *get_arg_to_check* (may
    be falsy if the option is absent everywhere).
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global"
        # option that affects the entire generation of test checks.  If it
        # exists anywhere in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            # Track whether we have passed the start of the actual test body
            # (anything that is neither a ';' comment nor blank).
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # Fix: this previously read "if warn and saw_line", but
                # `warn` there is the module-level function object and is
                # always truthy, so the condition reduces to `saw_line`.
                if saw_line:
                    # We saw the option after already reading some test input
                    # lines.  Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2576
2577
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into *output_lines*, dropping stale checks.

    Skips bare comment-marker lines, separator lines, and any existing check
    line whose prefix belongs to *prefix_set* (those will be regenerated).
    All other lines are appended with their trailing newline stripped.
    """
    separator_line = comment_string + SEPARATOR
    for line_info in test_info.iterlines(output_lines):
        line = line_info.line
        stripped = line.strip()
        if stripped == comment_string or stripped == separator_line:
            continue
        if line.lstrip().startswith(comment_string):
            check_m = CHECK_RE.match(line)
            if check_m and check_m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2591
2592
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions at the end of the file, grouped by prefix.

    Iterates the run list's prefixes and, for each prefix, every function
    recorded in *func_order*, invoking *check_generator* once per
    (prefix, function) pair.  A bare *comment_string* line separates
    successive groups of checks.  Returns the set of prefixes for which any
    check was generated.
    """
    added = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        run_prefixes = run_entry[0]
        tool_args = run_entry[1]
        for chk_prefix in run_prefixes:
            for func in func_order[chk_prefix]:
                # The func order can contain the same functions multiple
                # times.  If we see one again we are done.
                if (func, chk_prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples of (prefix list, tool command args string) and emit
                # checks for every prefix in that list, implicitly assuming
                # that each run line produces output for every function.
                # That does not hold for generated functions, which some run
                # lines may not produce (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the single prefix we're interested
                # in.  This generates all checks for one prefix's functions
                # before moving to the next prefix, so checks are ordered by
                # prefix rather than by function as in "normal" mode.
                for generated_prefix in check_generator(
                    output_lines, [([chk_prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
2630