xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 779be6fd1d5a7b9b93654abb363c28e7ff51483e)
1from __future__ import print_function
2
3import argparse
4import bisect
5import collections
6import copy
7import glob
8import itertools
9import os
10import re
11import subprocess
12import sys
13import shlex
14
15from typing import List, Mapping, Set
16
17##### Common utilities for update_*test_checks.py
18
19
# Module-level state shared by the update_*test_checks.py scripts.
# Mirrors the --verbose flag; set by parse_commandline_args()/parse_args().
_verbose = False
# Prefix prepended to FileCheck IR value names (--prefix-filecheck-ir-name).
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Output-format version used when generating brand-new tests; regenerated
# tests keep the version recorded in their existing --version argument.
DEFAULT_VERSION = 5


# Analysis names accepted without an "Unsupported analysis mode" warning
# (see FunctionTestBuilder.process_run_line).
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
44
45
class Regex(object):
    """A deep-copyable wrapper around a compiled regular expression.

    do_scrub deep-copies its arguments; this wrapper makes that work by
    sharing the underlying compiled pattern instead of duplicating it.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Copy the wrapper shallowly and keep pointing at the same
        # compiled pattern object.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Delegate to the wrapped pattern's search()."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Delegate to the wrapped pattern's sub()."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string."""
        return self.regex.pattern

    def flags(self):
        """Return the pattern's compilation flags."""
        return self.regex.flags
72
class Filter(Regex):
    """A Regex plus a direction flag.

    is_filter_out selects whether matching lines are removed from the
    generated checks (True, --filter-out) or are the only lines kept
    (False, --filter).
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Deep-copy via the Regex base (which shares the compiled pattern),
        # then carry the direction flag over.
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
88
def parse_commandline_args(parser):
    """Register the options shared by all update_*test_checks.py scripts on
    *parser*, parse sys.argv, mirror a few options into module-level state
    (_verbose, _global_value_regex, _global_hex_value_regex), and return the
    parsed namespace.
    """

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # BUGFIX: option_string was previously referenced here without
                # being a parameter, so a malformed regex raised NameError
                # instead of this intended ValueError.
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # --filter-out inverts the sense; rewrap the just-added Regex.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
233
234
def parse_args(parser, argv):
    """Parse *argv* with *parser*, apply version-dependent defaults, and
    mirror a few options into module-level state."""
    args = parser.parse_args(argv)
    # From output-format version 2 on, function signatures are always checked.
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    # Resolve the 'default' placeholder of --check-globals: 'smart' became
    # the default with output-format version 4.
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
247
248
class InputLineInfo(object):
    """One test-input line plus the option state in effect at that line."""

    def __init__(self, line, line_number, args, argv):
        # The raw line text, its 0-based position in the file, and the
        # parsed options / raw argv that apply at this point of the test.
        self.line, self.line_number = line, line_number
        self.args, self.argv = args, argv
255
256
class TestInfo(object):
    """Holds one test file's lines, RUN lines, and check-generation state.

    Instances are produced by itertests(); callers walk the test's lines via
    ro_iterlines()/iterlines(), which apply any in-line UTC_ARGS commands.
    """

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Record the IR-name prefix in module state for later use.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # .mir tests use '#' comments; everything else here uses ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        # First line written to regenerated tests: "<c> NOTE: ... by <script>".
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introduces the unused-prefix section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line, applying in-line UTC_ARGS
        commands to produce per-line args/argv without updating self."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield lines to process, updating self.args/self.argv as UTC_ARGS
        commands are seen. Old autogen notes are dropped; lines in a
        --disable region are copied straight to output_lines, not yielded."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # Check generation is disabled here: pass the line through.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return '<prefix>: {{.*}}' check lines (preceded by the unused-note
        header) for every prefix in run_list that was never used; an empty
        list when all prefixes were used."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
334
335
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for each test file matching *test_patterns*.

    Handles glob expansion, the skip/regenerate decision based on the file's
    first line (UTC_AVOID / UTC_ADVERT notes), per-test argument parsing,
    and removal of any previously generated unused-prefix section.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                # Refuse to overwrite another script's output unless forced.
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Honor UTC_ARGS recorded on the existing autogen note line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Truncate at the auto-generated unused-prefix section, if present.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
391
392
def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Decide whether an input line survives into the regenerated test.

    Dropped: bare comment-marker lines (unless skip_global_checks), the
    special separator comment, and CHECK lines whose prefix is in
    prefix_set (refined by the skip_* flags).
    """
    stripped = input_line.strip()
    # Skip any blank comment lines in the IR.
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Skip a special double comment line we use as a separator.
    if stripped == comment_marker + SEPARATOR:
        return False
    # (Blank lines in the IR are deliberately kept.)
    # And skip any CHECK lines. We're building our own.
    match = CHECK_RE.match(input_line)
    if not match or match.group(1) not in prefix_set:
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling.
        return False
    if skip_global_checks:
        # Only drop checks that refer to global value definitions.
        return not re.search(r"(\[\[|@)", input_line)
    return False
424
425
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: a <prefix>-LABEL line naming a function opens a list;
    # subsequent checks with the same prefix accumulate into it until the
    # prefix changes or a non-check line is seen.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                # Text after the "PREFIX[-KIND]:" directive.
                line = input_line[m.end() :].strip()

                # A different prefix ends the current function's run.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Plain/NEXT/NOT/DAG/EMPTY checks: append to the open list.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # SAME continues the LABEL line; its text is not collected.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        # Open a fresh list for this (function, prefix) pair.
                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any other line (non-comment, unmatched LABEL, ...) ends the run.
        current_function = None

    return result
477
478
479# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return lit-style (pattern, replacement) pairs for *sourcepath*."""
    directory = os.path.dirname(sourcepath)
    return [
        ("%s", sourcepath),  # the test file itself
        ("%S", directory),  # directory containing the test file
        ("%p", directory),  # also the containing directory
        ("%{pathsep}", os.pathsep),
    ]
488
489
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to *s*, in order."""
    result = s
    for pattern, replacement in substitutions:
        result = result.replace(pattern, replacement)
    return result
494
495
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run *exe* with *cmd_args* on input file *ir* and return its stdout.

    cmd_args may be a list (run directly) or a string (run via the shell).
    preprocess_cmd, if given, is a shell command whose stdout replaces the
    IR file as the tool's stdin. Lit-like substitutions (%s, %S, %p,
    %{pathsep}) are applied to the commands, and Windows line endings in
    the output are converted to Unix style.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                pp = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
                # The tool reads the preprocessor's output instead of the file.
                ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
543
544
##### LLVM IR parser

# A RUN: line in //, ; or # comment style; group 1 is the command text.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# The prefix list of a --check-prefix/--check-prefixes option.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A syntactically valid FileCheck prefix name.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# Any CHECK directive; group 1 = prefix, group 2 = kind (NEXT, NOT, ...).
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# Like CHECK_RE, but only recognizes the optional -SAME suffix.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# In-test marker for overriding script arguments on a per-line basis.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# Note placed at the top of every autogenerated test.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
# Note marking tests that must never be autogenerated.
UTC_AVOID = "NOTE: Do not autogenerate"
# Header of the trailing auto-maintained unused-prefix section.
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# An IR function definition in tool output; named groups: attrs,
# funcdef_attrs_and_ret, func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# "'<analysis>' for function '<func>':" analysis-printer output.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Per-function "-debug-only" output ("'<func>' ..." header then body).
LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# Source-side patterns: function definitions and target-selection options.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace/comment scrubbers applied to tool output before emitting checks.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to a bare comment marker to form the separator line skipped by
# should_add_line_to_output.
SEPARATOR = "."
597
598
def error(msg, test_file=None):
    """Print an ERROR-prefixed diagnostic to stderr, optionally naming the
    offending test file."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
603
604
def warn(msg, test_file=None):
    """Print a WARNING-prefixed diagnostic to stderr, optionally naming the
    offending test file."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
609
610
def debug(*args, **kwargs):
    """print() to stderr (by default), but only in verbose mode."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
617
618
def find_run_lines(test, lines):
    """Extract the RUN: commands from *lines*, joining backslash
    continuations into single logical RUN lines."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: splice onto the previous RUN line.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for line in run_lines:
        debug("  RUN: {}".format(line))
    return run_lines
632
633
def get_triple_from_march(march):
    """Map a -march value to a target triple by prefix, defaulting to 'x86'."""
    known_prefixes = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix in known_prefixes:
        if march.startswith(prefix):
            return known_prefixes[prefix]
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
648
649
def apply_filters(line, filters):
    """Return True if *line* should be kept according to *filters*.

    Filters are tried in order; the first whose regex matches decides the
    outcome (keep for --filter, drop for --filter-out). If none matches,
    the line is kept only when every filter was a filter-out.
    """
    saw_keep_filter = False
    for filt in filters:
        if not filt.is_filter_out:
            saw_keep_filter = True
        if filt.search(line):
            return not filt.is_filter_out
    # No filter matched: keep the line only if all filters were filter-out.
    return not saw_keep_filter
660
661
def do_filter(body, filters):
    """Apply *filters* line by line to *body*; with no filters, return the
    body unchanged."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
670
671
def scrub_body(body):
    """Normalize whitespace in a function body: collapse interior runs of
    whitespace (keeping leading indentation), expand tabs to two spaces,
    and strip trailing whitespace."""
    # Collapse runs of whitespace while leaving leading indentation intact.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
682
683
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run *scrubber* on *body*, propagating the *extra* flag through a deep
    copy of *scrubber_args* so the caller's argument objects stay untouched."""
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
690
691
# Build up a dictionary of all the function bodies.
class function_body(object):
    """One scrubbed function body plus the metadata needed to compare it
    against another RUN line's output for the same function."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # Scrubbed body (extra_scrub=False variant); also the str() form.
        self.scrub = string
        # Body scrubbed with extra_scrub=True, used for equivalence checks.
        self.extrascrub = extra
        # Attribute/return-type text preceding '@name' in the definition.
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        # Argument list / signature text.
        self.args_and_sig = args_and_sig
        # '; Function Attrs:' text (empty when attribute checking is off).
        self.attrs = attrs
        # Token following the function name; ':' for most asm, '' otherwise
        # (see FunctionTestBuilder.process_run_line).
        self.func_name_separator = func_name_separator
        # Generalizer info object supplying value-matching regexp helpers.
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return whether this body matches the given one modulo renaming of
        '%' argument values."""
        arg_names = set()

        def drop_arg_names(match):
            # Erase '%' value names from a signature, remembering them.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Erase uses of the remembered argument names from a body.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        # Signatures must agree once argument names are dropped.
        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # For IR, compare bodies with argument-name uses erased and IR
        # comments stripped.
        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
758
759
760class FunctionTestBuilder:
761    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
762        self._verbose = flags.verbose
763        self._record_args = flags.function_signature
764        self._check_attributes = flags.check_attributes
765        # Strip double-quotes if input was read by UTC_ARGS
766        self._filters = (
767            list(
768                map(
769                    lambda f: Filter(
770                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
771                    ),
772                    flags.filters,
773                )
774            )
775            if flags.filters
776            else []
777        )
778        self._scrubber_args = scrubber_args
779        self._path = path
780        self._ginfo = ginfo
781        # Strip double-quotes if input was read by UTC_ARGS
782        self._replace_value_regex = list(
783            map(lambda x: x.strip('"'), flags.replace_value_regex)
784        )
785        self._func_dict = {}
786        self._func_order = {}
787        self._global_var_dict = {}
788        self._processed_prefixes = set()
789        for tuple in run_list:
790            for prefix in tuple[0]:
791                self._func_dict.update({prefix: dict()})
792                self._func_order.update({prefix: []})
793                self._global_var_dict.update({prefix: dict()})
794
795    def finish_and_get_func_dict(self):
796        for prefix in self.get_failed_prefixes():
797            warn(
798                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
799                % (
800                    prefix,
801                    self._path,
802                )
803            )
804        return self._func_dict
805
    def func_order(self):
        # Per-prefix insertion order of function names (prefix -> [names]).
        return self._func_order
808
    def global_var_dict(self):
        # Per-prefix dictionary of global values seen in tool output.
        return self._global_var_dict
811
    def is_filtered(self):
        # True when any --filter/--filter-out expressions are active.
        return bool(self._filters)
814
    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """
        Record the output of one RUN line.

        Extracts every function definition from raw_tool_output using
        function_re (a compiled regex providing at least the named groups
        'func' and 'body', and optionally 'attrs', 'args_and_sig',
        'funcdef_attrs_and_ret', 'func_name_separator' and 'analysis'),
        filters and scrubs each body, and stores the result in
        self._func_dict under every prefix in prefixes.

        A function whose body conflicts with the one recorded by a previous
        RUN line for the same prefix is stored as None so that no common
        check lines are emitted for it.
        """
        # Collect global value definitions first; they are checked separately
        # from the per-function bodies.
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            # Apply user filters, then produce two scrubbed variants of the
            # body: the normal one and the "extra" one used for the
            # same-except-argument-names comparison below.
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    # A previous RUN line already recorded this function under
                    # this prefix; check whether the two recordings agree.
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocesser directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None
938
    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.

        Args:
          prefixes: iterable of check-prefix strings to mark as processed.
        """
        self._processed_prefixes.update(prefixes)
946
947    def get_failed_prefixes(self):
948        # This returns the list of those prefixes that failed to match any function,
949        # because there were conflicting bodies produced by different RUN lines, in
950        # all instances of the prefix.
951        for prefix in self._func_dict:
952            if self._func_dict[prefix] and (
953                not [
954                    fct
955                    for fct in self._func_dict[prefix]
956                    if self._func_dict[prefix][fct] is not None
957                ]
958            ):
959                yield prefix
960
961
962##### Generator of LLVM IR CHECK lines
963
# Matches a trailing IR comment (';' to end of line), including any leading
# whitespace, so it can be stripped from lines before check generation.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
965
966# TODO: We should also derive check lines for global, debug, loop declarations, etc..
967
968
class NamelessValue:
    """
    A NamelessValue object represents a type of value in the IR whose "name" we
    generalize in the generated check lines; where the "name" could be an actual
    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
    or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # FileCheck variable prefix (e.g. TMP, GLOB) and the key identifying
        # this kind of value (e.g. '%', '@', '!').
        self.check_prefix = check_prefix
        self.check_key = check_key
        # Patterns delimiting the value itself in the IR text.
        self.ir_prefix = ir_prefix
        self.ir_suffix = ir_suffix
        self.ir_regexp = ir_regexp
        # Pattern for the right-hand side of a global definition, or None when
        # this kind of value is not a global definition.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM; those are replaced with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Maps an original number to its assigned counter value (as a string).
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """True iff this kind of value is defined "locally" to functions,
        which we assume is only the case precisely for LLVM IR local values."""
        return self.check_key == "%"

    def get_ir_regex(self):
        """Return the IR regexp we use for this kind of IR value."""
        # For backwards compatibility, locals are checked with the loose '.*'.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Create a FileCheck variable name based on the IR name *var*."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            if var not in self.variable_mapping:
                # First sighting of this number: assign the next counter value.
                self.variable_mapping[var] = str(len(self.variable_mapping) + 1)
            var = self.variable_mapping[var]
        if var.isdigit():
            # Nameless value: build the name from the check prefix.
            var = check_prefix + var
        elif may_clash_with_default_check_prefix_name(check_prefix, var):
            # A named value that clashes with the check_prefix; disambiguate by
            # prepending _prefix_filecheck_ir_name, if one has been defined.
            if _prefix_filecheck_ir_name:
                var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        """Return the (prefix, suffix) text matched around the value itself."""
        matched_text = match.group(2)
        matched_prefix = re.match(self.ir_prefix, matched_text).group(0)
        matched_suffix = re.search(self.ir_suffix + "$", matched_text).group(0)
        return matched_prefix, matched_suffix
1048
1049
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    # Kinds of tool output we generalize check lines for.
    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        """
        Args:
          version: script version (see the version changelog at the file top).
          mode: one of MODE_IR, MODE_ASM, MODE_ANALYZE.
          nameless_values: the kinds of values whose names we generalize.
          regexp_prefix, regexp_suffix: context patterns wrapped around the
            alternation of all value patterns when building the regexps.
        """
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Regexp matching any kind of value, and a second one restricted to
        # global values whose numbering is unstable across unrelated changes.
        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Return (compiled regexp, NamelessValues in capture-group order)."""
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # IR global definitions only match at the start of a line.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # Fix: this comparison was previously written as a bare expression
            # statement (a no-op); it is a sanity check that the match came
            # from the only other regexp we produce.
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i, value in enumerate(values):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, value
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    def get_name_from_match(self, match):
        """Return only the matched name; see get_match_info."""
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        """Return only the matched NamelessValue; see get_match_info."""
        return self.get_match_info(match)[1]
1154
1155
def make_ir_generalizer(version):
    """
    Build the GeneralizerInfo used to generalize LLVM IR check lines.

    From version 5 on, basic block labels are also generalized (matched by
    FileCheck expressions); see the version changelog at the top of this file.
    """
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    # Context patterns around the value alternation: leading whitespace, and a
    # delimiter (or end of string) after the value.
    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1206
1207
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize assembly check lines."""
    # Both MCInst numbers and MCOperand register numbers are unstable across
    # unrelated LLVM changes, so they get incrementing-counter replacement.
    numbered = dict(is_number=True, replace_number_with_counter=True)
    values = [
        NamelessValue(r"MCINST", "Inst#", "<MCInst #", r"\d+", r".+", **numbered),
        NamelessValue(r"MCREG", "Reg:", "<MCOperand Reg:", r"\d+", r".+", **numbered),
    ]

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ASM,
        values,
        r"((?:#|//)\s*)",
        r"([>\s]|\Z)",
    )
1234
1235
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-output check lines."""
    # Hexadecimal group identifiers are unstable, so replace them with a counter.
    group_value = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ANALYZE,
        [group_value],
        r"(\s*)",
        r"(\)?:)",
    )
1254
1255
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """
    Return a truthy value if *var* could be confused with an auto-generated
    FileCheck variable name built from *check_prefix* (the prefix immediately
    followed by digits, compared case-insensitively).
    """
    if not check_prefix:
        return check_prefix
    return re.match(r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE)
1261
1262
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs. By construction the
    matching is crossing-free: both indices are strictly increasing along the
    returned list.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    #
    # patterns maps each line's text to a pair of index lists:
    # (occurrences in lhs, occurrences in rhs).
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Process the least ambiguous repeated lines first (fewest index pairs),
    # so they are most likely to fit under the candidate budget below.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    #  - nodes on the LHS represent lines in the original check
    #  - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    #  * Plot the edges as points in the plane, with the original line
    #    number on the X axis and the updated line number on the Y axis.
    #  * The goal is to find a longest "chain" of points where each point
    #    is strictly above and to the right of the previous point.
    #  * The algorithm proceeds by sweeping a vertical line from left to
    #    right.
    #  * The algorithm maintains a table where `table[N]` answers the
    #    question "What is currently the 'best' way to build a chain of N+1
    #    points to the left of the vertical line". Here, 'best' means
    #    that the last point of the chain is a as low as possible (minimal
    #    Y coordinate).
    #   * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #     the last point in the chain and `y` is its Y coordinate
    #   * A key invariant is that the Y values in the table are
    #     monotonically increasing
    #  * Thanks to these properties, the table can be used to answer the
    #    question "What is the longest chain that can be built to the left
    #    of the vertical line using only points below a certain Y value",
    #    using a binary search over the table.
    #  * The algorithm also builds a backlink structure in which every point
    #    links back to the previous point on a best (longest) chain ending
    #    at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    #
    # Sorting by descending rhs_idx for equal lhs_idx ensures two points with
    # the same X coordinate can never both appear in one strictly-increasing
    # chain.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Read off a longest chain by walking the backlinks from the last table
    # entry (the end of a longest chain), then reverse into forward order.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
1403
1404
# Placeholder substituted for FileCheck variable occurrences when comparing
# check lines name-insensitively.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck variable use/definition such as [[NAME]] or [[NAME:regex]].
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of digits at the end of a variable name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1408
1409
class TestVar:
    """Tracks one FileCheck variable together with the affix context
    (surrounding prefix/suffix text) in which it has been seen."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value
        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        """Record another sighting; an affix that differs from the recorded
        one is invalidated (cleared)."""
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        """Return the FileCheck variable name to use for *text*."""
        nv = self._nameless_value
        return nv.get_value_name(text, nv.check_prefix)

    def _fold_affixes(self, prefix, suffix):
        # Affixes that were stable across all sightings are folded into the
        # variable's own pattern, so drop them from the surrounding text
        # (asserting they agree with what we recorded).
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return prefix, suffix

    def get_def(self, name, prefix, suffix):
        """Render a FileCheck definition of this variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        prefix, suffix = self._fold_affixes(prefix, suffix)
        value_regex = self._nameless_value.get_ir_regex()
        return f"{prefix}[[{name}:{self._prefix}{value_regex}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        """Render a FileCheck use of this variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        prefix, suffix = self._fold_affixes(prefix, suffix)
        return f"{prefix}[[{name}]]{suffix}"
1449
1450
class CheckValueInfo:
    """Describes one occurrence of a generalized value within a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # The value kind's key, e.g. '%'.
        self.key = key
        # Text matched by the FileCheck variable (prefix/suffix excluded).
        self.text = text
        # The FileCheck variable's name.
        self.name = name
        # Affix text captured by the NamelessValue regular expression around
        # the value itself.
        self.prefix = prefix
        self.suffix = suffix
1472
1473
# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        # The check line with every FileCheck variable name occurrence replaced
        # by VARIABLE_TAG.
        self.line: str = line
        # One CheckValueInfo per variable occurrence, in order of appearance.
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return "CheckLineInfo(line={}, self.values={})".format(self.line, self.values)
1486
1487
def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement

    Args:
      old_line_infos: check lines from the existing test file.
      new_line_infos: check lines derived from the new tool output.
      committed_names: names that are already fixed and must not be reassigned;
        this set is updated in place as more names are committed.

    Returns:
      A mapping from every variable name occurring in new_line_infos to the
      name it should be renamed to.
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Apply commits greedily on a match-by-match basis
        matches = [(-1, -1)]
        committed_anything = False
        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            local_commits = {}

            # Zip pairs each value on the new line with the value at the same
            # position on the matched old line; the loop either collects a
            # consistent set of renames for the whole line or breaks to skip it.
            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in local_commits:
                    # Same, but for a possible commit happening on the same line
                    if local_commits[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if lhs_value.name in committed_names:
                    # We can't map this value because the name we would map it to has already been
                    # committed for something else. Give up on this line.
                    break

                local_commits[rhs_value.name] = lhs_value.name
            else:
                # No reason not to add any commitments for this line
                for rhs_var, lhs_var in local_commits.items():
                    new_mapping[rhs_var] = lhs_var
                    committed_names.add(lhs_var)
                    committed_anything = True

                    # If the committed-to name is still identity-mapped for a
                    # different new variable, that mapping is now stale; flag it
                    # for renaming in the conflict-resolution pass below.
                    if (
                        lhs_var != rhs_var
                        and lhs_var in new_mapping
                        and new_mapping[lhs_var] == lhs_var
                    ):
                        new_mapping[lhs_var] = "conflict_" + lhs_var

                matches.append((lhs_idx, rhs_idx))

        matches.append((old_end, new_end))

        # Recursively handle sequences between matches
        if committed_anything:
            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        # Derive a fresh name by bumping the numeric suffix of the original
        # name until the result is unused.
        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
1620
1621
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Replace value names in *lines* with FileCheck variable captures.

    Mutates and returns *lines*. ``vars_seen`` (local IR values) and
    ``global_vars_seen`` (globals/metadata/attributes) are updated with every
    value encountered, so repeated calls for the same function reuse the same
    FileCheck variable names.

    When ``preserve_names`` is set, names are left as-is and only comment
    scrubbing (for IR) and brace escaping (for analyze output) is performed.
    When ``unstable_globals_only`` is set, only values matched by the
    "unstable globals" regexp are generalized.  ``original_check_lines``
    (the pre-existing check lines for this function, if any) is fed to
    remap_metavar_names so variable numbering stays stable across updates.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")
    # Wrap a run of braces in {{...}} so FileCheck treats it as literal text.
    def escape_braces(match_obj):
        return '{{' + re.escape(match_obj.group(0)) + '}}'

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    # Rewrite decimal integer constants of this global as
                    # hex numeric substitutions ("i32 255" -> "i32 [[#0xff]]").
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # FileCheck variable names that are already fixed and must not be
        # assigned to a different value by the remapping below.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose defining (first) occurrence has not been emitted yet.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        # Global variable names are committed only after scanning all lines,
        # so locals seen above take precedence for name assignment.
        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                # Splice each recorded value back into the next VARIABLE_TAG slot.
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    # First occurrence of this value: emit its definition form.
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    # Later occurrence: emit its use form.
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1803
1804
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Append FileCheck lines for one function to *output_lines*.

    For each run line (entry of *prefix_list*) the first usable check prefix
    gets a -LABEL line followed by body check lines produced by
    generalize_check_lines.  Returns the list of prefixes actually printed.

    Arguments:
      output_lines          -- list that generated check lines are appended to.
      comment_marker        -- comment leader of the test file (e.g. ';').
      prefix_list           -- per-run-line tuples whose first element is the
                               list of check prefixes for that run line.
      func_dict             -- check prefix -> function name -> function info.
      func_name             -- name of the function to emit checks for.
      check_label_format    -- %-format string for the -LABEL line.
      ginfo                 -- describes the output mode (asm/analyze/IR).
      global_vars_seen_dict -- per-prefix record of globals seen; updated here.
      is_filtered           -- if True, omit -NEXT suffixes (output was filtered).
      preserve_names        -- keep original IR names instead of captures.
      original_check_lines  -- previous check lines per prefix, used to keep
                               FileCheck variable numbering stable.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Only one prefix per run line gets checks; stop if one of this
            # run line's prefixes was already printed.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can tell below which globals are new for this prefix.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2051
2052
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit FileCheck lines for IR output by delegating to add_checks.

    The -LABEL template depends on whether the function signature is checked
    and on the script version recorded in *ginfo*.
    """
    assert ginfo.is_ir()
    # Pick the template used to match the function's 'define' line.
    if not function_sig:
        def_fmt = "%s"
    elif ginfo.get_version() > 1:
        def_fmt = "define %s"
    else:
        def_fmt = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(comment_marker, def_fmt)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
2090
2091
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit FileCheck lines for analysis-pass output by delegating to add_checks."""
    assert ginfo.is_analyze()
    # Analysis labels are quoted, e.g. "; CHECK-LABEL: 'foo'".
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        {},
        is_filtered,
    )
2115
2116
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Populate glob_val_dict[prefix][check_prefix] with matched global lines.

    Each entry is a list of (start_offset, matched_text) tuples; the offset
    lets callers later sort interlaced kinds of globals (e.g. GLOB and
    GLOBNAMED) back into tool-output order.  Conflicting matches across run
    lines for the same prefix are recorded as None, or warned about when the
    conflict reaches the last prefix.
    """
    for nameless_value in ginfo.get_nameless_values():
        # Only values describing full "<lhs> = <rhs>" global definitions.
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        pattern = re.compile(
            r"^"
            + nameless_value.ir_prefix
            + nameless_value.ir_regexp
            + r"\s=\s"
            + nameless_value.global_ir_rhs_regexp
            + r"$",
            flags=(re.M),
        )
        # Keep the match start offset alongside the text for later sorting.
        lines = [(m.start(), m.group(0)) for m in pattern.finditer(raw_tool_output)]

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if glob_val_dict[prefix][nameless_value.check_prefix] == lines:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    # Mark as unusable for this prefix; a later prefix may
                    # still pick the lines up.
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines
2146
2147
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select which global-definition lines should get CHECK lines.

    'none' drops everything, 'all' keeps everything, and 'smart' keeps only
    the globals that are transitively referenced from variables already seen
    in the generated checks.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # Attribute sets are usually better checked by --check-attributes.
        return []

    def parse_line(line):
        # Split "<name> = <rhs>" and collect all value-like tokens in the rhs.
        pattern = (
            "^"
            + nameless_value.ir_prefix
            + "("
            + nameless_value.ir_regexp
            + ") = ("
            + nameless_value.global_ir_rhs_regexp
            + ")"
        )
        m = re.match(pattern, line)
        return (m.group(1), re.findall(nameless_value.ir_regexp, m.group(2)))

    # Map each defined global to the globals its right-hand side mentions.
    refs_of = {}
    for _, line in global_val_lines_w_index:
        defined, refs = parse_line(line)
        refs_of[defined] = refs

    visible = set()

    def mark_visible(name):
        # Depth-first walk over the reference graph.
        if name in visible:
            return
        visible.add(name)
        for ref in refs_of.get(name, ()):
            mark_visible(ref)

    # Seed the walk with every matching variable the checks already mention.
    for name, check_key in global_vars_seen:
        if check_key == nameless_value.check_key:
            mark_visible(name)

    return [
        entry
        for entry in global_val_lines_w_index
        if parse_line(entry[1])[0] in visible
    ]
2200
2201
# Regex/replacement pairs applied to global check lines to hide metadata that
# varies across compiler versions and build environments.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [
    (re.compile(pattern), repl) for (pattern, repl) in METADATA_FILTERS
]


def filter_unstable_metadata(line):
    """Return *line* with version- and path-dependent metadata globbed out."""
    for regex, replacement in METADATA_FILTERS_RE:
        line = regex.sub(replacement, line)
    return line
2216
2217
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append the pending check lines in offset order, then clear the buffer."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    # Sort by the original match offset so checks appear in tool-output order.
    for _, check_line in sorted(new_lines_w_index):
        output_lines.append(check_line)
    new_lines_w_index.clear()
2226
2227
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append CHECK lines for global values (globals, metadata, attributes).

    Reads the per-prefix matches collected by build_global_values_dictionary
    from *glob_val_dict*, filters them according to *global_check_setting*
    ('none'/'all'/'smart'), generalizes unstable parts, and appends the
    resulting check lines to *output_lines*.  Only values whose
    is_before_functions flag equals *is_before_functions* are emitted.
    Returns the set of (check prefix, value check prefix) pairs printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                # Only one prefix per run line gets checks for this value kind.
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                # Snapshot so we can tell below which globals are new.
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # Honor the user-supplied --global-value-regex filter, if any.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                # Flush buffered checks unless this value kind may interlace
                # with the previous one in the tool output.
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush any remaining buffered check lines, once per run line.
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
2329
2330
def check_prefix(prefix):
    """Warn if *prefix* is not a valid FileCheck check prefix."""
    if PREFIX_RE.match(prefix):
        return
    # A comma suggests the user meant a prefix *list*.
    hint = " Did you mean '--check-prefixes=" + prefix + "'?" if "," in prefix else ""
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + hint
        )
        % (prefix)
    )
2343
2344
def get_check_prefixes(filecheck_cmd):
    """Return all check prefixes mentioned in a FileCheck command line."""
    check_prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        check_prefixes.extend(match.group(1).split(","))
    # FileCheck falls back to the CHECK prefix when none is given.
    return check_prefixes if check_prefixes else ["CHECK"]
2354
2355
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix mentioned in a FileCheck command string."""
    for part in fc_cmd.split():
        if "check-prefix=" in part:
            check_prefix(part.split("=", 1)[1])
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                # Warn on every duplicated occurrence in the list.
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2371
2372
def get_autogennote_suffix(parser, args):
    """Build the " UTC_ARGS: ..." suffix recording non-default options.

    Walks every option registered on *parser* and serializes those whose
    value in *args* differs from the default, so a later update run can
    reproduce the same configuration from the autogenerated note line.
    Returns an empty string when nothing needs to be recorded.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals grew a third choice in version 4 (see the
            # version changelog at the top of this file); before that the
            # bare flag implied 'all' and the default was 'none'.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Multi-value options get each element re-quoted.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space and prepend the UTC_ARGS marker.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2433
2434
def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse the argument list if *line* is a UTC_ARGS command comment.

    Any options embedded in the comment are appended to *argv* (mutated in
    place) and the full list — minus test-file names — is parsed again.
    Returns the (possibly updated) args and argv pair.
    """
    match = UTC_ARGS_CMD.match(line)
    if match:
        extra_options = [opt for opt in shlex.split(match.group("cmd").strip()) if opt]
        argv.extend(extra_options)
        args = parse_args(parser, [a for a in argv if a not in args.tests])
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv
2445
2446
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option value for a test, honoring in-test UTC_ARGS lines.

    Returns the value *get_arg_to_check* extracts from the test's own args.
    For "global" options (is_global), when the option is absent there, the
    test body is additionally scanned for a UTC_ARGS line carrying it; a
    warning is printed if the option appears only after non-comment test
    input has already started.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global" option
        # that affects the entire generation of test checks.  If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # NOTE: this previously tested "warn and saw_line", but `warn`
                # is the module-level function object and thus always truthy;
                # only saw_line carries information.
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2476
2477
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into output_lines, dropping stale checks.

    Skips bare comment-marker lines, separator lines, and any existing check
    comment whose prefix belongs to *prefix_set* (those will be regenerated);
    all other lines are appended with their trailing newline removed.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        # (Removed the unused local `args = input_line_info.args`.)
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            # Only drop check lines for prefixes we are about to regenerate.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2491
2492
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Append checks for every (function, prefix) pair to output_lines.

    Checks are grouped by prefix rather than by function: for each prefix in
    each run, all of its functions are emitted before moving to the next
    prefix.  Returns the set of prefixes for which checks were generated.
    """
    emitted = set()
    generated = set()
    for run in prefix_list:
        run_prefixes, tool_args = run[0], run[1]
        for cur_prefix in run_prefixes:
            for func in func_order[cur_prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, cur_prefix) in emitted:
                    continue
                # Separate consecutive emitted sections with a bare comment line.
                if emitted:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples of (prefix list, tool command args string) and emit
                # checks for every prefix in that list.  That implicitly
                # assumes each run line produces output for every function,
                # which does not hold for generated functions (e.g. -fopenmp
                # vs. no -fopenmp).
                #
                # Therefore, pass just the one prefix we're interested in.
                # As a side effect, all checks for one prefix are generated
                # before moving on to the next prefix, so the output is
                # ordered by prefix instead of by function as in "normal"
                # mode.
                single_run = [([cur_prefix], tool_args)]
                for new_prefix in check_generator(output_lines, single_run, func):
                    emitted.add((func, new_prefix))
                    generated.add(new_prefix)
    return generated
2530