xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 915ee0b823a528456226de513f303483d5fe0793)
1from __future__ import print_function
2
3import argparse
4import bisect
5import collections
6import copy
7import glob
8import itertools
9import os
10import re
11import subprocess
12import sys
13import shlex
14
15from typing import List, Mapping, Set
16
17##### Common utilities for update_*test_checks.py
18
19
# Emit verbose progress messages on stderr; set from -v/--verbose by
# parse_commandline_args / parse_args.
_verbose = False
# Optional prefix prepended to FileCheck IR value names; set from
# --prefix-filecheck-ir-name in TestInfo.__init__.
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Output-format version used when generating a brand-new test; existing tests
# keep the version recorded on their autogenerated-note line (see itertests).
DEFAULT_VERSION = 5


# Analysis printers whose output the check builders know how to handle;
# anything else triggers a warning in process_run_line.
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
44
45
class Regex(object):
    """Wrap a compiled regular expression object to allow deep copy of a regexp.
    This is required for the deep copy done in do_scrub.

    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Compiled patterns are immutable, so sharing the underlying regex
        # between the copy and the original is a valid "deep" copy.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Delegate to the wrapped pattern's search()."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Delegate to the wrapped pattern's sub()."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string of the wrapped regex."""
        return self.regex.pattern

    def flags(self):
        """Return the compilation flags of the wrapped regex."""
        return self.regex.flags
71
72
class Filter(Regex):
    """A Regex plus a direction flag: matching lines are kept in the
    generated checks when is_filter_out is False and removed when it is True.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Deep-copy the Regex part first, then carry the flag over.
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
87
88
def parse_commandline_args(parser):
    """Register the options shared by all update_*_test_checks.py scripts on
    parser, parse sys.argv, and mirror the verbosity/global-regex settings
    into module globals.  Returns the parsed argparse.Namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # BUGFIX: 'option_string' is now passed in explicitly.  It
                # used to be referenced here without being in scope (it was
                # only a parameter of __call__), so reporting an invalid
                # regex raised a NameError instead of this ValueError.
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # --filter-out drops matching lines; --filter keeps them.
            is_filter_out = option_string == "--filter-out"

            # Upgrade the Regex appended by RegexAction into a Filter that
            # carries the keep/drop flag.
            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
233
234
def parse_args(parser, argv):
    """Parse argv with parser and normalize version-dependent defaults.

    Forces --function-signature semantics on for output version >= 2,
    mirrors verbosity and the global-value regexes into module globals, and
    resolves the 'default' value of --check-globals based on the version.
    """
    args = parser.parse_args(argv)
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
247
248
class InputLineInfo(object):
    """One input line of the test file plus its index and the argument state
    (args/argv snapshot) in effect at that line; produced by
    TestInfo.ro_iterlines."""

    def __init__(self, line, line_number, args, argv):
        self.argv = argv
        self.args = args
        self.line_number = line_number
        self.line = line
255
256
class TestInfo(object):
    """Everything known about a single test file being updated: its path,
    parsed options, RUN lines, input lines, and the autogenerated-note
    strings to emit."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Remember the prefix in a module global so the scrubbing code
            # elsewhere in this module can pick it up.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # MIR tests use '#' comments; everything else defaults to ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        # First-line banner advertising autogeneration, e.g.
        # "; NOTE: Assertions have been autogenerated by <script><args>".
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introducing the unused-prefix section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo for every input line without mutating self;
        embedded UTC_ARGS commands update the per-line args/argv snapshots."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines the caller should generate checks for, appending
        pass-through lines (and the autogenerated note) to output_lines as a
        side effect."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            # Track option state as UTC_ARGS commands are encountered.
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # CHECK generation is disabled here: copy the line verbatim.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return filler check lines (one match-anything '{{.*}}' per prefix)
        for prefixes in run_list that produced no real checks; empty list if
        every prefix was used."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
334
335
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching test_patterns.

    Handles glob expansion, skipping tests marked with UTC_AVOID, defaulting
    the output version for brand-new tests, and re-parsing options stored on
    the first line of previously autogenerated tests.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Honor any UTC_ARGS recorded on the existing note line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Drop the previously generated unused-prefix section; it is
            # rebuilt from scratch on every update.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
391
392
def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Decide whether an input line should be copied into the updated test.

    Blank comment lines, the special separator line, and pre-existing CHECK
    lines for prefixes in prefix_set are dropped (we regenerate those);
    everything else is kept.
    """
    stripped = input_line.strip()
    # Skip any blank comment lines in the IR.
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Skip a special double comment line we use as a separator.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #  return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m is None or m.group(1) not in prefix_set:
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling.
        return False
    if skip_global_checks:
        # Keep the check only if it is NOT a global value definition check.
        global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
        return global_ir_value_re.search(input_line) is None
    return False
424
425
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: a CHECK-LABEL matching a function definition opens a
    # (prefix, function) scope; subsequent checks with the same prefix append
    # to that function's list until the scope is closed.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                # Everything after the "PREFIX[-KIND]:" directive.
                line = input_line[m.end() :].strip()

                # A check with a different prefix closes the current scope.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Plain CHECK/NEXT/NOT/DAG/EMPTY lines belong to the open
                # function scope, if any.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # CHECK-SAME continues the preceding LABEL; nothing to record.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any non-comment line (or a LABEL that didn't parse) ends the scope.
        current_function = None

    return result
477
478
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return (pattern, replacement) pairs mimicking lit's %s, %S, %p and
    %{pathsep} substitutions for the given source file path."""
    sourcedir = os.path.dirname(sourcepath)
    substitutions = [("%s", sourcepath)]
    substitutions.append(("%S", sourcedir))
    substitutions.append(("%p", sourcedir))
    substitutions.append(("%{pathsep}", os.pathsep))
    return substitutions
488
489
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair from substitutions to s, in
    order, and return the result."""
    result = s
    for pattern, replacement in substitutions:
        result = result.replace(pattern, replacement)
    return result
494
495
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run exe on the IR file `ir` and return its stdout as text.

    cmd_args may be a single shell string (legacy form, run with shell=True)
    or a list of arguments (the safer form).  An optional preprocess_cmd
    shell string is run first, and its stdout is piped into the tool instead
    of the raw file.  Lit-style substitutions (%s, %S, %p, %{pathsep}) are
    applied to all command strings.  Output line endings are normalized to
    '\n'.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                pp = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
                # Feed the preprocessor's output to the tool instead of the
                # file handle opened above.
                ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
543
544
##### LLVM IR parser
# A RUN line introduced by any supported comment marker (//, ; or #).
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# FileCheck's --check-prefix/--check-prefixes argument on a RUN line.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A syntactically valid check-prefix name.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# An existing check directive: group(1) is the prefix, group(2) the optional
# kind suffix (NEXT/NOT/DAG/LABEL/SAME/EMPTY).
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# A check directive with an optional -SAME suffix; used to detect CHECK-SAME
# lines left dangling after their anchor line was removed.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# Marker for per-test option overrides embedded in the test file.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# First-line banner written by the update scripts.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
# First-line marker forbidding autogeneration of a test.
UTC_AVOID = "NOTE: Do not autogenerate"
# Note introducing the autogenerated unused-prefix section at end of file.
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# A full IR function definition: optional '; Function Attrs:' comment, the
# define line (attrs/return, name, signature) and the body up to '^}'.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# The "'<analysis>' for function '<func>':" banner printed by analysis
# passes, followed by the analysis body.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Per-function debug output of loop passes: a quoted function name followed
# by the body.
LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# An IR 'define' line; group(1) is the (possibly quoted) function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# 'target triple = "..."' in the IR.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple/-march/-debug-only arguments found on RUN lines.
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace scrubbers (SCRUB_PRESERVE_LEADING_WHITESPACE_RE and
# SCRUB_TRAILING_WHITESPACE_TEST_RE are used by scrub_body below).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
# Lines consisting of a '# kill:' comment in assembly output.
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
# Loop-structure comments emitted into assembly output.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
# A trailing bare '#' comment token at end of line.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Separator written between functions as '<comment_prefix><SEPARATOR>'.
SEPARATOR = "."
597
598
def error(msg, test_file=None):
    """Print an 'ERROR:' diagnostic to stderr, appending the test file name
    when one is given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
603
604
def warn(msg, test_file=None):
    """Print a 'WARNING:' diagnostic to stderr, appending the test file name
    when one is given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
609
610
def debug(*args, **kwargs):
    """print(...) to stderr (by default), but only when -v/--verbose set the
    module-level _verbose flag."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
617
618
def find_run_lines(test, lines):
    """Return the RUN: command strings found in lines, with backslash
    continuations merged into single logical commands."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation of the previous RUN line.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for run in run_lines:
        debug("  RUN: {}".format(run))
    return run_lines
632
633
def get_triple_from_march(march):
    """Map a -march value to a target triple by prefix; fall back to 'x86'
    with a warning on stderr when nothing matches."""
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    matched = next(
        (triple for prefix, triple in triples.items() if march.startswith(prefix)),
        None,
    )
    if matched is not None:
        return matched
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
648
649
def apply_filters(line, filters):
    """Return True if line survives the filter list.

    The first filter whose regex matches decides the outcome: keep for a
    plain --filter, drop for --filter-out.  When nothing matches, the line is
    kept only if every filter was a --filter-out.
    """
    saw_keep_filter = False
    for f in filters:
        if not f.is_filter_out:
            saw_keep_filter = True
        if f.search(line):
            return not f.is_filter_out
    # No filter matched; discard the line if any positive filter was given.
    return not saw_keep_filter
660
661
def do_filter(body, filters):
    """Apply the filter list to body line by line; an empty filter list
    returns body unchanged."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
670
671
def scrub_body(body):
    """Scrub runs of whitespace out of body while keeping the leading
    indentation of each line intact."""
    # Collapse interior whitespace runs down to a single space; leading
    # indentation is excluded by the pattern itself.
    collapsed = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(
        lambda m: m.group(2) + " ", body
    )
    # Indentation tabs become two spaces each.
    expanded = collapsed.expandtabs(2)
    # Finally drop trailing whitespace on every line.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", expanded)
682
683
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run scrubber on body.  When scrubber_args is non-empty, pass the
    scrubber a deep copy of the arguments with extra_scrub set on the first
    one, leaving the caller's objects untouched."""
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    args_copy = copy.deepcopy(scrubber_args)
    args_copy[0].extra_scrub = extra
    return scrubber(body, *args_copy)
690
691
# Build up a dictionary of all the function bodies.
class function_body(object):
    """A scrubbed function body plus the signature pieces needed to decide
    whether two RUN lines produced equivalent output for the same function."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # Scrubbed body text used for check generation.
        self.scrub = string
        # Body scrubbed with extra_scrub=True; used for equivalence checks.
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        # Helper providing the value-name regexp and match accessors —
        # presumably the generalizer-info object shared by the update
        # scripts; verify against callers.
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True when the given output matches this one modulo the
        names of function arguments (for asm, the body must match exactly)."""
        arg_names = set()

        def drop_arg_names(match):
            # Strip the names of '%' values from the signature, remembering
            # them so the body comparison can ignore those names too.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # In the body, drop only the names collected from the signature.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
759
760class FunctionTestBuilder:
761    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
762        self._verbose = flags.verbose
763        self._record_args = flags.function_signature
764        self._check_attributes = flags.check_attributes
765        # Strip double-quotes if input was read by UTC_ARGS
766        self._filters = (
767            list(
768                map(
769                    lambda f: Filter(
770                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
771                    ),
772                    flags.filters,
773                )
774            )
775            if flags.filters
776            else []
777        )
778        self._scrubber_args = scrubber_args
779        self._path = path
780        self._ginfo = ginfo
781        # Strip double-quotes if input was read by UTC_ARGS
782        self._replace_value_regex = list(
783            map(lambda x: x.strip('"'), flags.replace_value_regex)
784        )
785        self._func_dict = {}
786        self._func_order = {}
787        self._global_var_dict = {}
788        self._processed_prefixes = set()
789        for tuple in run_list:
790            for prefix in tuple[0]:
791                self._func_dict.update({prefix: dict()})
792                self._func_order.update({prefix: []})
793                self._global_var_dict.update({prefix: dict()})
794
795    def finish_and_get_func_dict(self):
796        for prefix in self.get_failed_prefixes():
797            warn(
798                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
799                % (
800                    prefix,
801                    self._path,
802                )
803            )
804        return self._func_dict
805
    def func_order(self):
        """Return the prefix -> ordered-list-of-function-names mapping."""
        return self._func_order
808
    def global_var_dict(self):
        """Return the per-prefix global value dictionary populated by
        process_run_line (via build_global_values_dictionary)."""
        return self._global_var_dict
811
812    def is_filtered(self):
813        return bool(self._filters)
814
815    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
816        build_global_values_dictionary(
817            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
818        )
819        for m in function_re.finditer(raw_tool_output):
820            if not m:
821                continue
822            func = m.group("func")
823            body = m.group("body")
824            # func_name_separator is the string that is placed right after function name at the
825            # beginning of assembly function definition. In most assemblies, that is just a
826            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
827            # False, just assume that separator is an empty string.
828            if self._ginfo.is_asm():
829                # Use ':' as default separator.
830                func_name_separator = (
831                    m.group("func_name_separator")
832                    if "func_name_separator" in m.groupdict()
833                    else ":"
834                )
835            else:
836                func_name_separator = ""
837            attrs = m.group("attrs") if self._check_attributes else ""
838            funcdef_attrs_and_ret = (
839                m.group("funcdef_attrs_and_ret") if self._record_args else ""
840            )
841            # Determine if we print arguments, the opening brace, or nothing after the
842            # function name
843            if self._record_args and "args_and_sig" in m.groupdict():
844                args_and_sig = scrub_body(m.group("args_and_sig").strip())
845            elif "args_and_sig" in m.groupdict():
846                args_and_sig = "("
847            else:
848                args_and_sig = ""
849            filtered_body = do_filter(body, self._filters)
850            scrubbed_body = do_scrub(
851                filtered_body, scrubber, self._scrubber_args, extra=False
852            )
853            scrubbed_extra = do_scrub(
854                filtered_body, scrubber, self._scrubber_args, extra=True
855            )
856            if "analysis" in m.groupdict():
857                analysis = m.group("analysis")
858                if analysis not in SUPPORTED_ANALYSES:
859                    warn("Unsupported analysis mode: %r!" % (analysis,))
860            if func.startswith("stress"):
861                # We only use the last line of the function body for stress tests.
862                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
863            if self._verbose:
864                print("Processing function: " + func, file=sys.stderr)
865                for l in scrubbed_body.splitlines():
866                    print("  " + l, file=sys.stderr)
867            for prefix in prefixes:
868                # Replace function names matching the regex.
869                for regex in self._replace_value_regex:
870                    # Pattern that matches capture groups in the regex in leftmost order.
871                    group_regex = re.compile(r"\(.*?\)")
872                    # Replace function name with regex.
873                    match = re.match(regex, func)
874                    if match:
875                        func_repl = regex
876                        # Replace any capture groups with their matched strings.
877                        for g in match.groups():
878                            func_repl = group_regex.sub(
879                                re.escape(g), func_repl, count=1
880                            )
881                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)
882
883                    # Replace all calls to regex matching functions.
884                    matches = re.finditer(regex, scrubbed_body)
885                    for match in matches:
886                        func_repl = regex
887                        # Replace any capture groups with their matched strings.
888                        for g in match.groups():
889                            func_repl = group_regex.sub(
890                                re.escape(g), func_repl, count=1
891                            )
892                        # Substitute function call names that match the regex with the same
893                        # capture groups set.
894                        scrubbed_body = re.sub(
895                            func_repl, "{{" + func_repl + "}}", scrubbed_body
896                        )
897
898                if func in self._func_dict[prefix]:
899                    if self._func_dict[prefix][func] is not None and (
900                        str(self._func_dict[prefix][func]) != scrubbed_body
901                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
902                        or self._func_dict[prefix][func].attrs != attrs
903                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
904                        != funcdef_attrs_and_ret
905                    ):
906                        if self._func_dict[prefix][func].is_same_except_arg_names(
907                            scrubbed_extra,
908                            funcdef_attrs_and_ret,
909                            args_and_sig,
910                            attrs,
911                        ):
912                            self._func_dict[prefix][func].scrub = scrubbed_extra
913                            self._func_dict[prefix][func].args_and_sig = args_and_sig
914                        else:
915                            # This means a previous RUN line produced a body for this function
916                            # that is different from the one produced by this current RUN line,
917                            # so the body can't be common across RUN lines. We use None to
918                            # indicate that.
919                            self._func_dict[prefix][func] = None
920                else:
921                    if prefix not in self._processed_prefixes:
922                        self._func_dict[prefix][func] = function_body(
923                            scrubbed_body,
924                            scrubbed_extra,
925                            funcdef_attrs_and_ret,
926                            args_and_sig,
927                            attrs,
928                            func_name_separator,
929                            self._ginfo,
930                        )
931                        self._func_order[prefix].append(func)
932                    else:
933                        # An earlier RUN line used this check prefixes but didn't produce
934                        # a body for this function. This happens in Clang tests that use
935                        # preprocesser directives to exclude individual functions from some
936                        # RUN lines.
937                        self._func_dict[prefix][func] = None
938
939    def processed_prefixes(self, prefixes):
940        """
941        Mark a set of prefixes as having had at least one applicable RUN line fully
942        processed. This is used to filter out function bodies that don't have
943        outputs for all RUN lines.
944        """
945        self._processed_prefixes.update(prefixes)
946
947    def get_failed_prefixes(self):
948        # This returns the list of those prefixes that failed to match any function,
949        # because there were conflicting bodies produced by different RUN lines, in
950        # all instances of the prefix.
951        for prefix in self._func_dict:
952            if self._func_dict[prefix] and (
953                not [
954                    fct
955                    for fct in self._func_dict[prefix]
956                    if self._func_dict[prefix][fct] is not None
957                ]
958            ):
959                yield prefix
960
961
962##### Generator of LLVM IR CHECK lines
963
# Matches an IR line comment (';' to end of line, with any leading blanks) so
# it can be stripped from check lines.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
965
966# TODO: We should also derive check lines for global, debug, loop declarations, etc..
967
968
class NamelessValue:
    """
    Describes one category of IR value whose "name" is generalized in the
    emitted check lines. The "name" may be a textual identifier (as in e.g.
    `@some_global` or `%x`) or a bare number (as in e.g. `%12` or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) shift with unrelated LLVM
        # changes; such values are remapped to a stable incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Maps an original number to the counter value assigned to it.
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """True iff this value kind is defined locally to a function, which
        holds precisely for LLVM IR local values."""
        return self.check_key == "%"

    def get_ir_regex(self):
        """The regexp used to match this kind of IR value."""
        # For backwards compatibility, locals are matched permissively.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Derive a FileCheck variable name from the IR name ``var``."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            mapped = self.variable_mapping.get(var)
            if mapped is None:
                # First sighting of this number: assign the next counter.
                mapped = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = mapped
            var = mapped
        if var.isdigit():
            # Nameless (numeric) value: prepend the check prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # Named value that clashes with the check prefix; disambiguate
            # with _prefix_filecheck_ir_name when one is configured.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        """Return the (prefix, suffix) text surrounding the value in a match
        of the combined generalizer regexp."""
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
1048        return prefix, suffix
1049
1050
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # One regexp over all nameless values, and one restricted to globals
        # whose numbering is unstable across runs.
        self._regexp, _ = self._build_regexp(False, False)
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """
        Build a combined regexp over the selected nameless values.

        Returns (compiled regexp, values in alternation order); the order is
        required to map a match's group index back to its value kind (see
        get_match_info).
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # In IR mode, global definitions are matched only at the start
                # of a line.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # The only other regexp we hand out is the unstable-globals one.
            # (This was previously a no-op comparison statement instead of an
            # assertion.)
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
1155
1156
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used to generalize LLVM IR output.

    From version 5 on, basic block labels are also matched (and thus replaced
    by FileCheck expressions in the generated checks).
    """
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    # A value may be preceded only by whitespace and must be followed by a
    # separator character or end-of-input.
    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1207
1208
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize assembly output."""
    specs = [
        (r"MCINST", "Inst#", "<MCInst #"),
        (r"MCREG", "Reg:", "<MCOperand Reg:"),
    ]
    values = [
        NamelessValue(
            check_prefix,
            check_key,
            ir_prefix,
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        )
        for check_prefix, check_key, ir_prefix in specs
    ]

    # Values appear inside asm comments ('#' or '//') and end at '>',
    # whitespace, or end-of-input.
    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_ASM, values, r"((?:#|//)\s*)", r"([>\s]|\Z)"
    )
1235
1236
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-pass output."""
    # Hexadecimal group ids are unstable, so they are remapped to a counter.
    group_value = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )
    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ANALYZE,
        [group_value],
        r"(\s*)",
        r"(\)?:)",
    )
1255
1256
# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Whether ``var`` looks like ``check_prefix`` followed by digits,
    compared case-insensitively."""
    if not check_prefix:
        return check_prefix
    return re.match(r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE)
1262
1263
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    # patterns maps line text -> ([indices in lhs], [indices in rhs]).
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    # Lines occurring more than once on a side; considered later, cheapest
    # (fewest edge combinations) first.
    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    #  - nodes on the LHS represent lines in the original check
    #  - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (unique_matches) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    #  * Plot the edges as points in the plane, with the original line
    #    number on the X axis and the updated line number on the Y axis.
    #  * The goal is to find a longest "chain" of points where each point
    #    is strictly above and to the right of the previous point.
    #  * The algorithm proceeds by sweeping a vertical line from left to
    #    right.
    #  * The algorithm maintains a table where `table[N]` answers the
    #    question "What is currently the 'best' way to build a chain of N+1
    #    points to the left of the vertical line". Here, 'best' means
    #    that the last point of the chain is a as low as possible (minimal
    #    Y coordinate).
    #   * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #     the last point in the chain and `y` is its Y coordinate
    #   * A key invariant is that the Y values in the table are
    #     monotonically increasing
    #  * Thanks to these properties, the table can be used to answer the
    #    question "What is the longest chain that can be built to the left
    #    of the vertical line using only points below a certain Y value",
    #    using a binary search over the table.
    #  * The algorithm also builds a backlink structure in which every point
    #    links back to the previous point on a best (longest) chain ending
    #    at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    # table_rhs_idx and table_candidate_idx together realize `table[N]` from
    # the explanation above (the y coordinates and the point indices).
    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        # ti = length of the best chain that ends strictly below rhs_idx.
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
1404
1405
# Placeholder substituted for FileCheck variable names when diffing check lines.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable occurrence, e.g. [[NAME]] or [[NAME:regex]].
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of trailing digits of a variable name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1409
1410
class TestVar:
    """Tracks a single FileCheck variable together with the prefix/suffix
    context it appears in; affixes that are not identical across every
    sighting are dropped (emitted outside the capture instead)."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value
        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        """Record another occurrence; clear any affix that differs."""
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        """FileCheck variable name derived from the matched text."""
        nv = self._nameless_value
        return nv.get_value_name(text, nv.check_prefix)

    def get_def(self, name, prefix, suffix):
        """Render the defining occurrence of the variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        # Stable affixes are folded into the capture itself.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        """Render a use of the variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"
1450
1451
class CheckValueInfo:
    """One occurrence of a FileCheck variable within a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key identifying the kind of value, e.g. '%'.
        self.key = key
        # Text matched by the FileCheck variable (affixes excluded).
        self.text = text
        # The FileCheck variable's name.
        self.name = name
        # Affixes captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
1473
1474
class CheckLineInfo:
    """Represents a check line so that lines can be compared while ignoring
    some or all of the FileCheck variable names."""

    def __init__(self, line, values):
        # Line text with every FileCheck variable name occurrence replaced by
        # VARIABLE_TAG.
        self.line: str = line
        # One CheckValueInfo per FileCheck variable occurrence in the line.
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
1487
1488
def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement
    """
    # Initialize uncommitted identity mappings
    # (each new name maps to itself until a better target is committed).
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.name] = value.name

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.name)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Apply commits greedily on a match-by-match basis
        # (sentinel entries bracket the matched region for the recursion below).
        matches = [(-1, -1)]
        committed_anything = False
        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            local_commits = {}

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.name] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if rhs_value.name in local_commits:
                    # Same, but for a possible commit happening on the same line
                    if local_commits[rhs_value.name] == lhs_value.name:
                        continue
                    else:
                        break

                if lhs_value.name in committed_names:
                    # We can't map this value because the name we would map it to has already been
                    # committed for something else. Give up on this line.
                    break

                local_commits[rhs_value.name] = lhs_value.name
            else:
                # No reason not to add any commitments for this line
                for rhs_var, lhs_var in local_commits.items():
                    new_mapping[rhs_var] = lhs_var
                    committed_names.add(lhs_var)
                    committed_anything = True

                    # If the name we just took was itself still identity-mapped
                    # for some other new value, that value must be renamed later.
                    if (
                        lhs_var != rhs_var
                        and lhs_var in new_mapping
                        and new_mapping[lhs_var] == lhs_var
                    ):
                        new_mapping[lhs_var] = "conflict_" + lhs_var

                matches.append((lhs_idx, rhs_idx))

        matches.append((old_end, new_end))

        # Recursively handle sequences between matches
        if committed_anything:
            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        # Find an unused name by incrementing the numeric suffix.
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
1621
1622
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Rewrite `lines` in place so IR value names become FileCheck variables.

    `vars_seen` accumulates TestVar state for local values and
    `global_vars_seen` for global values; both are updated as a side effect so
    that variable names stay consistent across calls.  When
    `original_check_lines` is provided, the chosen metavariable names are
    remapped to match the pre-existing check lines where possible (via
    remap_metavar_names).  With `unstable_globals_only`, only values matched
    by the generalizer's unstable-globals regexp are rewritten.  Returns the
    (mutated) `lines` list.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        # '{{' and '}}' delimit FileCheck regexes; wrap literal brace runs in
        # {{...}} with re.escape so FileCheck matches them verbatim.
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    # Rewrite the integer constant as a FileCheck numeric
                    # substitution holding its hex form ([[#0x...]]).
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # Variable names already assigned in previous calls; any new mapping
        # must not collide with these.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose defining occurrence has not been emitted yet.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            # Re-insert each recorded value into its VARIABLE_TAG slot, using
            # the definition form (get_def) the first time a key is seen and
            # the use form (get_use) afterwards.
            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1805
1806
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Append FileCheck lines for `func_name` to `output_lines`.

    Each entry of `prefix_list` is a tuple whose first element is the list of
    check prefixes of one RUN line; `func_dict` maps check prefix -> function
    name -> scrubbed function info.  `check_label_format` is the %-style
    template for the -LABEL line (slots: prefix, funcdef attrs/return type,
    name, args, separator).  `global_vars_seen_dict` is updated with global
    variables first seen while printing each prefix.  Returns the list of
    prefixes for which checks were actually printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            # Seed with globals already recorded for this prefix so earlier
            # captures are reused rather than redefined.
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            # Local value names are per-function; start fresh for each prefix.
            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        # After a blank line use a plain CHECK (not -NEXT) so
                        # the blank line itself is never matched.
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2053
2054
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit IR CHECK lines for `func_name`; thin wrapper around add_checks()."""
    assert ginfo.is_ir()
    # Pick the -LABEL template: no 'define' when the signature isn't printed;
    # from version 2 on the full signature is checked, otherwise it is globbed.
    if not function_sig:
        function_def_regex = "%s"
    elif ginfo.get_version() > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names=preserve_names,
        original_check_lines=original_check_lines,
    )
2092
2093
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit analysis-output CHECK lines; thin wrapper around add_checks()."""
    assert ginfo.is_analyze()
    # Analysis labels are single-quoted; same five %-slots as the IR variant.
    label_fmt = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_fmt,
        ginfo,
        {},  # fresh global_vars_seen_dict; nothing carries over between runs
        is_filtered,
    )
2117
2118
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Collect global value definitions from `raw_tool_output` into `glob_val_dict`.

    For every nameless value kind that has a global RHS pattern, scan the tool
    output for '<lhs> = <rhs>' lines and record them under each prefix.  Each
    line keeps its match offset so that CHECK lines can later be sorted into
    output order even when matched by different kinds (relevant for GLOB and
    GLOBNAMED, which may appear interlaced).
    """
    for nameless_value in ginfo.get_nameless_values():
        # Only kinds that describe global definitions are collected here.
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        pattern = re.compile(
            r"^"
            + nameless_value.ir_prefix
            + nameless_value.ir_regexp
            + r"\s=\s"
            + nameless_value.global_ir_rhs_regexp
            + r"$",
            flags=re.M,
        )
        # Attach the substring's start index to every matched line.
        lines = [(m.start(), m.group(0)) for m in pattern.finditer(raw_tool_output)]

        for prefix in prefixes:
            cur = glob_val_dict[prefix]
            if cur is None:
                continue
            if nameless_value.check_prefix in cur:
                if cur[nameless_value.check_prefix] == lines:
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under this prefix, but a later prefix
                    # may still agree: poison this entry and keep going.
                    cur[nameless_value.check_prefix] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            cur[nameless_value.check_prefix] = lines
2148
2149
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select which global definition lines should get CHECK lines.

    'none' drops everything, 'all' keeps everything, and 'smart' keeps only
    the globals transitively referenced from the variables already captured
    in the function checks (`global_vars_seen`).
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def parse_def(line):
        # Split "<lhs> = <rhs>" into the defined name and the list of global
        # names referenced on the right-hand side.
        pat = (
            "^"
            + nameless_value.ir_prefix
            + "("
            + nameless_value.ir_regexp
            + ") = ("
            + nameless_value.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(pat, line)
        return (match.group(1), re.findall(nameless_value.ir_regexp, match.group(2)))

    # Map each defined global to the globals its definition references.
    refs_of = {}
    for _, line in global_val_lines_w_index:
        name, refs = parse_def(line)
        refs_of[name] = refs

    # Transitive closure of globals reachable from the already-seen variables,
    # computed iteratively with a worklist.
    visible = set()
    worklist = [
        var
        for var, check_key in global_vars_seen
        if check_key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in visible:
            continue
        visible.add(var)
        worklist.extend(refs_of.get(var, ()))

    return [
        (i, line)
        for i, line in global_val_lines_w_index
        if parse_def(line)[0] in visible
    ]
2202
2203
# (pattern, replacement) pairs applied to metadata lines whose content is
# unstable across builds (version strings, source paths), so the generated
# checks don't depend on the build environment.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    # Hide the directory component of !DIFile entries.
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
# Precompiled form of the above, used by filter_unstable_metadata().
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]
2212
2213
def filter_unstable_metadata(line):
    """Scrub build-dependent metadata (version strings, paths) from `line`."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
2218
2219
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append the pending check lines (ordered by output position) after a
    separator line, then empty the pending list."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    output_lines.extend(text for _, text in sorted(new_lines_w_index))
    new_lines_w_index.clear()
2228
2229
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append CHECK lines for global values to `output_lines`.

    Only value kinds whose `is_before_functions` flag matches the argument
    are emitted in this call, so globals can be printed either before or
    after the function bodies.  `global_check_setting` ('none'/'all'/'smart')
    controls which globals are kept (see
    filter_globals_according_to_preference).  Returns the set of
    (check prefix, value check prefix) pairs that were printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                # Seed with globals already recorded for this prefix.
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # --global-value-regex, if given, restricts which globals
                    # get checks at all.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                if not nameless_value.interlaced_with_previous:
                    # A non-interlaced kind starts a new check group; flush
                    # whatever was pending for this prefix first.
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
2331
2332
def check_prefix(prefix):
    """Warn when `prefix` is not a valid FileCheck check prefix."""
    if PREFIX_RE.match(prefix):
        return
    # A comma strongly suggests the user meant --check-prefixes.
    hint = " Did you mean '--check-prefixes=" + prefix + "'?" if "," in prefix else ""
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + hint
        )
        % (prefix)
    )
2345
2346
def get_check_prefixes(filecheck_cmd):
    """Return the check prefixes used by a FileCheck command line, defaulting
    to ["CHECK"] when none are given explicitly."""
    prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(match.group(1).split(","))
    return prefixes if prefixes else ["CHECK"]
2356
2357
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix mentioned in a FileCheck command line, warning
    about invalid or duplicated ones."""
    for part in fc_cmd.split():
        if "check-prefix=" in part:
            check_prefix(part.split("=", 1)[1])
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                # Duplicates within one --check-prefixes list are suspicious.
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2373
2374
def get_autogennote_suffix(parser, args):
    """Build the " UTC_ARGS: ..." suffix for the autogenerated-note line.

    Reconstructs every non-default command-line option from *args* so that a
    later rerun of the update script can reproduce the same checks.  Returns
    the empty string when all relevant options are at their defaults.

    NOTE(review): this iterates argparse's private parser._actions list and
    reads action.const/action.nargs; it depends on argparse internals.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals grew a third choice ('smart') in version 4 (see
            # the version changelog at the top of the file), so its default
            # depends on the script version in use.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                # Pre-v4, the bare flag meant 'all', so no value is emitted.
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Quote each element of a multi-valued option.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Prepend the UTC_ARGS marker and drop the trailing space.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2435
2436
def check_for_command(line, parser, args, argv, argparse_callback):
    """Fold a UTC_ARGS command embedded in *line* into the argument state.

    If *line* matches UTC_ARGS_CMD, its options are appended to argv and the
    arguments are re-parsed (test paths are filtered out first).  Returns the
    possibly-updated (args, argv) pair.
    """
    cmd_match = UTC_ARGS_CMD.match(line)
    if cmd_match is None:
        return args, argv
    argv.extend(opt for opt in shlex.split(cmd_match.group("cmd").strip()) if opt)
    args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
2447
2448
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Find the value of an option for a test.

    get_arg_to_check extracts the option's value from a parsed-args object.
    If the test's own args don't supply it and the option is "global", scan
    the test's lines for a UTC_ARGS occurrence and apply that value to the
    whole test, warning when it appears after test content has started.
    Returns the option value, or the falsy result of get_arg_to_check.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global" option
        # that affects the entire generation of test checks.  If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            # Lines that are neither comments (";") nor blank count as test
            # content for the purpose of the "option appears late" warning.
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # BUG FIX: this previously read `if warn and saw_line:`, but
                # `warn` is the module-level helper function and is always
                # truthy, so the test reduced to `saw_line` anyway.
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2478
2479
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Append the test's input lines to output_lines.

    Skips bare comment-marker lines, separator lines, and existing check
    lines whose prefix is in *prefix_set* (those checks get regenerated);
    everything kept has its trailing newline stripped.
    """
    for input_line_info in test_info.iterlines(output_lines):
        # Removed unused local: the previous code also bound
        # `args = input_line_info.args` without ever reading it.
        line = input_line_info.line
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            # Drop stale auto-generated check lines for known prefixes.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2493
2494
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions at the end of the output, grouped by
    prefix rather than by function.

    Returns the set of prefixes for which checks were actually generated.
    """
    emitted = set()
    generated_prefixes = set()
    for run_info in prefix_list:
        run_prefixes, tool_args = run_info[0], run_info[1]
        for cur_prefix in run_prefixes:
            for func in func_order[cur_prefix]:
                # The func order can contain the same function several times;
                # only the first occurrence of a (func, prefix) pair is used.
                if (func, cur_prefix) in emitted:
                    continue
                if emitted:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # ([prefixes], tool_args) tuples and emit checks for every
                # prefix of each tuple, implicitly assuming that every run
                # line generates output for every function.  That does not
                # hold for generated functions, which some run lines may not
                # produce at all (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore pass a run list containing only the prefix we're
                # interested in.  This generates all checks for one prefix
                # before moving on to the next, so checks end up ordered by
                # prefix instead of by function as in "normal" mode.
                for generated_prefix in check_generator(
                    output_lines, [([cur_prefix], tool_args)], func
                ):
                    emitted.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
2532