xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision ae6af37bf47d830becc0cd0c8a309af70fdf502c)
1import argparse
2import bisect
3import collections
4import copy
5import glob
6import os
7import re
8import subprocess
9import sys
10import shlex
11
12from typing import List, Mapping, Set
13
14##### Common utilities for update_*test_checks.py
15
16
# Enables debug() output; set from the --verbose option by
# parse_commandline_args()/parse_args().
_verbose = False
# Prefix prepended to FileCheck IR value names (from --prefix-filecheck-ir-name,
# stashed here by TestInfo.__init__) to avoid conflicts with scripted names.
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Output-format version written into newly generated tests (see changelog above).
DEFAULT_VERSION = 5


# Analysis printers whose output these scripts know how to handle; any other
# analysis name triggers a warning (see FunctionTestBuilder.process_run_line).
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
41
42
class Regex(object):
    """A wrapper around a compiled regular expression that supports deep copy.

    do_scrub deep-copies its arguments; this wrapper makes that work by
    letting copies share the single underlying compiled pattern object.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper and share the compiled pattern.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Delegate to the wrapped pattern's search()."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Delegate to the wrapped pattern's sub()."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string of the wrapped regex."""
        return self.regex.pattern

    def flags(self):
        """Return the compile-time flags of the wrapped regex."""
        return self.regex.flags
68
69
class Filter(Regex):
    """A Regex plus a direction flag for check generation: matches are kept
    when is_filter_out is False and removed when it is True.
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Copy the base (pattern-sharing) part, then the direction flag.
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
84
85
def parse_commandline_args(parser):
    """Register the options shared by all update_*test_checks.py scripts on
    ``parser``, parse the real ``sys.argv``, mirror a few values into module
    globals, and return the parsed namespace.

    See parse_args() for the variant that parses an explicit argv list.
    """

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            """Compile `values` with `flags` and append the wrapped Regex to
            namespace.<dest>.

            `option_string` is used for error reporting only. It must be
            passed in by __call__: previously it was referenced without being
            in scope, so an invalid regex raised NameError instead of the
            intended ValueError.
            """
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # --filter and --filter-out share one dest; rewrap the Regex that
            # RegexAction just appended as a Filter carrying the direction.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
230
231
def parse_args(parser, argv):
    """Parse `argv` with `parser`, apply version-dependent defaults, and
    mirror a few option values into module globals. Returns the namespace."""
    args = parser.parse_args(argv)
    # From output-format version 2 onward, function signatures are checked.
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    # Resolve the 'default' placeholder of --check-globals: 'smart' became the
    # default in version 4; earlier versions keep the old 'none' behavior.
    if "check_globals" in args and args.check_globals == "default":
        if args.version < 4:
            args.check_globals = "none"
        else:
            args.check_globals = "smart"
    return args
244
245
class InputLineInfo(object):
    """One input line together with its position and the option state
    (args/argv) in effect when the line was read."""

    def __init__(self, line, line_number, args, argv):
        self.line = line                # raw text of the input line
        self.line_number = line_number  # index of the line in the input
        self.args = args                # parsed options in effect here
        self.argv = argv                # raw argv list in effect here
252
253
class TestInfo(object):
    """Per-test state for an update_*test_checks.py run: the test path, its
    input and RUN lines, the parsed options, and the note strings the script
    writes into regenerated tests."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Stash the prefix in a module global so scrubbing code elsewhere
            # can pick it up.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # MIR tests use '#' comments; everything else defaults to ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        # e.g. "; NOTE: Assertions have been autogenerated by <script><flags>"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introduces the trailing unused-prefix section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without mutating self; any
        per-line option updates come back via check_for_command."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines that need CHECK generation; as a side effect,
        append the autogenerated note and all pass-through (disabled) lines
        to output_lines, and update self.args/self.argv per line."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # Generation is disabled here: copy the line through verbatim.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the trailing lines for RUN-line prefixes that produced no
        checks: the unused-note marker plus one match-everything check per
        unused prefix (empty list if every prefix was used)."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
331
332
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Expand each glob in test_patterns and yield a TestInfo per updatable test.

    Skipped with a warning: patterns matching nothing, tests carrying the
    UTC_AVOID note, previously autogenerated tests produced by a different
    script (unless --force-update), and — under --update-only — tests that
    were never autogenerated. Lines from the UNUSED_NOTE marker onward are
    dropped before the TestInfo is built.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Re-apply any options saved on the existing note line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Strip the previously generated unused-prefix section at the end.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
388
389
def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Decide whether an existing test line is copied into the regenerated
    output (True) or dropped because the script emits its own replacement
    (False)."""
    stripped = input_line.strip()
    # Drop blank comment lines in the IR (kept while scanning global checks).
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Drop the special separator comment line we generate ourselves.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Lines that are not CHECK lines for one of our prefixes pass through.
    match = CHECK_RE.match(input_line)
    if not match or match.group(1) not in prefix_set:
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling.
        return False
    if skip_global_checks:
        # Only drop checks that refer to a global value ([[...]] or @...).
        global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M))
        return global_ir_value_re.search(input_line) is None
    # It is a CHECK line for a prefix we are regenerating: drop it.
    return False
421
422
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.

    Works as a small state machine: a CHECK-LABEL that matches a function
    definition opens a (prefix, function) bucket; following check lines with
    the same prefix are appended to it until the prefix changes or a
    non-comment line is reached.
    """
    result = collections.defaultdict(lambda: {})

    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A different prefix ends the current run of check lines.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Ordinary checks (NEXT/NOT/DAG/plain) belong to the open bucket.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # SAME continues the LABEL line; it is not recorded itself.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any other line (non-comment, or an unparsed label) closes the bucket.
        current_function = None

    return result
474
475
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return (placeholder, replacement) pairs mirroring lit's %s, %S, %p
    and %{pathsep} substitutions for the given test file path."""
    sourcedir = os.path.dirname(sourcepath)
    substitutions = [
        ("%s", sourcepath),
        ("%S", sourcedir),
        ("%p", sourcedir),
        ("%{pathsep}", os.pathsep),
    ]
    return substitutions
485
486
def applySubstitutions(s, substitutions):
    """Return `s` with every (placeholder, replacement) pair applied in order."""
    result = s
    for placeholder, replacement in substitutions:
        result = result.replace(placeholder, replacement)
    return result
491
492
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run `exe` on the test file `ir` and return its stdout as text with
    Unix line endings.

    cmd_args is either a list of arguments (run without a shell) or a single
    string (run through the shell). preprocess_cmd, when given, must be a
    shell command string whose stdout replaces the file as the tool's stdin.
    lit-style substitutions (%s, %S, %p, %{pathsep}) are applied to the
    command(s) first.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # The tool reads the preprocessor's stdout instead of the file.
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        # Legacy Python 2 guard; always true on Python 3.
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
541
542
##### LLVM IR parser

# A RUN: line in any supported comment style (//, ; or #); group(1) is the
# command text after "RUN:".
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# Captures the value passed to FileCheck's -check-prefix/-check-prefixes.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A syntactically valid FileCheck prefix: letters, digits, '_' and '-'.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# An existing check line; group(1) is the prefix, group(2) the optional
# check kind (NEXT/NOT/DAG/LABEL/SAME/EMPTY).
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# A check line with an optional -SAME suffix on the prefix.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# In-test marker for saved/overridden script options; UTC_ARGS_CMD captures
# the command text following the key.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# First-line note announcing that a test's assertions were autogenerated.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
# First-line note marking a test that must never be autogenerated.
UTC_AVOID = "NOTE: Do not autogenerate"
# Marker introducing the trailing unused-prefix section of a test.
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# An IR 'define' with an optional preceding "; Function Attrs:" comment.
# Captures: attrs, the tokens between 'define' and '@', the function name,
# the argument list/signature, and the body up to the closing '}'.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# Analysis-printer output: "'<analysis>' for function '<func>':" plus body.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Loop-pass debug output: a quoted function name followed by the body.
LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# An IR function definition line; group(1) is the (possibly quoted) name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# Same, but the 'define' part is optional so bare "@name(" labels also match.
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
# "target triple = "..."" in IR, and -triple/-march/-debug-only RUN options.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace normalization patterns used by the scrubbers below.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Also strips trailing "#N" attribute-group references.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
# Assembly comment noise: "kill:" annotations and loop-header comments.
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Written after a comment marker to form the special separator line that
# should_add_line_to_output() drops.
SEPARATOR = "."
598
599
def error(msg, test_file=None):
    """Print an ERROR-prefixed message to stderr, tagged with test_file if given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
604
605
def warn(msg, test_file=None):
    """Print a WARNING-prefixed message to stderr, tagged with test_file if given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
610
611
def debug(*args, **kwargs):
    """print(*args) — to stderr unless a file= is supplied — when the module
    verbose flag is set."""
    # Python2 did not allow `def debug(*args, file=sys.stderr, **kwargs)`.
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
618
619
def find_run_lines(test, lines):
    """Collect the RUN: commands found in `lines`, merging lines ending in a
    backslash into a single logical command."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(line) for line in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: strip the trailing backslash(es) and join.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for run in run_lines:
        debug("  RUN: {}".format(run))
    return run_lines
633
634
def get_triple_from_march(march):
    """Map a -march value to the matching target triple; fall back to 'x86'
    (with a warning on stderr) for unrecognized values."""
    known = (
        ("amdgcn", "amdgcn"),
        ("r600", "r600"),
        ("mips", "mips"),
        ("nvptx64", "nvptx64"),
        ("sparc", "sparc"),
        ("hexagon", "hexagon"),
        ("ve", "ve"),
    )
    for prefix, triple in known:
        if march.startswith(prefix):
            return triple
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
650
651
def apply_filters(line, filters):
    """Return True if `line` survives `filters`; the first matching filter
    decides. With only filter-out filters, unmatched lines are kept."""
    saw_keep_filter = False
    for f in filters:
        if not f.is_filter_out:
            saw_keep_filter = True
        if f.search(line):
            # First match terminates processing: keep unless it filters out.
            return not f.is_filter_out
    # Nothing matched: keep the line only if every filter was a filter-out.
    return not saw_keep_filter
662
663
def do_filter(body, filters):
    """Apply apply_filters() to each line of `body`; with no filters the body
    passes through untouched."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
672
673
def scrub_body(body):
    """Normalize whitespace in a function body: collapse interior runs to one
    space (keeping leading indentation), expand tabs to two spaces, and strip
    trailing whitespace."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
684
685
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run `scrubber` on `body`. When scrubber_args is non-empty, a deep copy
    of it (with `extra` stored on the first element as .extra_scrub) is passed
    through as extra positional arguments."""
    if not scrubber_args:
        return scrubber(body)
    # Deep-copy so the caller's args object is never mutated.
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
692
693
# Build up a dictionary of all the function bodies.
class function_body(object):
    """One scrubbed function body plus the signature pieces needed to decide
    whether output from different RUN lines is equivalent for a function."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # Scrubbed body text; str(self) returns it.
        self.scrub = string
        # Body scrubbed with extra=True, used for equivalence comparison.
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        # ginfo provides the value-matching regexp and match helpers.
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if the function described by the given pieces differs
        from this one only in the names of its arguments."""
        arg_names = set()

        def drop_arg_names(match):
            # Strip names of '%' (local) values, remembering them; other
            # value kinds are kept verbatim.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Drop only the names that were collected from the signatures.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        # Signatures must agree once argument names are removed.
        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # Otherwise compare bodies with the collected argument names dropped
        # and IR comments scrubbed away.
        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
760
761
class FunctionTestBuilder:
    """Accumulates scrubbed function bodies per FileCheck prefix across RUN
    lines, so checks can be generated per prefix afterwards."""

    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        # Record full argument signatures only with --function-signature.
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        # Per-prefix state, one entry per prefix named on any RUN line:
        # function bodies, function-name ordering, and global variables.
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})

    def finish_and_get_func_dict(self):
        """Warn about prefixes whose RUN lines conflicted for every function,
        then return the prefix -> function-body dictionary."""
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        """Return the per-prefix function-name ordering dictionary."""
        return self._func_order

    def global_var_dict(self):
        """Return the per-prefix global-variable dictionary."""
        return self._global_var_dict

    def is_filtered(self):
        """True when any --filter/--filter-out option is in effect."""
        return bool(self._filters)
816
    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """Parse one RUN line's tool output and record per-prefix function bodies.

        Args:
            function_re: compiled regex with named groups (at least 'func' and
                'body') that splits raw_tool_output into function definitions.
            scrubber: scrubbing regex applied to each function body.
            raw_tool_output: the complete tool output for this RUN line.
            prefixes: the check prefixes associated with this RUN line.

        If different RUN lines sharing a prefix produce conflicting bodies for
        the same function, the entry in self._func_dict is set to None.
        """
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name.
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None
940
    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.
        """
        # Once a prefix is recorded here, later RUN lines may not introduce new
        # functions for it (see process_run_line).
        self._processed_prefixes.update(prefixes)
948
949    def get_failed_prefixes(self):
950        # This returns the list of those prefixes that failed to match any function,
951        # because there were conflicting bodies produced by different RUN lines, in
952        # all instances of the prefix.
953        for prefix in self._func_dict:
954            if self._func_dict[prefix] and (
955                not [
956                    fct
957                    for fct in self._func_dict[prefix]
958                    if self._func_dict[prefix][fct] is not None
959                ]
960            ):
961                yield prefix
962
963
##### Generator of LLVM IR CHECK lines

# Matches a trailing IR comment (`; ...`), including its leading whitespace,
# so it can be stripped from scrubbed output lines.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..
969
970
class NamelessValue:
    """
    Describes one category of IR value whose "name" is generalized in the
    generated check lines; the "name" may be an actual identifier (as in e.g.
    `@some_global` or `%x`) or just a number (as in e.g. `%12` or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # Naming/classification of this value kind.
        self.check_prefix = check_prefix
        self.check_key = check_key
        # Regexp fragments used to recognize this value in tool output.
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        # Placement and matching behavior flags.
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) change based on unrelated
        # modifications to LLVM; those are replaced by an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Maps an original number to its counter-based replacement.
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """Whether this value kind is defined "locally" to functions, which we
        assume is only the case precisely for LLVM IR local values."""
        return self.check_key == "%"

    def get_ir_regex(self):
        """Return the regexp we use to match this kind of IR value."""
        # For backwards compatibility we check locals with '.*'.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Create a FileCheck variable name based on the IR name *var*."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            counter = self.variable_mapping.get(var)
            if counter is None:
                # First occurrence of this number: assign the next counter.
                counter = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = counter
            var = counter
        if var.isdigit():
            # Nameless value: prepend check_prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # Named value that clashes with the check_prefix: disambiguate
            # with _prefix_filecheck_ir_name when one has been configured.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        """Return the (prefix, suffix) text captured around the matched value."""
        matched_text = match.group(2)
        prefix = re.match(self.ir_prefix, matched_text).group(0)
        suffix = re.search(self.ir_suffix + "$", matched_text).group(0)
        return prefix, suffix
1051
1052
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be
    generalized (e.g., variable names replaced by FileCheck meta variables) as
    well as per-test-file state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: "List[NamelessValue]",
        regexp_prefix,
        regexp_suffix,
    ):
        """
        Args:
            version: script version (see the changelog at the top of the file).
            mode: one of MODE_IR, MODE_ASM, MODE_ANALYZE.
            nameless_values: value kinds to generalize, in matching priority order.
            regexp_prefix: regexp fragment placed before each value alternative.
            regexp_suffix: regexp fragment placed after each value alternative.
        """
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Regexp matching any of the nameless values.
        self._regexp, _ = self._build_regexp(False, False)
        # Regexp matching only global values whose name/number is unstable
        # (not matched literally), plus the corresponding value kinds.
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Build a combined regexp over the selected nameless values.

        Returns (compiled_regexp, selected_values), where selected_values[i]
        corresponds to the i-th alternative (and thus capture group) of the
        regexp.
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            # In IR, global definitions are anchored at the start of a line.
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # Fixed: this invariant check was previously written as a bare
            # comparison expression, making it a no-op statement.
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_match_info
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> "NamelessValue":
        return self.get_match_info(match)[1]
1157
1158
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used to generalize LLVM IR output.

    The table below lists, in matching priority order, every kind of nameless
    value that is generalized in IR check lines. Basic-block labels are only
    generalized from version 5 onwards (see the changelog at the top of the
    file).
    """
    values = []

    if version >= 5:
        # Basic block labels: `label %bb` references and `bb:` definitions.
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1209
1210
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize backend (assembly) output.

    Only MCInst/MCOperand debug numbers are generalized; since those numbers
    change with unrelated LLVM modifications, each distinct number is replaced
    by an incrementing counter (replace_number_with_counter).
    """
    values = [
        NamelessValue(
            r"MCINST",
            "Inst#",
            "<MCInst #",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
        NamelessValue(
            r"MCREG",
            "Reg:",
            "<MCOperand Reg:",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
    ]

    # Values appear after a '#' or '//' assembly comment marker.
    prefix = r"((?:#|//)\s*)"
    suffix = r"([>\s]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix)
1237
1238
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-pass output.

    Hexadecimal values (`0x...`) are generalized, each distinct value being
    replaced by an incrementing counter (replace_number_with_counter).
    """
    values = [
        NamelessValue(
            r"GRP",
            "#",
            r"",
            r"0x[0-9a-f]+",
            None,
            replace_number_with_counter=True,
        ),
    ]

    prefix = r"(\s*)"
    suffix = r"(\)?:)"

    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix
    )
1257
1258
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value if *var* clashes with the scripted FileCheck
    check_prefix, i.e. if it is the prefix followed only by digits
    (case-insensitive), such as var "tmp3" against check_prefix "TMP"."""
    if not check_prefix:
        return check_prefix
    return re.match(r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE)
1264
1265
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    # patterns maps a line's text to ([indices in lhs], [indices in rhs]).
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Admit ambiguous patterns cheapest-first, so the edge budget below cuts
    # off the most explosive ones.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    #  - nodes on the LHS represent lines in the original check
    #  - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (candidates) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    #  * Plot the edges as points in the plane, with the original line
    #    number on the X axis and the updated line number on the Y axis.
    #  * The goal is to find a longest "chain" of points where each point
    #    is strictly above and to the right of the previous point.
    #  * The algorithm proceeds by sweeping a vertical line from left to
    #    right.
    #  * The algorithm maintains a table where `table[N]` answers the
    #    question "What is currently the 'best' way to build a chain of N+1
    #    points to the left of the vertical line". Here, 'best' means
    #    that the last point of the chain is as low as possible (minimal
    #    Y coordinate).
    #   * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #     the last point in the chain and `y` is its Y coordinate
    #   * A key invariant is that the Y values in the table are
    #     monotonically increasing
    #  * Thanks to these properties, the table can be used to answer the
    #    question "What is the longest chain that can be built to the left
    #    of the vertical line using only points below a certain Y value",
    #    using a binary search over the table.
    #  * The algorithm also builds a backlink structure in which every point
    #    links back to the previous point on a best (longest) chain ending
    #    at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
1406
1407
# Placeholder substituted for each FileCheck variable name occurrence so that
# check lines can be compared while ignoring the concrete names.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable such as [[TMP0]] or [[TMP0:...]];
# group 1 is the variable name.
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of digits at the end of a name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1411
1412
class TestVar:
    """Tracks a single FileCheck variable together with the prefix/suffix text
    that every occurrence of the underlying value had in common."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        """Record another occurrence; drop any prefix/suffix not shared by all."""
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        """FileCheck variable name derived from the matched IR text."""
        nv = self._nameless_value
        return nv.get_value_name(text, nv.check_prefix)

    def get_def(self, name, prefix, suffix):
        """Return the check-line text that defines FileCheck variable *name*."""
        nv = self._nameless_value
        if nv.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        # Common prefix/suffix text is matched inside the [[...]] definition
        # itself, so it must not be repeated outside the brackets.
        out_prefix = "" if self._prefix else prefix
        if self._prefix:
            assert self._prefix == prefix
        out_suffix = "" if self._suffix else suffix
        if self._suffix:
            assert self._suffix == suffix
        return (
            f"{out_prefix}[[{name}:{self._prefix}"
            f"{nv.get_ir_regex()}{self._suffix}]]{out_suffix}"
        )

    def get_use(self, name, prefix, suffix):
        """Return the check-line text for a use of FileCheck variable *name*."""
        nv = self._nameless_value
        if nv.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        out_prefix = "" if self._prefix else prefix
        if self._prefix:
            assert self._prefix == prefix
        out_suffix = "" if self._suffix else suffix
        if self._suffix:
            assert self._suffix == suffix
        return f"{out_prefix}[[{name}]]{out_suffix}"
1452
1453
class CheckValueInfo:
    """Describes one occurrence of a generalized value within a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the value, e.g. '%'
        self.key = key

        # Text to be matched by the FileCheck variable (without any prefix or suffix)
        self.text = text

        # Name of the FileCheck variable
        self.name = name

        # Prefix and suffix that were captured by the NamelessValue regular expression
        self.prefix = prefix
        self.suffix = suffix
1475
1476
# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        """
        Args:
            line: check line text with every FileCheck variable name occurrence
                replaced by VARIABLE_TAG.
            values: one CheckValueInfo per variable occurrence, in order.
        """
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: "List[CheckValueInfo]" = values

    def __repr__(self):
        # Fixed: previously emitted the literal text "self.values=" instead of
        # "values=", inconsistent with the "line=" field.
        return f"CheckLineInfo(line={self.line}, values={self.values})"
1489
1490
1491def remap_metavar_names(
1492    old_line_infos: List[CheckLineInfo],
1493    new_line_infos: List[CheckLineInfo],
1494    committed_names: Set[str],
1495) -> Mapping[str, str]:
1496    """
1497    Map all FileCheck variable names that appear in new_line_infos to new
1498    FileCheck variable names in an attempt to reduce the diff from old_line_infos
1499    to new_line_infos.
1500
1501    This is done by:
1502    * Matching old check lines and new check lines using a diffing algorithm
1503      applied after replacing names with wildcards.
1504    * Committing to variable names such that the matched lines become equal
1505      (without wildcards) if possible
1506    * This is done recursively to handle cases where many lines are equal
1507      after wildcard replacement
1508    """
1509    # Initialize uncommitted identity mappings
1510    new_mapping = {}
1511    for line in new_line_infos:
1512        for value in line.values:
1513            new_mapping[value.name] = value.name
1514
1515    # Recursively commit to the identity mapping or find a better one
1516    def recurse(old_begin, old_end, new_begin, new_end):
1517        if old_begin == old_end or new_begin == new_end:
1518            return
1519
1520        # Find a matching of lines where uncommitted names are replaced
1521        # with a placeholder.
1522        def diffify_line(line, mapper):
1523            values = []
1524            for value in line.values:
1525                mapped = mapper(value.name)
1526                values.append(mapped if mapped in committed_names else "?")
1527            return line.line.strip() + " @@@ " + " @ ".join(values)
1528
1529        lhs_lines = [
1530            diffify_line(line, lambda x: x)
1531            for line in old_line_infos[old_begin:old_end]
1532        ]
1533        rhs_lines = [
1534            diffify_line(line, lambda x: new_mapping[x])
1535            for line in new_line_infos[new_begin:new_end]
1536        ]
1537
1538        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
1539
1540        candidate_matches = [
1541            (old_begin + lhs_idx, new_begin + rhs_idx)
1542            for lhs_idx, rhs_idx in candidate_matches
1543        ]
1544
1545        # Candidate matches may conflict if they require conflicting mappings of
1546        # names. We want to determine a large set of compatible candidates,
1547        # because that leads to a small diff.
1548        #
1549        # We think of the candidates as vertices in a conflict graph. The
1550        # conflict graph has edges between incompatible candidates. We want to
1551        # find a large independent set in this graph.
1552        #
1553        # Greedily selecting candidates and removing incompatible ones has the
1554        # disadvantage that making few bad decisions early on can have huge
1555        # consequences.
1556        #
1557        # Instead, we implicitly compute multiple independent sets by greedily
1558        # assigning a *coloring* to the conflict graph. Then, we select the
1559        # largest color class (which is the largest independent set we found),
1560        # commit to all candidates in it, and recurse.
1561        #
1562        # Note that we don't actually materialize the conflict graph. Instead,
1563        # each color class tracks the information needed to decide implicitly
1564        # whether a vertex conflicts (has an edge to) any of the vertices added
1565        # to the color class so far.
1566        class Color:
1567            def __init__(self):
1568                # (lhs_idx, rhs_idx) of matches in this color
1569                self.matches = []
1570
1571                # rhs_name -> lhs_name mappings required by this color
1572                self.mapping = {}
1573
1574                # lhs_names committed for this color
1575                self.committed = set()
1576
1577        colors = []
1578
1579        for lhs_idx, rhs_idx in candidate_matches:
1580            lhs_line = old_line_infos[lhs_idx]
1581            rhs_line = new_line_infos[rhs_idx]
1582
1583            # We scan through the uncommitted names in the candidate line and
1584            # filter out the color classes to which the candidate could be
1585            # assigned.
1586            #
1587            # Simultaneously, we prepare a new color class in case the candidate
1588            # conflicts with all colors that have been established so far.
1589            compatible_colors = colors[:]
1590            new_color = Color()
1591            new_color.matches.append((lhs_idx, rhs_idx))
1592
1593            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
1594                if new_mapping[rhs_value.name] in committed_names:
1595                    # The new value has already been committed. If it was mapped
1596                    # to the same name as the original value, we can consider
1597                    # committing other values from this line. Otherwise, we
1598                    # should ignore this line.
1599                    if new_mapping[rhs_value.name] == lhs_value.name:
1600                        continue
1601                    else:
1602                        break
1603
1604                if rhs_value.name in new_color.mapping:
1605                    # Same, but for a possible commit happening on the same line
1606                    if new_color.mapping[rhs_value.name] == lhs_value.name:
1607                        continue
1608                    else:
1609                        break
1610
1611                if (
1612                    lhs_value.name in committed_names
1613                    or lhs_value.name in new_color.committed
1614                ):
1615                    # We can't map this value because the name we would map it
1616                    # to has already been committed for something else. Give up
1617                    # on this line.
1618                    break
1619
1620                new_color.mapping[rhs_value.name] = lhs_value.name
1621                new_color.committed.add(lhs_value.name)
1622
1623                color_idx = 0
1624                while color_idx < len(compatible_colors):
1625                    color = compatible_colors[color_idx]
1626                    compatible = True
1627                    if rhs_value.name in color.mapping:
1628                        compatible = color.mapping[rhs_value.name] == lhs_value.name
1629                    else:
1630                        compatible = lhs_value.name not in color.committed
1631                    if compatible:
1632                        color_idx += 1
1633                    else:
1634                        del compatible_colors[color_idx]
1635            else:
1636                # We never broke out of the loop, which means that at a minimum,
1637                # this line is viable standalone
1638                if compatible_colors:
1639                    color = max(compatible_colors, key=lambda color: len(color.matches))
1640                    color.mapping.update(new_color.mapping)
1641                    color.committed.update(new_color.committed)
1642                    color.matches.append((lhs_idx, rhs_idx))
1643                else:
1644                    colors.append(new_color)
1645
1646        if colors:
1647            # Pick the largest color class. This gives us a large independent
1648            # (non-conflicting) set of candidate matches. Assign all names
1649            # required by the independent set and recurse.
1650            max_color = max(colors, key=lambda color: len(color.matches))
1651
1652            for rhs_var, lhs_var in max_color.mapping.items():
1653                new_mapping[rhs_var] = lhs_var
1654                committed_names.add(lhs_var)
1655
1656                if (
1657                    lhs_var != rhs_var
1658                    and lhs_var in new_mapping
1659                    and new_mapping[lhs_var] == lhs_var
1660                ):
1661                    new_mapping[lhs_var] = "conflict_" + lhs_var
1662
1663            matches = (
1664                [(old_begin - 1, new_begin - 1)]
1665                + max_color.matches
1666                + [(old_end, new_end)]
1667            )
1668
1669            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
1670                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
1671
1672    recurse(0, len(old_line_infos), 0, len(new_line_infos))
1673
1674    # Commit to remaining names and resolve conflicts
1675    for new_name, mapped_name in new_mapping.items():
1676        if mapped_name in committed_names:
1677            continue
1678        if not mapped_name.startswith("conflict_"):
1679            assert mapped_name == new_name
1680            committed_names.add(mapped_name)
1681
1682    for new_name, mapped_name in new_mapping.items():
1683        if mapped_name in committed_names:
1684            continue
1685        assert mapped_name.startswith("conflict_")
1686
1687        m = NUMERIC_SUFFIX_RE.search(new_name)
1688        base_name = new_name[: m.start()]
1689        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
1690        while True:
1691            candidate = f"{base_name}{suffix}"
1692            if candidate not in committed_names:
1693                new_mapping[new_name] = candidate
1694                committed_names.add(candidate)
1695                break
1696            suffix += 1
1697
1698    return new_mapping
1699
1700
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Scrub `lines` and replace matched IR values with FileCheck variables.

    Mutates and returns `lines`. Unless `preserve_names` is set, every value
    matched by the generalizer's regexp is replaced by a FileCheck variable
    definition (first occurrence) or use (later occurrences), obtained from
    the TestVar recorded in `vars_seen` (local values) or `global_vars_seen`
    (globals). Variable names are remapped via remap_metavar_names against
    `original_check_lines` so that names from an earlier generation can be
    reused where possible.

    Args:
        lines: check lines to generalize; modified in place.
        ginfo: GeneralizerInfo providing the output mode (IR/asm/analyze)
            and the value-matching regexps.
        vars_seen: dict keyed by (name, check_key) -> TestVar for local
            values; updated with newly seen values.
        global_vars_seen: same shape, for global values; also updated.
        preserve_names: if True, skip metavariable substitution entirely.
        original_check_lines: previously generated check lines (or None)
            used to keep metavariable names stable across regenerations.
        unstable_globals_only: restrict matching to the "unstable globals"
            regexp provided by `ginfo`.

    Returns:
        The (mutated) list of generalized check lines.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        # Wrap a literal run of braces in {{...}} so FileCheck treats it as a
        # regex that matches the braces themselves.
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    # For globals the user asked to check in hex, rewrite the
                    # integer initializer as a FileCheck numeric [[#0x...]].
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # FileCheck variable names already handed out for local values; new
        # assignments must not collide with these.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                # Only local defs carry prefix/suffix information into the
                # TestVar; globals are tracked without affixes.
                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            # Re-expand each VARIABLE_TAG placeholder into a FileCheck
            # definition or use, in the order the values were recorded.
            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    # First occurrence of this value: emit a definition and
                    # drop it from `defs` so later occurrences become uses.
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1883
1884
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Append FileCheck lines for `func_name` to `output_lines`.

    For each run-line prefix tuple in `prefix_list`, picks the first usable
    check prefix, emits a -LABEL line (via `check_label_format`), optional
    attribute and -SAME lines, and then the generalized function body with
    -NEXT/-EMPTY suffixes as appropriate for the output mode of `ginfo`
    (asm, analyze, or IR).

    Args:
        output_lines: list the generated check lines are appended to.
        comment_marker: comment leader for the test's language (e.g. ';').
        prefix_list: list of tuples whose first element is the list of check
            prefixes for one RUN line.
        func_dict: prefix -> {func_name -> function info} mapping; function
            info provides .attrs, .args_and_sig, .func_name_separator,
            .funcdef_attrs_and_ret and stringifies to the body.
        func_name: name of the function to emit checks for.
        check_label_format: %-format string for the -LABEL line taking
            (prefix, funcdef_attrs_and_ret, func_name, args, separator).
        ginfo: GeneralizerInfo for the output mode and value regexps.
        global_vars_seen_dict: prefix -> {(name, key) -> TestVar}; updated
            with globals newly seen while emitting this function.
        is_filtered: if True, emit plain CHECK lines instead of CHECK-NEXT.
        preserve_names: forwarded to generalize_check_lines.
        original_check_lines: prefix -> previously generated body lines, used
            to keep variable names stable (read-only here; the shared mutable
            default is safe because it is never modified).

    Returns:
        The list of check prefixes that were actually printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            # Seed with globals already seen under this prefix so reuses
            # become FileCheck variable uses rather than re-definitions.
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can later record only the globals first seen
            # while emitting this function.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            # Version 2+ also checks the function's attributes/return type on
            # the label line (see the version changelog at the top of file).
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2131
2132
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit FileCheck lines for an IR function; returns the printed prefixes.

    Thin wrapper around add_checks that builds the -LABEL format string for
    IR output based on whether the signature is checked and on the script
    version in effect.
    """
    assert ginfo.is_ir()
    # Label format is based on IR string.
    if not function_sig:
        function_def_regex = "%s"
    elif ginfo.get_version() > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
2170
2171
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit FileCheck lines for analysis-pass output; returns printed prefixes.

    Analysis labels quote the function name. No global-variable state is
    carried across calls, so a fresh (and discarded) seen-globals dict is
    handed to add_checks.
    """
    assert ginfo.is_analyze()
    label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_format,
        ginfo,
        {},
        is_filtered,
    )
2195
2196
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Scan `raw_tool_output` for global definitions and record them per prefix.

    For every nameless value of `ginfo` that has a global RHS regexp, all
    "<lhs> = <rhs>" lines are collected (paired with their match offset) and
    stored in glob_val_dict[prefix][check_prefix]. Conflicting output for a
    non-final prefix poisons the entry with None; a conflict on the final
    prefix is reported via warn() and then overwritten.
    """
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        # Match a complete "<lhs> = <rhs>" global definition line.
        pattern = re.compile(
            r"^"
            + nameless_value.ir_prefix
            + nameless_value.ir_regexp
            + r"\s=\s"
            + nameless_value.global_ir_rhs_regexp
            + r"$",
            flags=(re.M),
        )

        # Attach each match's start offset so CHECK lines can later be sorted
        # properly even when matched by different nameless values. This is
        # relevant for GLOB and GLOBNAMED since they may appear interlaced.
        found = [(m.start(), m.group(0)) for m in pattern.finditer(raw_tool_output)]

        for prefix in prefixes:
            prefix_dict = glob_val_dict[prefix]
            if prefix_dict is None:
                continue
            if nameless_value.check_prefix in prefix_dict:
                if prefix_dict[nameless_value.check_prefix] == found:
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under a shared, non-final prefix:
                    # poison the entry and move on.
                    prefix_dict[nameless_value.check_prefix] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            prefix_dict[nameless_value.check_prefix] = found
2226
2227
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select which global definition lines should receive CHECK lines.

    'none' drops everything, 'all' keeps everything. 'smart' keeps only the
    globals that are transitively referenced from variables already seen in
    the function checks (attribute sets, check_key '#', are dropped since
    they are usually better checked by --check-attributes).
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def extract(line, nv):
        # Split a definition line into (name, names referenced on the RHS).
        pattern = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        m = re.match(pattern, line)
        return (m.group(1), re.findall(nv.ir_regexp, m.group(2)))

    # name -> names appearing in that global's initializer.
    contains_refs_to = {}
    for _, line in global_val_lines_w_index:
        name, refs = extract(line, nameless_value)
        contains_refs_to[name] = refs

    # Flood-fill the reference graph starting from the already-seen vars.
    transitively_visible = set()

    def mark_visible(var):
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        for ref in contains_refs_to.get(var, ()):
            mark_visible(ref)

    for var, check_key in global_vars_seen:
        if check_key == nameless_value.check_key:
            mark_visible(var)

    return [
        (idx, line)
        for idx, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in transitively_visible
    ]
2280
2281
# Regexes that scrub inherently unstable pieces of metadata: tool version
# strings and the filename/directory of !DIFile nodes.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Replace unstable metadata (version strings, paths) with FileCheck globs."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
2296
2297
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append pending (index, line) check lines in index order, then clear them.

    Does nothing when there is nothing pending; otherwise a separator comment
    precedes the flushed lines.
    """
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    output_lines.extend(text for _, text in sorted(new_lines_w_index))
    new_lines_w_index.clear()
2306
2307
2308def add_global_checks(
2309    glob_val_dict,
2310    comment_marker,
2311    prefix_list,
2312    output_lines,
2313    ginfo: GeneralizerInfo,
2314    global_vars_seen_dict,
2315    preserve_names,
2316    is_before_functions,
2317    global_check_setting,
2318):
2319    printed_prefixes = set()
2320    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
2321    for nameless_value in ginfo.get_nameless_values():
2322        if nameless_value.global_ir_rhs_regexp is None:
2323            continue
2324        if nameless_value.is_before_functions != is_before_functions:
2325            continue
2326        for p in prefix_list:
2327            global_vars_seen = {}
2328            checkprefixes = p[0]
2329            if checkprefixes is None:
2330                continue
2331            for checkprefix in checkprefixes:
2332                if checkprefix in global_vars_seen_dict:
2333                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
2334                else:
2335                    global_vars_seen_dict[checkprefix] = {}
2336                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
2337                    break
2338                if not glob_val_dict[checkprefix]:
2339                    continue
2340                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
2341                    continue
2342                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
2343                    continue
2344
2345                check_lines = []
2346                global_vars_seen_before = [key for key in global_vars_seen.keys()]
2347                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
2348                lines_w_index = filter_globals_according_to_preference(
2349                    lines_w_index,
2350                    global_vars_seen_before,
2351                    nameless_value,
2352                    global_check_setting,
2353                )
2354                for i, line in lines_w_index:
2355                    if _global_value_regex:
2356                        matched = False
2357                        for regex in _global_value_regex:
2358                            if re.match("^@" + regex + " = ", line) or re.match(
2359                                "^!" + regex + " = ", line
2360                            ):
2361                                matched = True
2362                                break
2363                        if not matched:
2364                            continue
2365                    [new_line] = generalize_check_lines(
2366                        [line],
2367                        ginfo,
2368                        {},
2369                        global_vars_seen,
2370                        preserve_names,
2371                        unstable_globals_only=True,
2372                    )
2373                    new_line = filter_unstable_metadata(new_line)
2374                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
2375                    check_lines.append((i, check_line))
2376                if not check_lines:
2377                    continue
2378
2379                if not checkprefix in output_lines_loc:
2380                    output_lines_loc[checkprefix] = []
2381                if not nameless_value.interlaced_with_previous:
2382                    flush_current_checks(
2383                        output_lines, output_lines_loc[checkprefix], comment_marker
2384                    )
2385                for check_line in check_lines:
2386                    output_lines_loc[checkprefix].append(check_line)
2387
2388                printed_prefixes.add((checkprefix, nameless_value.check_prefix))
2389
                # Remember new global variables we have not seen before
2391                for key in global_vars_seen:
2392                    if key not in global_vars_seen_before:
2393                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2394                break
2395
2396    if printed_prefixes:
2397        for p in prefix_list:
2398            if p[0] is None:
2399                continue
2400            for checkprefix in p[0]:
2401                if checkprefix not in output_lines_loc:
2402                    continue
2403                flush_current_checks(
2404                    output_lines, output_lines_loc[checkprefix], comment_marker
2405                )
2406                break
2407        output_lines.append(comment_marker + SEPARATOR)
2408    return printed_prefixes
2409
2410
def check_prefix(prefix):
    """Warn if *prefix* is not a syntactically valid FileCheck prefix.

    A valid prefix matches PREFIX_RE (alphanumerics, hyphens, underscores).
    If the prefix contains a comma, the user probably meant --check-prefixes,
    so the warning suggests that spelling.
    """
    if PREFIX_RE.match(prefix):
        return
    hint = (
        " Did you mean '--check-prefixes=" + prefix + "'?" if "," in prefix else ""
    )
    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        + hint
    ) % (prefix)
    warn(message)
2423
2424
def get_check_prefixes(filecheck_cmd):
    """Collect all prefixes named by --check-prefix(es) options in a FileCheck
    command line.

    Returns ["CHECK"] when the command names no prefixes explicitly, matching
    FileCheck's own default.
    """
    prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(match.group(1).split(","))
    return prefixes if prefixes else ["CHECK"]
2433    return check_prefixes
2434
2435
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix named in a FileCheck command line.

    Each prefix from --check-prefix= / --check-prefixes= options is checked
    for legal characters via check_prefix().  A prefix listed more than once
    in a single --check-prefixes value additionally triggers a uniqueness
    warning, emitted once per duplicated prefix (not once per occurrence,
    which previously produced the same warning multiple times).
    """
    fc_cmd_parts = fc_cmd.split()
    for part in fc_cmd_parts:
        if "check-prefix=" in part:
            prefix = part.split("=", 1)[1]
            check_prefix(prefix)
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            warned = set()
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1 and prefix not in warned:
                    warned.add(prefix)
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2450                    )
2451
2452
def get_autogennote_suffix(parser, args):
    """Return the " UTC_ARGS: ..." suffix recording the non-default options.

    Walks every option registered on *parser* and serializes those whose
    value in *args* differs from its (version-aware) default, so a later run
    of the updater can reproduce the same configuration from the
    autogenerated note.  Returns an empty string when all options are at
    their defaults.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # The default for --check-globals depends on the script version
            # ('none' before version 4, 'smart' from version 4 on); pre-4
            # scripts accept the bare flag spelling for the 'all' setting, so
            # the value is omitted in that case.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Quote each element of a multi-valued argument.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space and prepend the UTC_ARGS marker.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2512    return autogenerated_note_args
2513
2514
def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse the options if *line* carries a UTC_ARGS directive.

    Options found on the directive line are appended to *argv*, and the
    combined argument vector (minus the test paths already in args.tests) is
    parsed again.  Returns the possibly updated (args, argv) pair.
    """
    match = UTC_ARGS_CMD.match(line)
    if not match:
        return args, argv
    argv.extend(opt for opt in shlex.split(match.group("cmd").strip()) if opt)
    args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
2524    return args, argv
2525
2526
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Return the value of an option for *test_info*, honoring UTC_ARGS lines.

    *get_arg_to_check* extracts the option value from a parsed-args object.
    If the option is not set on the test's own arguments and *is_global* is
    true, every line of the test is scanned for a UTC_ARGS occurrence of it;
    a warning is printed if it is found after non-comment test input has
    already started (the option still takes effect for the whole test).
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global" option
        # that affects the entire generation of test checks.  If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # Bug fix: this guard used to read "if warn and saw_line:",
                # where "warn" names the module-level warn() function and is
                # therefore always truthy — the condition reduces to just
                # saw_line.
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2555    return result
2556
2557
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Append the test's input lines to *output_lines*, dropping stale
    autogenerated content.

    Skipped lines: bare comment-marker lines, the comment+SEPARATOR line, and
    existing check lines whose prefix is in *prefix_set* (they will be
    regenerated).  Trailing newlines are stripped from kept lines.
    """
    for info in test_info.iterlines(output_lines):
        line = info.line
        args = info.args
        stripped = line.strip()
        if stripped == comment_string or stripped == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            match = CHECK_RE.match(line)
            if match and match.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2570        output_lines.append(line.rstrip("\n"))
2571
2572
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for every function, grouped by prefix, at the end of the
    output.

    Unlike "normal" mode, checks here are ordered by prefix first and then by
    function: for each prefix in *prefix_list*, *check_generator* is invoked
    once per function in func_order[prefix].  Returns the set of prefixes for
    which checks were actually generated.
    """
    emitted = set()
    produced_prefixes = set()
    for run_item in prefix_list:
        run_prefixes = run_item[0]
        tool_args = run_item[1]
        # Rename to avoid the original's shadowing of the outer loop variable;
        # behavior is unchanged.
        for run_prefix in run_prefixes:
            for func in func_order[run_prefix]:
                # The func order can contain the same function multiple times;
                # a repeat means this (func, prefix) pair is already done.
                if (func, run_prefix) in emitted:
                    continue
                if emitted:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list of
                # (prefix-list, tool-args) tuples and emit checks for every
                # prefix in each list, implicitly assuming every run line
                # produces output for every function.  Generated functions
                # break that assumption (e.g. -fopenmp vs. no -fopenmp), so
                # pass a single-prefix run list.  This also means all checks
                # for one prefix are produced before moving to the next, i.e.
                # the prefix-major ordering described above.
                for produced in check_generator(
                    output_lines, [([run_prefix], tool_args)], func
                ):
                    emitted.add((func, produced))
                    produced_prefixes.add(produced)
    return produced_prefixes
2609    return generated_prefixes
2610