xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 02debcef12793b5bf926a41a24ede18dc50eb18b)
1import argparse
2import bisect
3import collections
4import copy
5import glob
6import os
7import re
8import subprocess
9import sys
10import shlex
11
12from typing import List, Mapping, Set
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-level flags mirrored from the parsed command-line options so helpers
# (e.g. debug()) can consult them without threading args through every call.
_verbose = False
_prefix_filecheck_ir_name = ""

# NOTE: the string below is documentation only; it is a bare expression, not
# assigned to any name.
"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Output-format version used when generating checks for a brand-new test;
# existing tests keep whatever version their UTC_ARGS line recorded.
DEFAULT_VERSION = 5


# Analysis printers whose output the update scripts know how to handle; any
# other analysis name triggers a warning in FunctionTestBuilder.
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
41
42
class Regex(object):
    """Deep-copyable wrapper around a compiled regular expression.

    Compiled pattern objects are not deep-copyable, so the deep copy performed
    in do_scrub needs this wrapper; __deepcopy__ shares the underlying
    compiled pattern between copies.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Copy the wrapper shallowly; the compiled pattern itself is shared.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Delegate to the wrapped pattern's search()."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Delegate to the wrapped pattern's sub()."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string."""
        return self.regex.pattern

    def flags(self):
        """Return the pattern's compilation flags."""
        return self.regex.flags
68
69
class Filter(Regex):
    """A Regex plus a direction flag for output-line filtering.

    is_filter_out is False for --filter (keep matching lines) and True for
    --filter-out (drop matching lines).
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Reuse Regex.__deepcopy__ for the pattern, then copy the flag.
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
84
85
def parse_commandline_args(parser):
    """Register the options shared by every update_*_test_checks script on
    *parser*, parse sys.argv, mirror --verbose and the global-value regexes
    into module globals, and return the parsed namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # option_string is threaded through from __call__.  It used to
                # be referenced here without being in scope, so an invalid
                # pattern raised NameError instead of this ValueError.
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # --filter-out inverts the sense of the match.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
230
231
def parse_args(parser, argv):
    """Parse *argv*, apply version-dependent option defaults, and mirror
    selected options into module-level globals."""
    args = parser.parse_args(argv)
    # --function-signature became the default behaviour with --version >= 2.
    if args.version >= 2:
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    # 'default' for --check-globals resolves per output-format version.
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
244
245
class InputLineInfo(object):
    """Snapshot of one input line: its text, its 0-based index, and the
    effective (args, argv) in force after processing that line."""

    def __init__(self, line, line_number, args, argv):
        self.args = args
        self.argv = argv
        self.line = line
        self.line_number = line_number
252
253
class TestInfo(object):
    """Everything the update scripts need to know about one test file:
    parsed options, input lines, RUN lines, and the comment-prefix-dependent
    note strings written into the regenerated test."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Mirror the option into the module global so IR-name helpers can
            # see it without a reference to this object.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # MIR tests use '#' comments; everything else defaults to ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        # Advert line placed at the top of regenerated tests, e.g.
        # "; NOTE: Assertions have been autogenerated by <script>".
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        # get_autogennote_suffix (defined elsewhere in this module) appends
        # any non-default UTC_ARGS to the advert line.
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introduces the unused-prefix trailer section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without mutating self; any
        in-line UTC_ARGS command is folded into the yielded args/argv."""
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines the caller should process, copying pass-through
        lines into *output_lines* and updating self.args/self.argv as
        UTC_ARGS commands are encountered (a deliberate side effect)."""
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # CHECK generation is disabled here: copy the line verbatim.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the trailing block of match-everything checks for prefixes
        named in *run_list* that produced no real checks ([] if none)."""
        # Entries whose prefix list is None have no prefixes to account for.
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        # Sorted for deterministic output.
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
331
332
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching *test_patterns* that
    should be (re)generated; tests that opt out, belong to another script,
    or are not autogenerated under --update-only are skipped with a warning."""
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                # Refuse to overwrite output of a different script unless
                # --force-update was given.
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Re-apply any UTC_ARGS recorded on the advert line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Drop the autogenerated unused-prefix trailer (and everything
            # after it); it is regenerated from scratch.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
388
389
def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Return True if *input_line* should be copied into the regenerated
    test, False for lines the script will regenerate itself (existing CHECK
    lines for prefixes in *prefix_set*, blank comment lines, and the special
    separator line)."""
    stripped = input_line.strip()
    # Blank comment lines are dropped, except in global-checks mode where
    # they are kept.
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Drop the special double-comment separator line.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Keep anything that is not a CHECK line for one of our prefixes; the
    # script builds its own CHECK lines.
    m = CHECK_RE.match(input_line)
    if m is None or m.group(1) not in prefix_set:
        return True
    if skip_same_checks and CHECK_SAME_RE.match(input_line):
        # The previous CHECK line was removed, so don't leave this dangling
        return False
    if skip_global_checks:
        # Skip checks only if they are of global value definitions
        return not re.search(r"(\[\[|@)", input_line)
    return False
421
422
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: a CHECK-LABEL that matches an IR function definition
    # opens a (prefix, function) scope; subsequent plain checks of the same
    # prefix accumulate into it until the scope is broken.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A check for a different prefix closes the current scope.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Plain/NEXT/NOT/DAG/EMPTY checks: record the right-hand side
                # if a function scope is open.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # SAME lines continue the LABEL header; nothing to record.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any other line (non-comment, or unrecognized comment) ends the
        # current function's check block.
        current_function = None

    return result
474
475
476# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return lit-style (placeholder, replacement) pairs for *sourcepath*:
    %s -> the file, %S/%p -> its directory, %{pathsep} -> os.pathsep."""
    directory = os.path.dirname(sourcepath)
    substitutions = [("%s", sourcepath)]
    substitutions += [("%S", directory), ("%p", directory)]
    substitutions.append(("%{pathsep}", os.pathsep))
    return substitutions
485
486
def applySubstitutions(s, substitutions):
    """Apply each (placeholder, replacement) pair to *s*, in order."""
    result = s
    for placeholder, replacement in substitutions:
        result = result.replace(placeholder, replacement)
    return result
491
492
493# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run *exe* on the test input file *ir* and return its stdout with Unix
    line endings.

    cmd_args may be a list (run without a shell) or a string (legacy, run
    through the shell).  preprocess_cmd, when given, is a shell command whose
    stdout replaces the IR file as the tool's stdin.  Lit-style
    %-substitutions are applied to the command(s) first.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # NOTE(review): pp is never wait()ed on, so a failing preprocess
            # command is not detected here — confirm before relying on it.
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
541
542
543##### LLVM IR parser
# A RUN line in any supported comment style (//, ;, #); group 1 is the command.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# FileCheck's --check-prefix/--check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A syntactically valid check prefix.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# An existing check directive; group 1 is the prefix, group 2 the kind suffix.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# Like CHECK_RE but matching only plain or -SAME directives.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# Markers for state the update scripts store inside the test file itself.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# An IR function definition in opt output (with optional Function Attrs
# comment); the named groups feed function_body.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# Header of printed analysis output: "'<analysis>' for function '<func>':".
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Loop-pass debug output: quoted function name on the first line, body after.
LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# IR 'define' line; group 1 is the (optionally quoted) function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace/comment scrubbers applied to tool output before generating checks.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# comment_marker + SEPARATOR forms the separator line that
# should_add_line_to_output drops from regenerated tests.
SEPARATOR = "."
595
596
def error(msg, test_file=None):
    """Print an ERROR-prefixed message to stderr, optionally tagged with the
    test file name."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
601
602
def warn(msg, test_file=None):
    """Print a WARNING-prefixed message to stderr, optionally tagged with the
    test file name."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
607
608
def debug(*args, **kwargs):
    """print(*args) on stderr, but only when -v/--verbose was given.

    stderr is only the default destination; an explicit file= kwarg wins.
    (Python 2 could not spell file=sys.stderr as a keyword-only default,
    hence the kwargs dance.)
    """
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
615
616
def find_run_lines(test, lines):
    """Extract the RUN: commands from *lines*, joining backslash-continued
    lines into single commands."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: fold into the previous command.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for run in run_lines:
        debug("  RUN: {}".format(run))
    return run_lines
630
631
def get_triple_from_march(march):
    """Map a -march value to a triple by known prefix; fall back to 'x86'
    with a warning on stderr."""
    # For these targets the triple is the march prefix itself.
    known_prefixes = (
        "amdgcn",
        "r600",
        "mips",
        "nvptx64",
        "sparc",
        "hexagon",
        "ve",
    )
    for prefix in known_prefixes:
        if march.startswith(prefix):
            return prefix
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
647
648
def apply_filters(line, filters):
    """Return True if *line* survives *filters* (first match wins).

    A matching --filter keeps the line, a matching --filter-out drops it.
    When nothing matches, the line is kept only if no positive --filter was
    present at all.
    """
    any_positive = False
    for f in filters:
        if not f.is_filter_out:
            any_positive = True
        if f.search(line):
            return not f.is_filter_out
    # No filter matched: keep only if every filter was a filter-out.
    return not any_positive
659
660
def do_filter(body, filters):
    """Run each line of *body* through apply_filters; with no filters the
    body is returned unchanged."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
669
670
def scrub_body(body):
    """Normalize whitespace in a function body: collapse runs of blanks
    while keeping leading indentation, expand tabs to 2 spaces, and strip
    trailing whitespace."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)

    # Expand the tabs used for indentation.
    body = str.expandtabs(body, 2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
    return body
681
682
def do_scrub(body, scrubber, scrubber_args, extra):
    """Invoke *scrubber* on *body*.

    When scrubber_args is non-empty, a deep copy is scrubbed-args[0] gets its
    extra_scrub flag set to *extra*; the copy keeps the caller's object
    unmodified.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
689
690
691# Build up a dictionary of all the function bodies.
class function_body(object):
    """One scrubbed function body plus the signature pieces needed to decide
    whether two RUN lines produced compatible output for the function."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # scrub: body text used for the emitted checks.
        self.scrub = string
        # extrascrub: body text used when comparing across prefixes.
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        # ginfo supplies the value-regexp machinery (get_regexp,
        # get_nameless_value_from_match, is_asm, ...).
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if this body matches the given pieces once argument
        value names are ignored (asm bodies are compared verbatim instead)."""
        arg_names = set()

        def drop_arg_names(match):
            # Drop the name of every '%' (argument) value in a signature,
            # remembering it so repl_arg_names can drop its uses too.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # In the body, drop only the names collected from the signatures.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # SCRUB_IR_COMMENT_RE is defined elsewhere in this module.
        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
757
758
759class FunctionTestBuilder:
760    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
761        self._verbose = flags.verbose
762        self._record_args = flags.function_signature
763        self._check_attributes = flags.check_attributes
764        # Strip double-quotes if input was read by UTC_ARGS
765        self._filters = (
766            list(
767                map(
768                    lambda f: Filter(
769                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
770                    ),
771                    flags.filters,
772                )
773            )
774            if flags.filters
775            else []
776        )
777        self._scrubber_args = scrubber_args
778        self._path = path
779        self._ginfo = ginfo
780        # Strip double-quotes if input was read by UTC_ARGS
781        self._replace_value_regex = list(
782            map(lambda x: x.strip('"'), flags.replace_value_regex)
783        )
784        self._func_dict = {}
785        self._func_order = {}
786        self._global_var_dict = {}
787        self._processed_prefixes = set()
788        for tuple in run_list:
789            for prefix in tuple[0]:
790                self._func_dict.update({prefix: dict()})
791                self._func_order.update({prefix: []})
792                self._global_var_dict.update({prefix: dict()})
793
794    def finish_and_get_func_dict(self):
795        for prefix in self.get_failed_prefixes():
796            warn(
797                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
798                % (
799                    prefix,
800                    self._path,
801                )
802            )
803        return self._func_dict
804
    def func_order(self):
        """Return the prefix -> list-of-function-names ordering table."""
        return self._func_order
807
    def global_var_dict(self):
        """Return the per-prefix global-value tables filled in by
        process_run_line via build_global_values_dictionary."""
        return self._global_var_dict
810
    def is_filtered(self):
        """True when any --filter/--filter-out option is in effect."""
        return bool(self._filters)
813
    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """Parse one RUN line's tool output and record per-function scrubbed bodies.

        For every function matched by ``function_re`` in ``raw_tool_output``,
        the body is filtered and scrubbed, then stored in
        ``self._func_dict[prefix][func]`` for each prefix in ``prefixes``.
        When different RUN lines produce conflicting bodies for the same
        (prefix, function) pair, the entry is set to None.
        """
        # Collect check data for global values before scanning functions.
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            # Two scrub passes: scrubbed_extra (extra=True) is the variant used
            # by is_same_except_arg_names() below.
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    # A previous RUN line already recorded a body for this
                    # (prefix, function); check whether the bodies agree.
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefix but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None
937
938    def processed_prefixes(self, prefixes):
939        """
940        Mark a set of prefixes as having had at least one applicable RUN line fully
941        processed. This is used to filter out function bodies that don't have
942        outputs for all RUN lines.
943        """
944        self._processed_prefixes.update(prefixes)
945
946    def get_failed_prefixes(self):
947        # This returns the list of those prefixes that failed to match any function,
948        # because there were conflicting bodies produced by different RUN lines, in
949        # all instances of the prefix.
950        for prefix in self._func_dict:
951            if self._func_dict[prefix] and (
952                not [
953                    fct
954                    for fct in self._func_dict[prefix]
955                    if self._func_dict[prefix][fct] is not None
956                ]
957            ):
958                yield prefix
959
960
961##### Generator of LLVM IR CHECK lines
962
# Matches an IR comment: optional whitespace, ';', then the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
964
965# TODO: We should also derive check lines for global, debug, loop declarations, etc..
966
967
class NamelessValue:
    """
    Describes one kind of IR value whose "name" is generalized in the emitted
    check lines. The name may be a real identifier (as in e.g. `@some_global`
    or `%x`) or just a number (as in e.g. `%12` or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # Prefix used for the generated FileCheck variable names (TMP, GLOB, ...).
        self.check_prefix = check_prefix
        # Key identifying the kind of value ('%', '@', '!', '#', ...).
        self.check_key = check_key
        # Regex fragments surrounding the value's name in the IR text.
        self.ir_prefix = ir_prefix
        self.ir_suffix = ir_suffix
        # Regex matching the name itself.
        self.ir_regexp = ir_regexp
        # Regex for the right-hand side of a global definition, or None when
        # this kind of value is not a global.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM; replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Original number -> counter replacement (used when
        # replace_number_with_counter is set).
        self.variable_mapping = {}

    # Return true if this kind of IR value is defined "locally" to functions,
    # which we assume is only the case precisely for LLVM IR local values.
    def is_local_def_ir_value(self):
        return self.check_key == "%"

    # Return the IR regexp we use for this kind of IR value.
    def get_ir_regex(self):
        # For backwards compatibility, locals are checked with the permissive '.*'.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    # Create a FileCheck variable name based on an IR name.
    def get_value_name(self, var: str, check_prefix: str):
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            # Map each distinct number to the next free counter value,
            # reusing the previous mapping when the number was seen before.
            var = self.variable_mapping.setdefault(
                var, str(len(self.variable_mapping) + 1)
            )
        if var.isdigit():
            # This is a nameless value, prepend check_prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # A named value that clashes with the check_prefix; guard it with
            # _prefix_filecheck_ir_name, if one has been defined.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        # Recover the concrete prefix/suffix text around the value captured by
        # the combined regexp (group 2 is the whole value text).
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
1048
1049
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        """
        Args:
            version: the update_test_checks version (see changelog at the top
                of this file).
            mode: one of MODE_IR, MODE_ASM, MODE_ANALYZE.
            nameless_values: the kinds of values to generalize.
            regexp_prefix, regexp_suffix: regex fragments wrapped around the
                combined nameless-value alternation.
        """
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Regexp matching any of the nameless values.
        self._regexp, _ = self._build_regexp(False, False)
        # Regexp matching only globals whose numbering is unstable
        # (i.e. globals not matched literally).
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Combine the selected nameless values into one alternation regexp.

        Returns (compiled_regexp, values), where values[i] corresponds to the
        i-th alternative and hence to the i-th capture group following the
        prefix/whole-value groups (see FIRST_NAMELESS_GROUP_IN_MATCH).
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # In IR mode, anchor global value definitions with '^'.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # Sanity check: the only other regexp a match can come from is the
            # unstable-globals one. (This was a no-op comparison before; make
            # it an actual assertion.)
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
1154
1155
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used to generalize LLVM IR check lines.

    Starting with --version 5, basic block labels are also generalized
    (matched by FileCheck expressions), so two extra BB values are prepended.
    """
    values = []

    if version >= 5:
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    # NOTE: a stale commented-out filtering snippet (superseded by
    # GeneralizerInfo._build_regexp) was removed here.
    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1206
1207
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize assembly/MC check lines."""
    # Both MC value kinds are unstable numbers, replaced by an incrementing
    # counter in the generated variable names.
    specs = (
        (r"MCINST", "Inst#", "<MCInst #"),
        (r"MCREG", "Reg:", "<MCOperand Reg:"),
    )
    values = [
        NamelessValue(
            check_prefix,
            check_key,
            ir_prefix,
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        )
        for check_prefix, check_key, ir_prefix in specs
    ]

    prefix = r"((?:#|//)\s*)"
    suffix = r"([>\s]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix)
1234
1235
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-pass output."""
    # A single value kind: hexadecimal group identifiers, numbered with an
    # incrementing counter.
    group_value = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )

    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ANALYZE,
        [group_value],
        r"(\s*)",
        r"(\)?:)",
    )
1254
1255
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value when *var* looks (case-insensitively) like
    *check_prefix* followed by digits, i.e. it would clash with the scripted
    FileCheck variable names. A falsy check_prefix never clashes."""
    if not check_prefix:
        return check_prefix
    clash_re = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_re, var, re.IGNORECASE)
1261
1262
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs, sorted in increasing
    order of both indices (the matching is crossing-free).
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    #
    # patterns maps each line's text to ([lhs indices], [rhs indices]).
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        if len(pattern[0]) == len(pattern[1]) == 1:
            # Unique on both sides: always a candidate edge.
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Add edges for repeated lines cheapest-first, stopping once the edge
    # budget (2 * total line count) would be exceeded.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    #  - nodes on the LHS represent lines in the original check
    #  - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (candidates) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    #  * Plot the edges as points in the plane, with the original line
    #    number on the X axis and the updated line number on the Y axis.
    #  * The goal is to find a longest "chain" of points where each point
    #    is strictly above and to the right of the previous point.
    #  * The algorithm proceeds by sweeping a vertical line from left to
    #    right.
    #  * The algorithm maintains a table where `table[N]` answers the
    #    question "What is currently the 'best' way to build a chain of N+1
    #    points to the left of the vertical line". Here, 'best' means
    #    that the last point of the chain is a as low as possible (minimal
    #    Y coordinate).
    #   * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #     the last point in the chain and `y` is its Y coordinate
    #   * A key invariant is that the Y values in the table are
    #     monotonically increasing
    #  * Thanks to these properties, the table can be used to answer the
    #    question "What is the longest chain that can be built to the left
    #    of the vertical line using only points below a certain Y value",
    #    using a binary search over the table.
    #  * The algorithm also builds a backlink structure in which every point
    #    links back to the previous point on a best (longest) chain ending
    #    at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    #
    # Ties on the LHS index are ordered by *descending* RHS index.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    backlinks = []
    # table_rhs_idx / table_candidate_idx together form the `table` described
    # above, split into two parallel lists so bisect can search the Y values.
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to names in the matching by walking the backlinks. Recursively
    # attempt to fill in more matches in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
1403
1404
# Placeholder substituted for FileCheck variable names when comparing check lines.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable, e.g. [[TMP0]] or [[TMP0:%.*]] (group 1 is the name).
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the trailing run of digits in a name (possibly empty).
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1408
1409
class TestVar:
    """Tracks one FileCheck variable together with the prefix/suffix text that
    accompanied every sighting of it."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value
        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        # Keep an affix only while every sighting agrees on it.
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        nv = self._nameless_value
        return nv.get_value_name(text, nv.check_prefix)

    def _consume_affixes(self, prefix, suffix):
        # Affixes folded into the variable's own pattern must not be emitted a
        # second time around the substitution block.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return prefix, suffix

    def get_def(self, name, prefix, suffix):
        # Definition of the variable (numeric values use FileCheck's '#' form).
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        prefix, suffix = self._consume_affixes(prefix, suffix)
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        # Use of a previously defined variable.
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        prefix, suffix = self._consume_affixes(prefix, suffix)
        return f"{prefix}[[{name}]]{suffix}"
1449
1450
class CheckValueInfo:
    """Records one occurrence of a FileCheck variable on a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Key for the kind of value, e.g. '%'.
        self.key = key
        # Name of the FileCheck variable.
        self.name = name
        # Text the variable matches, without any prefix or suffix.
        self.text = text
        # Prefix and suffix captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
1472
1473
# Represent a check line in a way that allows us to compare check lines while
# ignoring some or all of the FileCheck variable names.
class CheckLineInfo:
    def __init__(self, line, values):
        """
        Args:
            line: the check line with every FileCheck variable name occurrence
                replaced by VARIABLE_TAG.
            values: a CheckValueInfo per variable occurrence in the line.
        """
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        # Label each field by its attribute name ("values", not "self.values").
        return f"CheckLineInfo(line={self.line}, values={self.values})"
1486
1487
1488def remap_metavar_names(
1489    old_line_infos: List[CheckLineInfo],
1490    new_line_infos: List[CheckLineInfo],
1491    committed_names: Set[str],
1492) -> Mapping[str, str]:
1493    """
1494    Map all FileCheck variable names that appear in new_line_infos to new
1495    FileCheck variable names in an attempt to reduce the diff from old_line_infos
1496    to new_line_infos.
1497
1498    This is done by:
1499    * Matching old check lines and new check lines using a diffing algorithm
1500      applied after replacing names with wildcards.
1501    * Committing to variable names such that the matched lines become equal
1502      (without wildcards) if possible
1503    * This is done recursively to handle cases where many lines are equal
1504      after wildcard replacement
1505    """
1506    # Initialize uncommitted identity mappings
1507    new_mapping = {}
1508    for line in new_line_infos:
1509        for value in line.values:
1510            new_mapping[value.name] = value.name
1511
1512    # Recursively commit to the identity mapping or find a better one
1513    def recurse(old_begin, old_end, new_begin, new_end):
1514        if old_begin == old_end or new_begin == new_end:
1515            return
1516
1517        # Find a matching of lines where uncommitted names are replaced
1518        # with a placeholder.
1519        def diffify_line(line, mapper):
1520            values = []
1521            for value in line.values:
1522                mapped = mapper(value.name)
1523                values.append(mapped if mapped in committed_names else "?")
1524            return line.line.strip() + " @@@ " + " @ ".join(values)
1525
1526        lhs_lines = [
1527            diffify_line(line, lambda x: x)
1528            for line in old_line_infos[old_begin:old_end]
1529        ]
1530        rhs_lines = [
1531            diffify_line(line, lambda x: new_mapping[x])
1532            for line in new_line_infos[new_begin:new_end]
1533        ]
1534
1535        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
1536
1537        candidate_matches = [
1538            (old_begin + lhs_idx, new_begin + rhs_idx)
1539            for lhs_idx, rhs_idx in candidate_matches
1540        ]
1541
1542        # Candidate matches may conflict if they require conflicting mappings of
1543        # names. We want to determine a large set of compatible candidates,
1544        # because that leads to a small diff.
1545        #
1546        # We think of the candidates as vertices in a conflict graph. The
1547        # conflict graph has edges between incompatible candidates. We want to
1548        # find a large independent set in this graph.
1549        #
1550        # Greedily selecting candidates and removing incompatible ones has the
1551        # disadvantage that making few bad decisions early on can have huge
1552        # consequences.
1553        #
1554        # Instead, we implicitly compute multiple independent sets by greedily
1555        # assigning a *coloring* to the conflict graph. Then, we select the
1556        # largest color class (which is the largest independent set we found),
1557        # commit to all candidates in it, and recurse.
1558        #
1559        # Note that we don't actually materialize the conflict graph. Instead,
1560        # each color class tracks the information needed to decide implicitly
1561        # whether a vertex conflicts (has an edge to) any of the vertices added
1562        # to the color class so far.
1563        class Color:
1564            def __init__(self):
1565                # (lhs_idx, rhs_idx) of matches in this color
1566                self.matches = []
1567
1568                # rhs_name -> lhs_name mappings required by this color
1569                self.mapping = {}
1570
1571                # lhs_names committed for this color
1572                self.committed = set()
1573
1574        colors = []
1575
1576        for lhs_idx, rhs_idx in candidate_matches:
1577            lhs_line = old_line_infos[lhs_idx]
1578            rhs_line = new_line_infos[rhs_idx]
1579
1580            # We scan through the uncommitted names in the candidate line and
1581            # filter out the color classes to which the candidate could be
1582            # assigned.
1583            #
1584            # Simultaneously, we prepare a new color class in case the candidate
1585            # conflicts with all colors that have been established so far.
1586            compatible_colors = colors[:]
1587            new_color = Color()
1588            new_color.matches.append((lhs_idx, rhs_idx))
1589
1590            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
1591                if new_mapping[rhs_value.name] in committed_names:
1592                    # The new value has already been committed. If it was mapped
1593                    # to the same name as the original value, we can consider
1594                    # committing other values from this line. Otherwise, we
1595                    # should ignore this line.
1596                    if new_mapping[rhs_value.name] == lhs_value.name:
1597                        continue
1598                    else:
1599                        break
1600
1601                if rhs_value.name in new_color.mapping:
1602                    # Same, but for a possible commit happening on the same line
1603                    if new_color.color[rhs_value.name] == lhs_value.name:
1604                        continue
1605                    else:
1606                        break
1607
1608                if (
1609                    lhs_value.name in committed_names
1610                    or lhs_value.name in new_color.committed
1611                ):
1612                    # We can't map this value because the name we would map it
1613                    # to has already been committed for something else. Give up
1614                    # on this line.
1615                    break
1616
1617                new_color.mapping[rhs_value.name] = lhs_value.name
1618                new_color.committed.add(lhs_value.name)
1619
1620                color_idx = 0
1621                while color_idx < len(compatible_colors):
1622                    color = compatible_colors[color_idx]
1623                    compatible = True
1624                    if rhs_value.name in color.mapping:
1625                        compatible = color.mapping[rhs_value.name] == lhs_value.name
1626                    else:
1627                        compatible = lhs_value.name not in color.committed
1628                    if compatible:
1629                        color_idx += 1
1630                    else:
1631                        del compatible_colors[color_idx]
1632            else:
1633                # We never broke out of the loop, which means that at a minimum,
1634                # this line is viable standalone
1635                if compatible_colors:
1636                    color = max(compatible_colors, key=lambda color: len(color.matches))
1637                    color.mapping.update(new_color.mapping)
1638                    color.committed.update(new_color.committed)
1639                    color.matches.append((lhs_idx, rhs_idx))
1640                else:
1641                    colors.append(new_color)
1642
1643        if colors:
1644            # Pick the largest color class. This gives us a large independent
1645            # (non-conflicting) set of candidate matches. Assign all names
1646            # required by the independent set and recurse.
1647            max_color = max(colors, key=lambda color: len(color.matches))
1648
1649            for rhs_var, lhs_var in max_color.mapping.items():
1650                new_mapping[rhs_var] = lhs_var
1651                committed_names.add(lhs_var)
1652
1653                if (
1654                    lhs_var != rhs_var
1655                    and lhs_var in new_mapping
1656                    and new_mapping[lhs_var] == lhs_var
1657                ):
1658                    new_mapping[lhs_var] = "conflict_" + lhs_var
1659
1660            matches = (
1661                [(old_begin - 1, new_begin - 1)]
1662                + max_color.matches
1663                + [(old_end, new_end)]
1664            )
1665
1666            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
1667                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
1668
1669    recurse(0, len(old_line_infos), 0, len(new_line_infos))
1670
1671    # Commit to remaining names and resolve conflicts
1672    for new_name, mapped_name in new_mapping.items():
1673        if mapped_name in committed_names:
1674            continue
1675        if not mapped_name.startswith("conflict_"):
1676            assert mapped_name == new_name
1677            committed_names.add(mapped_name)
1678
1679    for new_name, mapped_name in new_mapping.items():
1680        if mapped_name in committed_names:
1681            continue
1682        assert mapped_name.startswith("conflict_")
1683
1684        m = NUMERIC_SUFFIX_RE.search(new_name)
1685        base_name = new_name[: m.start()]
1686        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
1687        while True:
1688            candidate = f"{base_name}{suffix}"
1689            if candidate not in committed_names:
1690                new_mapping[new_name] = candidate
1691                committed_names.add(candidate)
1692                break
1693            suffix += 1
1694
1695    return new_mapping
1696
1697
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Generalize raw tool-output `lines` into FileCheck check lines.

    Replaces IR value names matched by ginfo's regexp with FileCheck
    metavariable defs/uses, records each value in `vars_seen` (locals) or
    `global_vars_seen` (globals), and remaps metavariable names against
    `original_check_lines` (existing checks) to minimize churn.

    Args:
        lines: list of raw output lines; mutated in place and also returned.
        ginfo: GeneralizerInfo describing the output mode and value regexps.
        vars_seen: dict keyed by (name, check_key) for local values; updated.
        global_vars_seen: same, for global values; updated.
        preserve_names: if True, skip the metavariable substitution entirely.
        original_check_lines: previously-generated check lines used to keep
            metavariable names stable across regenerations.
        unstable_globals_only: restrict matching to ginfo's unstable-globals
            regexp instead of the full value regexp.

    Returns:
        The (mutated) `lines` list.
    """
    # Choose which value regexp drives the substitution below.
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        # Wrap literal brace runs so FileCheck does not parse them as regex.
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # Rewrite matching integer globals as hex [[#...]] checks.
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # Variable names already assigned before this call; new names must
        # not collide with them.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose first occurrence (the def) has not been emitted yet.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            # Tokenize the line: consume one regexp match at a time, replacing
            # each matched value with VARIABLE_TAG in the template.
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                # Only locals track affixes; globals use empty ones.
                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping: re-expand each VARIABLE_TAG into a
        # FileCheck def ([[VAR:...]]) on first occurrence, a use ([[VAR]])
        # afterwards.
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1880
1881
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Append FileCheck check lines for `func_name` to `output_lines`.

    Emits, per usable check prefix: an optional Function Attrs comment, a
    -LABEL line (formatted via `check_label_format`), an optional -SAME line
    for captured arguments, and the generalized function body. The body
    emission differs for asm, analyze and IR modes (see the three branches
    below).

    Args:
        output_lines: list the generated check lines are appended to.
        comment_marker: comment leader for the test's language (e.g. ';').
        prefix_list: list of (check-prefixes, ...) tuples from the RUN lines.
        func_dict: prefix -> func_name -> function-body object.
        func_name: name of the function to emit checks for.
        check_label_format: %-format string for the -LABEL line.
        ginfo: GeneralizerInfo describing the output mode.
        global_vars_seen_dict: prefix -> global vars already named; updated.
        is_filtered: if True, emit plain CHECK instead of CHECK-NEXT.
        preserve_names: forwarded to generalize_check_lines.
        original_check_lines: prefix -> previously-generated check lines.
            NOTE(review): mutable default is shared across calls; safe here
            because it is only read (via .get), never mutated.

    Returns:
        The list of prefixes for which checks were actually printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Already emitted under an earlier run line; move to the next one.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can detect globals first seen in this function.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            # funcdef attrs/return type are only part of labels from version 2 on.
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2128
2129
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit IR-mode check lines for `func_name`.

    Thin wrapper around add_checks() that only builds the -LABEL format
    string appropriate for textual LLVM IR output; returns whatever
    add_checks() returns (the list of printed prefixes).
    """
    assert ginfo.is_ir()
    # Build the function-definition part of the label: no signature at all,
    # the full signature (check version > 1), or a glob over everything
    # between 'define' and the function name (legacy version 1).
    if not function_sig:
        function_def_regex = "%s"
    elif ginfo.get_version() > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
2167
2168
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit analysis-mode check lines for `func_name`.

    Thin wrapper around add_checks() using a quoted -LABEL format and a
    fresh, throwaway global-variables dictionary; returns the list of
    printed prefixes.
    """
    assert ginfo.is_analyze()
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        {},
        is_filtered,
    )
2192
2193
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Collect global value definitions from `raw_tool_output`.

    For every nameless value with a global RHS regexp, scans the tool output
    for "<lhs> = <rhs>" definition lines and stores them (as (start_offset,
    line) pairs) under glob_val_dict[prefix][check_prefix]. Conflicting
    output between prefixes is either diagnosed (last prefix) or marked with
    None so the prefix is not used for globals.
    """
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        # Full definition line: "<prefix+name> = <rhs>", anchored per line.
        definition_re = re.compile(
            r"^"
            + nameless_value.ir_prefix
            + nameless_value.ir_regexp
            + r"\s=\s"
            + nameless_value.global_ir_rhs_regexp
            + r"$",
            flags=re.M,
        )
        # Attach each match's start offset so CHECK lines can be sorted
        # properly even if they are matched by different nameless values.
        # This is relevant for GLOB and GLOBNAMED since they may appear
        # interlaced.
        found = [(m.start(), m.group(0)) for m in definition_re.finditer(raw_tool_output)]

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if found == glob_val_dict[prefix][nameless_value.check_prefix]:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    # Conflict between run lines: poison this prefix's entry.
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = found
2223
2224
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select which global definition lines should receive CHECK lines.

    `global_val_lines_w_index` is a list of (start_offset, definition_line)
    pairs; `global_vars_seen` yields (var_name, check_key) pairs already used
    by the function checks. Depending on `global_check_setting`:
    'none' keeps nothing, 'all' keeps everything, and 'smart' keeps only
    globals transitively referenced from the already-seen variables.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def extract(line, nv):
        # Split "<prefix><name> = <rhs>" into the defined name and the list
        # of name-like tokens appearing on the right-hand side.
        pattern = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        matched = re.match(pattern, line)
        return (matched.group(1), re.findall(nv.ir_regexp, matched.group(2)))

    # Map each defined global to the tokens its definition references.
    referenced_by = {}
    for _, line in global_val_lines_w_index:
        var, refs = extract(line, nameless_value)
        referenced_by[var] = refs

    visible = set()

    def mark_visible(root):
        # Iterative DFS over the reference graph starting at `root`.
        stack = [root]
        while stack:
            current = stack.pop()
            if current in visible:
                continue
            visible.add(current)
            stack.extend(referenced_by.get(current, ()))

    # Flood-fill from every already-seen variable of the matching kind.
    for var, check_key in global_vars_seen:
        if check_key == nameless_value.check_key:
            mark_visible(var)

    return [
        (idx, line)
        for idx, line in global_val_lines_w_index
        if extract(line, nameless_value)[0] in visible
    ]
2277
2278
# Substitutions that blank out build-dependent metadata text (tool versions,
# source directories) so generated checks remain stable across machines.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
# Pre-compiled (pattern, replacement) pairs for the filters above.
METADATA_FILTERS_RE = [
    (re.compile(pattern), repl) for (pattern, repl) in METADATA_FILTERS
]


def filter_unstable_metadata(line):
    """Apply every METADATA_FILTERS substitution to `line` and return it."""
    for pattern, repl in METADATA_FILTERS_RE:
        line = pattern.sub(repl, line)
    return line
2293
2294
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Flush pending (offset, check_line) pairs into `output_lines`.

    Emits a separator line first, then the pending checks in ascending
    offset order, and empties `new_lines_w_index`. Does nothing when there
    is nothing pending.
    """
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    # Offsets were recorded at collection time; sorting restores source order.
    for _, check_line in sorted(new_lines_w_index):
        output_lines.append(check_line)
    new_lines_w_index.clear()
2303
2304
2305def add_global_checks(
2306    glob_val_dict,
2307    comment_marker,
2308    prefix_list,
2309    output_lines,
2310    ginfo: GeneralizerInfo,
2311    global_vars_seen_dict,
2312    preserve_names,
2313    is_before_functions,
2314    global_check_setting,
2315):
2316    printed_prefixes = set()
2317    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
2318    for nameless_value in ginfo.get_nameless_values():
2319        if nameless_value.global_ir_rhs_regexp is None:
2320            continue
2321        if nameless_value.is_before_functions != is_before_functions:
2322            continue
2323        for p in prefix_list:
2324            global_vars_seen = {}
2325            checkprefixes = p[0]
2326            if checkprefixes is None:
2327                continue
2328            for checkprefix in checkprefixes:
2329                if checkprefix in global_vars_seen_dict:
2330                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
2331                else:
2332                    global_vars_seen_dict[checkprefix] = {}
2333                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
2334                    break
2335                if not glob_val_dict[checkprefix]:
2336                    continue
2337                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
2338                    continue
2339                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
2340                    continue
2341
2342                check_lines = []
2343                global_vars_seen_before = [key for key in global_vars_seen.keys()]
2344                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
2345                lines_w_index = filter_globals_according_to_preference(
2346                    lines_w_index,
2347                    global_vars_seen_before,
2348                    nameless_value,
2349                    global_check_setting,
2350                )
2351                for i, line in lines_w_index:
2352                    if _global_value_regex:
2353                        matched = False
2354                        for regex in _global_value_regex:
2355                            if re.match("^@" + regex + " = ", line) or re.match(
2356                                "^!" + regex + " = ", line
2357                            ):
2358                                matched = True
2359                                break
2360                        if not matched:
2361                            continue
2362                    [new_line] = generalize_check_lines(
2363                        [line],
2364                        ginfo,
2365                        {},
2366                        global_vars_seen,
2367                        preserve_names,
2368                        unstable_globals_only=True,
2369                    )
2370                    new_line = filter_unstable_metadata(new_line)
2371                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
2372                    check_lines.append((i, check_line))
2373                if not check_lines:
2374                    continue
2375
2376                if not checkprefix in output_lines_loc:
2377                    output_lines_loc[checkprefix] = []
2378                if not nameless_value.interlaced_with_previous:
2379                    flush_current_checks(
2380                        output_lines, output_lines_loc[checkprefix], comment_marker
2381                    )
2382                for check_line in check_lines:
2383                    output_lines_loc[checkprefix].append(check_line)
2384
2385                printed_prefixes.add((checkprefix, nameless_value.check_prefix))
2386
                # Remember new global variables we have not seen before
2388                for key in global_vars_seen:
2389                    if key not in global_vars_seen_before:
2390                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2391                break
2392
2393    if printed_prefixes:
2394        for p in prefix_list:
2395            if p[0] is None:
2396                continue
2397            for checkprefix in p[0]:
2398                if checkprefix not in output_lines_loc:
2399                    continue
2400                flush_current_checks(
2401                    output_lines, output_lines_loc[checkprefix], comment_marker
2402                )
2403                break
2404        output_lines.append(comment_marker + SEPARATOR)
2405    return printed_prefixes
2406
2407
def check_prefix(prefix):
    """Warn if `prefix` is not a syntactically valid FileCheck prefix name."""
    if PREFIX_RE.match(prefix):
        return
    # A comma usually means the user passed several prefixes to the
    # singular --check-prefix option; suggest the plural spelling.
    suggestion = ""
    if "," in prefix:
        suggestion = " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + suggestion
        )
        % (prefix)
    )
2420
2421
def get_check_prefixes(filecheck_cmd):
    """Collect every prefix named by --check-prefix(es) in a FileCheck command.

    Returns ["CHECK"] when the command names no prefix explicitly, matching
    FileCheck's own default.
    """
    prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(match.group(1).split(","))
    return prefixes or ["CHECK"]
2431
2432
def verify_filecheck_prefixes(fc_cmd):
    """Validate all prefixes mentioned on a FileCheck command line.

    Warns about malformed prefix names and about prefixes repeated within a
    single --check-prefixes list (a warning is emitted per occurrence).
    """
    for part in fc_cmd.split():
        if "check-prefix=" in part:
            check_prefix(part.split("=", 1)[1])
        elif "check-prefixes=" in part:
            prefix_list = part.split("=", 1)[1].split(",")
            for prefix in prefix_list:
                check_prefix(prefix)
                if prefix_list.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2448
2449
def get_autogennote_suffix(parser, args):
    """Serialize the non-default options in `args` into a UTC_ARGS suffix.

    Walks every argparse action registered on `parser`, skips options that
    should never be persisted in the test file (binary paths, verbosity, the
    test list, ...) or whose value matches the default, and renders the rest
    as command-line text.  Returns "" when nothing needs recording, otherwise
    a string of the form " UTC_ARGS: <options...>" to be appended to the
    autogenerated note line.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals changed meaning in version 4 (see the version
            # changelog at the top of this file): the effective default and
            # the value spellings both depend on args.version.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                # Pre-v4, the bare flag meant "all"; emit the flag only.
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Quote each element of a multi-valued option.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space accumulated above and prepend the key.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2510
2511
def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse the argument list when `line` carries a UTC_ARGS command.

    Options found on the line are appended to `argv`, the combined argument
    list (minus the test paths already in args.tests) is parsed again, and
    the optional callback is invoked with the fresh namespace.  Returns the
    (possibly updated) (args, argv) pair.
    """
    match = UTC_ARGS_CMD.match(line)
    if match:
        # shlex honors shell-style quoting; drop any empty tokens.
        argv.extend(opt for opt in shlex.split(match.group("cmd").strip()) if opt)
        args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv
2522
2523
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Return the value of an option for this test, honoring UTC_ARGS.

    First checks the arguments the test was invoked with.  If the option is
    absent and `is_global` is set, scans the test's lines for a UTC_ARGS
    occurrence of the option: a "global" option affects the entire generation
    of test checks, so wherever it appears it applies to everything.

    Emits a warning to stderr when the option is found only after
    non-comment, non-blank test input has already started, since such
    placement is easy to overlook.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global"
        # option that affects the entire generation of test checks.  If it
        # exists anywhere in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # Bug fix: the previous condition was `if warn and saw_line:`,
                # but `warn` is the module-level warning helper (a function,
                # hence always truthy), so only `saw_line` ever mattered.
                if saw_line:
                    # We saw the option after already reading some test input
                    # lines.  Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2553
2554
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Append the test's input lines to `output_lines`, dropping stale checks.

    Bare comment-marker lines, separator lines, and any existing check line
    whose prefix is in `prefix_set` are filtered out; every other line is
    kept verbatim minus its trailing newline.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        # Note: the original code also bound `input_line_info.args` to an
        # unused local; removed as dead code.
        if line.strip() == comment_string:
            continue
        if line.strip() == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                # An old autogenerated check for a prefix we will regenerate.
                continue
        output_lines.append(line.rstrip("\n"))
2568
2569
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for all functions at the end of the file, grouped by prefix.

    For each (prefixes, tool_args, ...) item in `prefix_list` and each prefix
    therein, runs `check_generator` once per function in `func_order[prefix]`,
    separating successive emissions with a bare `comment_string` line.
    Returns the set of prefixes for which checks were actually generated.

    Note: the original code reused the name `prefix` for both the run-list
    item and the individual prefix string; the outer variable is renamed here
    to avoid the shadowing (behavior is unchanged).
    """
    added = set()
    generated_prefixes = set()
    for run_item in prefix_list:
        prefixes = run_item[0]
        tool_args = run_item[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element.  They output
                # checks for each prefix in the list of prefixes.  By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function.  That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in.  This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix.  So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
2607