xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision d07762e47419a1ef892729a420d0c4769b6702a7)
1import argparse
2import bisect
3import collections
4import copy
5import glob
6import os
7import re
8import subprocess
9import sys
10import shlex
11
12from typing import List, Mapping, Set
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-wide state mirrored from the parsed options in
# parse_commandline_args()/parse_args(): verbose diagnostics and an optional
# prefix applied to FileCheck IR value names (see TestInfo.__init__).
_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Output-format version used when generating brand-new tests; existing tests
# keep the version recorded in their UTC_ARGS (see itertests/parse_args).
DEFAULT_VERSION = 5


# Analysis printer names the update scripts know how to parse; any other
# analysis name triggers a warning in FunctionTestBuilder.process_run_line.
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
41
42
class Regex(object):
    """Deep-copyable wrapper around a compiled regular expression.

    Compiled pattern objects do not need (and should not get) a real deep
    copy, but do_scrub deep-copies structures containing them, so this
    wrapper shares the underlying pattern across copies.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper and share the compiled pattern.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Delegate to re.Pattern.search on the wrapped pattern."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Delegate to re.Pattern.sub on the wrapped pattern."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string of the wrapped regex."""
        return self.regex.pattern

    def flags(self):
        """Return the flags the wrapped regex was compiled with."""
        return self.regex.flags
68
69
class Filter(Regex):
    """A Regex plus a direction flag for check generation.

    is_filter_out False: lines matching the regex are kept;
    is_filter_out True: lines matching the regex are removed.
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Reuse Regex's pattern-sharing deep copy, then copy the flag.
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
83        return result
84
85
def parse_commandline_args(parser):
    """Register the common update-script options on parser, parse sys.argv,
    mirror the verbosity/global-regex options into module globals, and return
    the parsed namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # option_string is threaded through from __call__ so the error
                # names the offending option. (It was previously an undefined
                # name here, turning a bad regex into a NameError instead of
                # the intended ValueError.)
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # Rewrap the Regex appended by RegexAction as a Filter, recording
            # whether it includes (--filter) or excludes (--filter-out) matches.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
230
231
def parse_args(parser, argv):
    """Parse argv with parser, apply version-dependent defaults, mirror the
    verbosity/global-regex options into module globals, and return the
    namespace."""
    args = parser.parse_args(argv)
    if args.version >= 2:
        # From format version 2 on, --function-signature is implied.
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        # 'smart' became the default for --check-globals in version 4.
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
244
245
class InputLineInfo(object):
    """One input line bundled with its position and the option state
    (args/argv) in effect when it was read."""

    def __init__(self, line, line_number, args, argv):
        self.line = line                # raw line text
        self.line_number = line_number  # 0-based index in the test file
        self.args = args                # parsed options active for this line
        self.argv = argv                # raw argv the options came from
252
253
class TestInfo(object):
    """State for one test file being (re)generated: its lines, RUN lines,
    option state, and the comment-prefix/advert strings used when rewriting."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Publish the requested FileCheck IR name prefix module-wide.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # Infer the comment leader from the file extension.
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        # e.g. "; NOTE: Assertions have been autogenerated by <script><suffix>"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment marker introduces the unused-prefix section.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without mutating self.

        check_for_command (defined elsewhere in this module) computes the
        (args, argv) option state carried by each yielded line.
        """
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield lines to be processed for check generation.

        Lines that are only copied through (the autogenerated note, and all
        lines while generation is disabled) are appended to output_lines
        instead of being yielded. Updates self.args/self.argv as per-line
        option state changes.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # Generation is disabled from this point: copy through as-is.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return check lines for prefixes in run_list that generated no
        checks: the unused-prefix note followed by one match-anything line
        per unused prefix (empty list if every prefix was used)."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            # "{{.*}}" lets the unused prefix match any line, so FileCheck
            # does not error out on a prefix with no real checks.
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
333
334
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for each test file matching test_patterns.

    Skips files whose first line contains UTC_AVOID, files autogenerated by a
    different script (unless --force-update), and — under --update-only —
    files that were not autogenerated at all.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            # An existing autogenerated note means we are regenerating.
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Re-apply any UTC_ARGS recorded on the note line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Drop the previously generated unused-prefix section; it is
            # rebuilt from scratch on output.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
390
391
# Matches FileCheck variable references ("[[") or IR global references ("@")
# on a check line. Compiled once at module load instead of on every call.
_GLOBAL_IR_VALUE_RE = re.compile(r"(\[\[|@)", flags=(re.M))


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Return True if input_line should be kept in the regenerated test.

    Drops blank comment lines, the special separator line, and existing CHECK
    lines whose prefix is in prefix_set (the script emits its own checks).
    With skip_same_checks, a *-SAME: line is also dropped; with
    skip_global_checks, only checks that reference global value definitions
    are dropped. Blank lines are deliberately preserved.
    """
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling
            return False
        if skip_global_checks:
            # Keep the check unless it refers to a global value definition.
            return not _GLOBAL_IR_VALUE_RE.search(input_line)
        return False

    return True
423
424
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    # NOTE(review): prefix_set is not consulted in this function — confirm
    # whether it is still needed by callers.
    result = collections.defaultdict(lambda: {})

    # State machine: which (prefix, function) block of checks we are inside.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                # Right-hand side of the check directive.
                line = input_line[m.end() :].strip()

                # A different prefix interrupts the current function's block.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Plain/NEXT/NOT/DAG/EMPTY checks extend the current function.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # SAME continues the LABEL line; its text is not collected.
                if check_kind == "SAME":
                    continue

                # A LABEL check opens a new function's block of checks.
                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any non-check line ends the current function's block.
        current_function = None

    return result
476
477
478# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return lit-style (placeholder, value) pairs for a test at sourcepath:
    %s (the file), %S/%p (its directory), and %{pathsep}."""
    sourcedir = os.path.dirname(sourcepath)
    mapping = {
        "%s": sourcepath,
        "%S": sourcedir,
        "%p": sourcedir,
        "%{pathsep}": os.pathsep,
    }
    return list(mapping.items())
487
488
def applySubstitutions(s, substitutions):
    """Apply each (placeholder, value) pair to s in order; later pairs see
    the result of earlier replacements."""
    for placeholder, value in substitutions:
        s = s.replace(placeholder, value)
    return s
493
494
495# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run `exe` with `cmd_args` on the IR file `ir` and return its stdout
    (decoded, with Windows line endings normalized to Unix).

    preprocess_cmd, if given, is a shell command string that filters the IR
    before it reaches the tool. Lit-style substitutions (%s, %S, %p,
    %{pathsep}) are applied to cmd_args and preprocess_cmd.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # The tool reads the preprocessor's stdout instead of the file.
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            # List form: run without a shell (safer).
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            # String form: run through the shell (legacy).
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
543
544
545##### LLVM IR parser
# A RUN: line in any supported comment style (//, ;, #); group 1 is the command.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# FileCheck's --check-prefix/--check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A syntactically valid FileCheck prefix.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# A check directive: group 1 is the prefix, group 2 the optional kind suffix.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# Like CHECK_RE but only recognizes the -SAME suffix.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# In-test option overrides: "UTC_ARGS: <cmd>" anywhere on a line.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# Marker notes written to / recognized on the first line of a test.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# An IR 'define' with an optional "; Function Attrs:" header line; captures
# attrs, the text between 'define' and '@', the function name, the argument
# signature, and the body up to the closing '}'.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# "'<analysis>' for function '<func>':" header followed by the report body.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Loop-pass debug output: "'<func>' ..." followed by the body.
LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# IR function definition (optionally internal); group 1 is the name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# As above, but the 'define' part may be absent (e.g. on a CHECK-LABEL line).
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
# Extract the target triple from IR, and -triple/-march/-debug-only values
# from RUN-line arguments.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace/comment scrubbers applied to tool output before check generation
# (see scrub_body; the *_RE names describe what each one removes or matches).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# A comment marker followed by SEPARATOR forms the section-separator line
# recognized by should_add_line_to_output.
SEPARATOR = "."
600
601
def error(msg, test_file=None):
    """Emit an ERROR diagnostic on stderr, appending the test file name if given."""
    full_msg = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(full_msg), file=sys.stderr)
606
607
def warn(msg, test_file=None):
    """Emit a WARNING diagnostic on stderr, appending the test file name if given."""
    full_msg = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(full_msg), file=sys.stderr)
612
613
def debug(*args, **kwargs):
    """Print args (default destination: stderr) only when verbose mode is on."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
620
621
def find_run_lines(test, lines):
    """Return the RUN: commands found in lines, with backslash-continued
    RUN lines joined into a single command."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: fold into the previous command.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for cmd in run_lines:
        debug("  RUN: {}".format(cmd))
    return run_lines
635
636
def get_triple_from_march(march):
    """Map an -march value to a target triple by prefix, defaulting to 'x86'
    (with a warning on stderr) when no known prefix matches."""
    triples = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    matched = next(
        (triple for prefix, triple in triples.items() if march.startswith(prefix)),
        None,
    )
    if matched is not None:
        return matched
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
652
653
def apply_filters(line, filters):
    """Decide whether line survives the filter list.

    The first filter whose regex matches decides: keep for an include filter,
    drop for a filter-out. If nothing matches, the line is kept only when no
    include filters were present at all.
    """
    saw_include = False
    for flt in filters:
        if not flt.is_filter_out:
            saw_include = True
        if flt.search(line):
            return not flt.is_filter_out
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return not saw_include
664
665
def do_filter(body, filters):
    """Return body with non-surviving lines removed; unchanged when no
    filters are given."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
674
675
def scrub_body(body):
    """Normalize whitespace in a function body for stable check lines."""
    # Collapse internal runs of spaces/tabs while keeping leading indentation.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Indentation tabs become two spaces.
    body = body.expandtabs(2)
    # Drop trailing whitespace on every line.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
686
687
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run scrubber on body with scrubber_args; when args are present, pass a
    deep copy whose first element has extra_scrub set to `extra`, leaving the
    caller's args untouched."""
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    args_copy = copy.deepcopy(scrubber_args)
    args_copy[0].extra_scrub = extra
    return scrubber(body, *args_copy)
694
695
696# Build up a dictionary of all the function bodies.
class function_body(object):
    """One scrubbed function body plus the signature/attribute pieces needed
    to compare outputs of different RUN lines for the same function."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        # scrub: the scrubbed body text; extrascrub: the variant produced with
        # extra_scrub=True, used for the comparisons below.
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        # ginfo supplies the nameless-value regexp/match helpers (defined
        # elsewhere in this module).
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if this body matches the given pieces modulo the names
        of '%'-prefixed (local) argument values."""
        arg_names = set()

        def drop_arg_names(match):
            # Strip names of '%' values, recording them in arg_names; other
            # nameless values keep their name text (group 2).
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Strip only the names recorded by drop_arg_names above.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        # Signatures must agree once argument names are dropped.
        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        # IR path: compare bodies with argument names dropped and IR comments
        # scrubbed out.
        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
762
763
764class FunctionTestBuilder:
765    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
766        self._verbose = flags.verbose
767        self._record_args = flags.function_signature
768        self._check_attributes = flags.check_attributes
769        # Strip double-quotes if input was read by UTC_ARGS
770        self._filters = (
771            list(
772                map(
773                    lambda f: Filter(
774                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
775                    ),
776                    flags.filters,
777                )
778            )
779            if flags.filters
780            else []
781        )
782        self._scrubber_args = scrubber_args
783        self._path = path
784        self._ginfo = ginfo
785        # Strip double-quotes if input was read by UTC_ARGS
786        self._replace_value_regex = list(
787            map(lambda x: x.strip('"'), flags.replace_value_regex)
788        )
789        self._func_dict = {}
790        self._func_order = {}
791        self._global_var_dict = {}
792        self._processed_prefixes = set()
793        for tuple in run_list:
794            for prefix in tuple[0]:
795                self._func_dict.update({prefix: dict()})
796                self._func_order.update({prefix: []})
797                self._global_var_dict.update({prefix: dict()})
798
799    def finish_and_get_func_dict(self):
800        for prefix in self.get_failed_prefixes():
801            warn(
802                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
803                % (
804                    prefix,
805                    self._path,
806                )
807            )
808        return self._func_dict
809
    def func_order(self):
        # Prefix -> function names in encounter order; returns the live dict,
        # not a copy.
        return self._func_order
812
    def global_var_dict(self):
        # Prefix -> global-value info filled by build_global_values_dictionary
        # in process_run_line; returns the live dict, not a copy.
        return self._global_var_dict
815
816    def is_filtered(self):
817        return bool(self._filters)
818
    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """
        Scan the output of one RUN line and record a scrubbed body for every
        matched function, under each of the given check prefixes.

        function_re must provide named groups 'func' and 'body'; the groups
        'attrs', 'funcdef_attrs_and_ret', 'args_and_sig', 'analysis' and
        'func_name_separator' are consulted when present. Bodies are passed
        through do_filter and do_scrub before being stored in self._func_dict.
        """
        # Collect global value definitions from this RUN line's output first.
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            # NOTE(review): finditer never yields falsy match objects, so this
            # guard appears to be dead code.
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. In non-asm
            # modes (see self._ginfo.is_asm()), just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            # Scrub the body twice: once for the primary check lines, and once
            # ("extra") for comparing bodies that differ only in value names.
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                # This function was already seen under this prefix (produced
                # by an earlier RUN line).
                if func in self._func_dict[prefix]:
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used these check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None
942
    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.

        prefixes: iterable of check-prefix strings to add to the processed set.
        """
        self._processed_prefixes.update(prefixes)
950
951    def get_failed_prefixes(self):
952        # This returns the list of those prefixes that failed to match any function,
953        # because there were conflicting bodies produced by different RUN lines, in
954        # all instances of the prefix.
955        for prefix in self._func_dict:
956            if self._func_dict[prefix] and (
957                not [
958                    fct
959                    for fct in self._func_dict[prefix]
960                    if self._func_dict[prefix][fct] is not None
961                ]
962            ):
963                yield prefix
964
965
966##### Generator of LLVM IR CHECK lines
967
# Matches an LLVM IR line comment (';' through end of line) together with any
# whitespace preceding it.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc..
971
972
class NamelessValue:
    """
    Describes one kind of IR value whose "name" is generalized in the
    generated check lines. The "name" may be an actual identifier (as in
    e.g. `@some_global` or `%x`) or a bare number (as in e.g. `%12` or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        # Prefix used when inventing FileCheck variable names, e.g. "TMP".
        self.check_prefix = check_prefix
        # Short key identifying the kind of value, e.g. '%', '@', '!'.
        self.check_key = check_key
        # Regex fragments surrounding and matching the value in the IR text.
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        # RHS pattern for global definitions; None for non-global values.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) change with unrelated LLVM
        # modifications; map those to an incrementing counter instead.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # State for replace_number_with_counter: original number -> counter.
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """Return True iff this value kind is defined locally to a function,
        which we assume is precisely the case for LLVM IR local values."""
        return self.check_key == "%"

    def get_ir_regex(self):
        """Return the regex used to match this kind of IR value."""
        # For backwards compatibility, locals are matched with '.*'.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Create a FileCheck variable name based on the IR name `var`."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            if var not in self.variable_mapping:
                # First sighting of this number: assign the next counter value.
                self.variable_mapping[var] = str(len(self.variable_mapping) + 1)
            var = self.variable_mapping[var]
        if var.isdigit():
            # Nameless value: prepend check_prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # Named value that clashes with check_prefix: disambiguate with
            # _prefix_filecheck_ir_name when it has been defined.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        """Return the (prefix, suffix) text captured around the matched value."""
        matched_text = match.group(2)
        prefix = re.match(self.ir_prefix, matched_text).group(0)
        suffix = re.search(self.ir_suffix + "$", matched_text).group(0)
        return prefix, suffix
1053
1054
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    # The kinds of tool output we generalize checks for.
    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Combined regexp matching any of the nameless values.
        self._regexp, _ = self._build_regexp(False, False)
        # Restricted regexp matching only globals whose numbering is unstable
        # (i.e. not matched literally), plus the values it can match.
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """
        Build one alternation regexp over the selected nameless values.

        Returns (compiled_regexp, values) where values[i] corresponds to the
        i-th alternative (capture group) of the regexp.
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            # In IR mode, global definitions are anchored at line start.
            if self.is_ir() and not globals_only and is_global:
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # This was previously a bare (no-op) comparison expression; it is
            # meant to assert which regexp produced the match.
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i, nameless_value in enumerate(values):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, nameless_value
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
1159
1160
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used to generalize LLVM IR check lines."""
    # Basic-block labels are only generalized from version 5 onwards.
    bb_values = (
        [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]
        if version >= 5
        else []
    )

    # check_prefix, check_key, ir_prefix, ir_regexp, global_ir_rhs_regexp
    values = bb_values + [
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1211
1212
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used to generalize assembly check lines."""
    prefix = r"((?:#|//)\s*)"
    suffix = r"([>\s]|\Z)"

    # MCInst/MCOperand numbers are unstable across unrelated LLVM changes, so
    # both values are replaced with an incrementing counter.
    values = [
        NamelessValue(
            r"MCINST",
            "Inst#",
            "<MCInst #",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
        NamelessValue(
            r"MCREG",
            "Reg:",
            "<MCOperand Reg:",
            r"\d+",
            r".+",
            is_number=True,
            replace_number_with_counter=True,
        ),
    ]

    return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix)
1239
1240
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used to generalize analysis-output check lines."""
    prefix = r"(\s*)"
    suffix = r"(\)?:)"

    # Hexadecimal group identifiers are unstable; replace with a counter.
    values = [
        NamelessValue(
            r"GRP",
            "#",
            r"",
            r"0x[0-9a-f]+",
            None,
            replace_number_with_counter=True,
        ),
    ]

    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix
    )
1259
1260
# Return a truthy value if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    # An empty prefix can never clash; propagate the original falsy value.
    if not check_prefix:
        return check_prefix
    # Names of the form "<prefix><digits>" (case-insensitive) collide with
    # the variable names the script generates itself.
    return re.match(r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE)
1266
1267
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs.
    """

    if not lhs or not rhs:
        return []

    # Group line indices by line content, separately for each side.
    occurrences = collections.defaultdict(lambda: ([], []))
    for lhs_idx, line in enumerate(lhs):
        occurrences[line][0].append(lhs_idx)
    for rhs_idx, line in enumerate(rhs):
        occurrences[line][1].append(rhs_idx)

    # Candidate matching edges pair up indices of identical lines. Lines that
    # are unique on both sides always produce an edge; ambiguous lines are
    # admitted cheapest-first until a size budget is exhausted, which keeps
    # the edge count linear in the input size (avoiding quadratic runtime).
    ambiguous = []
    candidates = []
    for lhs_positions, rhs_positions in occurrences.values():
        if not lhs_positions or not rhs_positions:
            continue

        if len(lhs_positions) == 1 and len(rhs_positions) == 1:
            candidates.append((lhs_positions[0], rhs_positions[0]))
        else:
            ambiguous.append((lhs_positions, rhs_positions))

    ambiguous.sort(key=lambda entry: len(entry[0]) * len(entry[1]))

    budget = 2 * (len(lhs) + len(rhs))
    for lhs_positions, rhs_positions in ambiguous:
        if len(candidates) + len(lhs_positions) * len(rhs_positions) > budget:
            break
        for lhs_idx in lhs_positions:
            for rhs_idx in rhs_positions:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching, i.e. a longest chain of edges
    # that is strictly increasing in both coordinates. Geometric intuition:
    # plot each edge as a point (lhs_idx, rhs_idx) and sweep a vertical line
    # left to right. `table_rhs_idx[k]` records the smallest Y coordinate at
    # which a chain of k+1 points can currently end (these Y values are
    # monotonically increasing, enabling binary search via bisect), and
    # `table_candidate_idx[k]` records which point that chain ends in.
    # `backlinks` lets us recover the chain afterwards: each point links back
    # to the previous point of a best chain ending at it. The sweep is
    # O(M log M) in the number of points.
    #
    # Sorting by (x, -y) ensures that among points sharing an X coordinate we
    # visit the highest Y first, so no chain uses two points from the same
    # LHS line.
    candidates.sort(key=lambda edge: (edge[0], -edge[1]))

    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Record a best chain ending in the current point. If a previously
        # visited point had a higher Y coordinate, the existing entry is
        # improved (lowered) by the current point; ties (same rhs_idx, only
        # possible for duplicated LHS lines) are overwritten, which merely
        # biases the algorithm without hurting result quality.
        if ti == len(table_rhs_idx):
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        else:
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        backlinks.append(table_candidate_idx[ti - 1] if ti > 0 else None)

    # Commit to the matching by walking the backlinks from the end of the
    # longest chain, then restore left-to-right order.
    matches = []
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        matches.append(candidates[match_idx])
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
1408
1409
# Placeholder substituted for FileCheck variable names when comparing check
# lines structurally (see CheckLineInfo.line).
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable use or definition such as [[NAME]] or
# [[NAME:regex]]; group 1 is the variable name.
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of trailing digits in a name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1413
1414
class TestVar:
    """
    Tracks one FileCheck variable together with the prefix/suffix context it
    was seen in, and renders its definitions and uses as check-line text.
    """

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value

        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        # As soon as one occurrence disagrees on an affix, stop folding that
        # affix into the variable pattern.
        if prefix != self._prefix:
            self._prefix = ""
        if suffix != self._suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        nv = self._nameless_value
        return nv.get_value_name(text, nv.check_prefix)

    def _consume_affixes(self, prefix, suffix):
        # Affixes that were folded into the variable pattern must not be
        # emitted again outside the [[...]] block.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return prefix, suffix

    def get_def(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        prefix, suffix = self._consume_affixes(prefix, suffix)
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        prefix, suffix = self._consume_affixes(prefix, suffix)
        return f"{prefix}[[{name}]]{suffix}"
1454
1455
class CheckValueInfo:
    """One occurrence of a FileCheck variable within a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        self.key = key  # Key for the value, e.g. '%'.
        self.text = text  # Text matched by the variable (without affixes).
        self.name = name  # Name of the FileCheck variable.
        # Prefix and suffix captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
1477
1478
class CheckLineInfo:
    """
    A check line in a form that allows comparing check lines while ignoring
    some or all of the FileCheck variable names.
    """

    def __init__(self, line, values):
        # The check line with every FileCheck variable name occurrence
        # replaced by VARIABLE_TAG.
        self.line: str = line
        # One CheckValueInfo per variable occurrence, in order of appearance.
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return "CheckLineInfo(line={}, self.values={})".format(self.line, self.values)
1491
1492
1493def remap_metavar_names(
1494    old_line_infos: List[CheckLineInfo],
1495    new_line_infos: List[CheckLineInfo],
1496    committed_names: Set[str],
1497) -> Mapping[str, str]:
1498    """
1499    Map all FileCheck variable names that appear in new_line_infos to new
1500    FileCheck variable names in an attempt to reduce the diff from old_line_infos
1501    to new_line_infos.
1502
1503    This is done by:
1504    * Matching old check lines and new check lines using a diffing algorithm
1505      applied after replacing names with wildcards.
1506    * Committing to variable names such that the matched lines become equal
1507      (without wildcards) if possible
1508    * This is done recursively to handle cases where many lines are equal
1509      after wildcard replacement
1510    """
1511    # Initialize uncommitted identity mappings
1512    new_mapping = {}
1513    for line in new_line_infos:
1514        for value in line.values:
1515            new_mapping[value.name] = value.name
1516
1517    # Recursively commit to the identity mapping or find a better one
1518    def recurse(old_begin, old_end, new_begin, new_end):
1519        if old_begin == old_end or new_begin == new_end:
1520            return
1521
1522        # Find a matching of lines where uncommitted names are replaced
1523        # with a placeholder.
1524        def diffify_line(line, mapper):
1525            values = []
1526            for value in line.values:
1527                mapped = mapper(value.name)
1528                values.append(mapped if mapped in committed_names else "?")
1529            return line.line.strip() + " @@@ " + " @ ".join(values)
1530
1531        lhs_lines = [
1532            diffify_line(line, lambda x: x)
1533            for line in old_line_infos[old_begin:old_end]
1534        ]
1535        rhs_lines = [
1536            diffify_line(line, lambda x: new_mapping[x])
1537            for line in new_line_infos[new_begin:new_end]
1538        ]
1539
1540        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
1541
1542        candidate_matches = [
1543            (old_begin + lhs_idx, new_begin + rhs_idx)
1544            for lhs_idx, rhs_idx in candidate_matches
1545        ]
1546
1547        # Candidate matches may conflict if they require conflicting mappings of
1548        # names. We want to determine a large set of compatible candidates,
1549        # because that leads to a small diff.
1550        #
1551        # We think of the candidates as vertices in a conflict graph. The
1552        # conflict graph has edges between incompatible candidates. We want to
1553        # find a large independent set in this graph.
1554        #
1555        # Greedily selecting candidates and removing incompatible ones has the
1556        # disadvantage that making few bad decisions early on can have huge
1557        # consequences.
1558        #
1559        # Instead, we implicitly compute multiple independent sets by greedily
1560        # assigning a *coloring* to the conflict graph. Then, we select the
1561        # largest color class (which is the largest independent set we found),
1562        # commit to all candidates in it, and recurse.
1563        #
1564        # Note that we don't actually materialize the conflict graph. Instead,
1565        # each color class tracks the information needed to decide implicitly
1566        # whether a vertex conflicts (has an edge to) any of the vertices added
1567        # to the color class so far.
1568        class Color:
1569            def __init__(self):
1570                # (lhs_idx, rhs_idx) of matches in this color
1571                self.matches = []
1572
1573                # rhs_name -> lhs_name mappings required by this color
1574                self.mapping = {}
1575
1576                # lhs_names committed for this color
1577                self.committed = set()
1578
1579        colors = []
1580
1581        for lhs_idx, rhs_idx in candidate_matches:
1582            lhs_line = old_line_infos[lhs_idx]
1583            rhs_line = new_line_infos[rhs_idx]
1584
1585            # We scan through the uncommitted names in the candidate line and
1586            # filter out the color classes to which the candidate could be
1587            # assigned.
1588            #
1589            # Simultaneously, we prepare a new color class in case the candidate
1590            # conflicts with all colors that have been established so far.
1591            compatible_colors = colors[:]
1592            new_color = Color()
1593            new_color.matches.append((lhs_idx, rhs_idx))
1594
1595            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
1596                if new_mapping[rhs_value.name] in committed_names:
1597                    # The new value has already been committed. If it was mapped
1598                    # to the same name as the original value, we can consider
1599                    # committing other values from this line. Otherwise, we
1600                    # should ignore this line.
1601                    if new_mapping[rhs_value.name] == lhs_value.name:
1602                        continue
1603                    else:
1604                        break
1605
1606                if rhs_value.name in new_color.mapping:
1607                    # Same, but for a possible commit happening on the same line
1608                    if new_color.mapping[rhs_value.name] == lhs_value.name:
1609                        continue
1610                    else:
1611                        break
1612
1613                if (
1614                    lhs_value.name in committed_names
1615                    or lhs_value.name in new_color.committed
1616                ):
1617                    # We can't map this value because the name we would map it
1618                    # to has already been committed for something else. Give up
1619                    # on this line.
1620                    break
1621
1622                new_color.mapping[rhs_value.name] = lhs_value.name
1623                new_color.committed.add(lhs_value.name)
1624
1625                color_idx = 0
1626                while color_idx < len(compatible_colors):
1627                    color = compatible_colors[color_idx]
1628                    compatible = True
1629                    if rhs_value.name in color.mapping:
1630                        compatible = color.mapping[rhs_value.name] == lhs_value.name
1631                    else:
1632                        compatible = lhs_value.name not in color.committed
1633                    if compatible:
1634                        color_idx += 1
1635                    else:
1636                        del compatible_colors[color_idx]
1637            else:
1638                # We never broke out of the loop, which means that at a minimum,
1639                # this line is viable standalone
1640                if compatible_colors:
1641                    color = max(compatible_colors, key=lambda color: len(color.matches))
1642                    color.mapping.update(new_color.mapping)
1643                    color.committed.update(new_color.committed)
1644                    color.matches.append((lhs_idx, rhs_idx))
1645                else:
1646                    colors.append(new_color)
1647
1648        if colors:
1649            # Pick the largest color class. This gives us a large independent
1650            # (non-conflicting) set of candidate matches. Assign all names
1651            # required by the independent set and recurse.
1652            max_color = max(colors, key=lambda color: len(color.matches))
1653
1654            for rhs_var, lhs_var in max_color.mapping.items():
1655                new_mapping[rhs_var] = lhs_var
1656                committed_names.add(lhs_var)
1657
1658                if (
1659                    lhs_var != rhs_var
1660                    and lhs_var in new_mapping
1661                    and new_mapping[lhs_var] == lhs_var
1662                ):
1663                    new_mapping[lhs_var] = "conflict_" + lhs_var
1664
1665            matches = (
1666                [(old_begin - 1, new_begin - 1)]
1667                + max_color.matches
1668                + [(old_end, new_end)]
1669            )
1670
1671            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
1672                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
1673
1674    recurse(0, len(old_line_infos), 0, len(new_line_infos))
1675
1676    # Commit to remaining names and resolve conflicts
1677    for new_name, mapped_name in new_mapping.items():
1678        if mapped_name in committed_names:
1679            continue
1680        if not mapped_name.startswith("conflict_"):
1681            assert mapped_name == new_name
1682            committed_names.add(mapped_name)
1683
1684    for new_name, mapped_name in new_mapping.items():
1685        if mapped_name in committed_names:
1686            continue
1687        assert mapped_name.startswith("conflict_")
1688
1689        m = NUMERIC_SUFFIX_RE.search(new_name)
1690        base_name = new_name[: m.start()]
1691        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
1692        while True:
1693            candidate = f"{base_name}{suffix}"
1694            if candidate not in committed_names:
1695                new_mapping[new_name] = candidate
1696                committed_names.add(candidate)
1697                break
1698            suffix += 1
1699
1700    return new_mapping
1701
1702
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Convert raw tool-output lines into generalized FileCheck check lines.

    Named values matched by ginfo's regexp are replaced by FileCheck variable
    definitions/uses, recorded in vars_seen (local values) and
    global_vars_seen (global values); both dicts are updated in place.

    Args:
      lines: the raw lines to generalize; modified in place and returned.
      ginfo: GeneralizerInfo providing the value-matching regexps and the
        output kind predicates (is_ir/is_asm/is_analyze).
      vars_seen: dict keyed by (name, check_key) of TestVar for local values.
      global_vars_seen: same, for global values.
      preserve_names: if True, skip metavariable generalization entirely.
      original_check_lines: previously generated check lines for this
        function, used to keep FileCheck variable names stable across
        regenerations (fed into remap_metavar_names).
      unstable_globals_only: only match globals whose names are unstable,
        using ginfo's dedicated regexp.

    Returns:
      The (mutated) list of generalized check lines.
    """
    # Select which value-matching regexp to use for this pass.
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        # Wrap literal brace runs in a {{...}} FileCheck regex so they are
        # matched verbatim.
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # For globals matching --global-hex-value-regex, check the value
            # as a hex FileCheck numeric expression instead of a literal.
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # Names already assigned to local values in earlier calls must not be
        # reused for different values.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose first occurrence (the FileCheck definition) has not been
        # emitted yet.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            # Consume the line left to right, replacing each matched value
            # with VARIABLE_TAG and recording it in `values`.
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            # Re-expand each VARIABLE_TAG placeholder into a FileCheck
            # definition (first occurrence) or use (subsequent occurrences).
            for value in line_info.values:
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1885
1886
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Emit FileCheck lines for *func_name* into *output_lines*.

    For every run line in *prefix_list*, the first usable check prefix gets a
    -LABEL line built from *check_label_format* followed by the generalized
    function body (format depends on ginfo: asm, analysis, or IR output).

    Args:
      output_lines: list the check lines are appended to.
      comment_marker: comment leader for the test's language (e.g. ';').
      prefix_list: run-line descriptors; element [0] is the list of check
        prefixes for that run line.
      func_dict: check prefix -> function name -> function body/signature.
      func_name: name of the function to emit checks for.
      check_label_format: printf-style template for the -LABEL line.
      ginfo: GeneralizerInfo for the output kind.
      global_vars_seen_dict: per-prefix record of global values already seen;
        updated with globals first seen while printing each prefix.
      is_filtered: if True, output was filtered, so -NEXT is not used.
      preserve_names: forwarded to generalize_check_lines.
      original_check_lines: per-prefix previously generated check lines used
        to keep variable names stable (IR output only).

    Returns:
      The list of prefixes actually printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Already printed under an earlier run line; nothing more to do
            # for this run line.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can tell which globals are first seen while
            # printing this prefix.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2133
2134
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit IR check lines for *func_name* by delegating to add_checks.

    Builds the -LABEL line template appropriate for the script version in
    use and forwards everything else unchanged.
    """
    assert ginfo.is_ir()
    # Label format is based on IR string. How much of the "define" line
    # appears in the label depends on whether the signature is checked and
    # on the script version.
    if not function_sig:
        function_def_regex = "%s"
    elif ginfo.get_version() > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
2172
2173
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit analysis-output check lines for *func_name*.

    Delegates to add_checks with a quoted -LABEL template and a fresh
    (empty) global-variables dictionary.
    """
    assert ginfo.is_analyze()
    label_template = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_template,
        ginfo,
        {},
        is_filtered,
    )
2197
2198
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Record global value definition lines per prefix and value kind.

    For every global nameless value kind known to *ginfo*, scans
    *raw_tool_output* for "<lhs> = <rhs>" definition lines and stores the
    matches (tagged with their start offset) in
    glob_val_dict[prefix][check_prefix]. A prefix whose previously recorded
    output conflicts is invalidated (set to None), except for the final
    prefix, where a warning is emitted and the new lines win.
    """
    for nv in ginfo.get_nameless_values():
        # Only value kinds with a RHS regexp describe global definitions.
        if nv.global_ir_rhs_regexp is None:
            continue

        definition_re = re.compile(
            r"^" + nv.ir_prefix + nv.ir_regexp + r"\s=\s" + nv.global_ir_rhs_regexp + r"$",
            flags=re.M,
        )
        # Keep each match's start offset so that CHECK lines can be sorted
        # properly even when matched by different nameless values. This is
        # relevant for GLOB and GLOBNAMED since they may appear interlaced.
        found_lines = [
            (m.start(), m.group(0)) for m in definition_re.finditer(raw_tool_output)
        ]

        for prefix in prefixes:
            per_prefix = glob_val_dict[prefix]
            if per_prefix is None:
                continue
            if nv.check_prefix in per_prefix:
                if found_lines == per_prefix[nv.check_prefix]:
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under a non-final prefix makes it
                    # unusable for this value kind.
                    per_prefix[nv.check_prefix] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            per_prefix[nv.check_prefix] = found_lines
2228
2229
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Select which global definition lines should get CHECK lines.

    'none' keeps nothing, 'all' keeps everything, and 'smart' keeps only the
    globals transitively referenced from globals already seen in check lines
    (attribute sets are always dropped in smart mode, as they are usually
    better checked by --check-attributes).
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def parse_def(line):
        # Split a definition line into its name and the names referenced by
        # its right-hand side.
        pattern = (
            "^"
            + nameless_value.ir_prefix
            + "("
            + nameless_value.ir_regexp
            + ") = ("
            + nameless_value.global_ir_rhs_regexp
            + ")"
        )
        m = re.match(pattern, line)
        return m.group(1), re.findall(nameless_value.ir_regexp, m.group(2))

    # Map each defined global to the globals its initializer refers to.
    refs_of = {}
    for _, line in global_val_lines_w_index:
        name, refs = parse_def(line)
        refs_of[name] = refs

    # Compute the globals transitively reachable from the ones already seen,
    # using an explicit worklist rather than recursion.
    visible = set()
    worklist = [
        var
        for var, check_key in global_vars_seen
        if check_key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in visible:
            continue
        visible.add(var)
        worklist.extend(refs_of.get(var, ()))

    return [
        (idx, line)
        for idx, line in global_val_lines_w_index
        if parse_def(line)[0] in visible
    ]
2282
2283
# Substitutions that scrub unstable pieces of metadata out of check lines:
# tool version strings (with optional vendor preface/suffix) and absolute
# directory paths in !DIFile nodes.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Return *line* with every unstable-metadata substitution applied."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
2298
2299
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Emit buffered (sort_index, check_line) pairs and reset the buffer.

    The pending check lines are appended to *output_lines* ordered by their
    recorded index, preceded by a separator comment. Does nothing when the
    buffer is empty. *new_lines_w_index* is emptied in place.
    """
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    for _, check_line in sorted(new_lines_w_index):
        output_lines.append(check_line)
    new_lines_w_index.clear()
2308
2309
2310def add_global_checks(
2311    glob_val_dict,
2312    comment_marker,
2313    prefix_list,
2314    output_lines,
2315    ginfo: GeneralizerInfo,
2316    global_vars_seen_dict,
2317    preserve_names,
2318    is_before_functions,
2319    global_check_setting,
2320):
2321    printed_prefixes = set()
2322    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
2323    for nameless_value in ginfo.get_nameless_values():
2324        if nameless_value.global_ir_rhs_regexp is None:
2325            continue
2326        if nameless_value.is_before_functions != is_before_functions:
2327            continue
2328        for p in prefix_list:
2329            global_vars_seen = {}
2330            checkprefixes = p[0]
2331            if checkprefixes is None:
2332                continue
2333            for checkprefix in checkprefixes:
2334                if checkprefix in global_vars_seen_dict:
2335                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
2336                else:
2337                    global_vars_seen_dict[checkprefix] = {}
2338                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
2339                    break
2340                if not glob_val_dict[checkprefix]:
2341                    continue
2342                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
2343                    continue
2344                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
2345                    continue
2346
2347                check_lines = []
2348                global_vars_seen_before = [key for key in global_vars_seen.keys()]
2349                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
2350                lines_w_index = filter_globals_according_to_preference(
2351                    lines_w_index,
2352                    global_vars_seen_before,
2353                    nameless_value,
2354                    global_check_setting,
2355                )
2356                for i, line in lines_w_index:
2357                    if _global_value_regex:
2358                        matched = False
2359                        for regex in _global_value_regex:
2360                            if re.match("^@" + regex + " = ", line) or re.match(
2361                                "^!" + regex + " = ", line
2362                            ):
2363                                matched = True
2364                                break
2365                        if not matched:
2366                            continue
2367                    [new_line] = generalize_check_lines(
2368                        [line],
2369                        ginfo,
2370                        {},
2371                        global_vars_seen,
2372                        preserve_names,
2373                        unstable_globals_only=True,
2374                    )
2375                    new_line = filter_unstable_metadata(new_line)
2376                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
2377                    check_lines.append((i, check_line))
2378                if not check_lines:
2379                    continue
2380
2381                if not checkprefix in output_lines_loc:
2382                    output_lines_loc[checkprefix] = []
2383                if not nameless_value.interlaced_with_previous:
2384                    flush_current_checks(
2385                        output_lines, output_lines_loc[checkprefix], comment_marker
2386                    )
2387                for check_line in check_lines:
2388                    output_lines_loc[checkprefix].append(check_line)
2389
2390                printed_prefixes.add((checkprefix, nameless_value.check_prefix))
2391
                # Remember new global variables we have not seen before
2393                for key in global_vars_seen:
2394                    if key not in global_vars_seen_before:
2395                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
2396                break
2397
2398    if printed_prefixes:
2399        for p in prefix_list:
2400            if p[0] is None:
2401                continue
2402            for checkprefix in p[0]:
2403                if checkprefix not in output_lines_loc:
2404                    continue
2405                flush_current_checks(
2406                    output_lines, output_lines_loc[checkprefix], comment_marker
2407                )
2408                break
2409        output_lines.append(comment_marker + SEPARATOR)
2410    return printed_prefixes
2411
2412
def check_prefix(prefix):
    """Warn when *prefix* is not a valid FileCheck prefix.

    If the prefix contains a comma, the user probably meant to pass a list
    via --check-prefixes, so the warning suggests that spelling.
    """
    if PREFIX_RE.match(prefix):
        return
    hint = ""
    if "," in prefix:
        hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        + hint
    ) % (prefix)
    warn(message)
2425
2426
def get_check_prefixes(filecheck_cmd):
    """Extract all prefixes named by --check-prefix(es) options in a
    FileCheck command line, defaulting to ["CHECK"] when none are given."""
    prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(match.group(1).split(","))
    return prefixes or ["CHECK"]
2436
2437
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix mentioned in a FileCheck invocation string,
    warning about invalid names and duplicates in --check-prefixes lists."""
    for token in fc_cmd.split():
        if "check-prefix=" in token:
            check_prefix(token.split("=", 1)[1])
        elif "check-prefixes=" in token:
            prefix_list = token.split("=", 1)[1].split(",")
            # Count occurrences once up front; the original re-counted the
            # whole list on every iteration.  A duplicated prefix still
            # produces one warning per occurrence, as before.
            occurrences = collections.Counter(prefix_list)
            for prefix in prefix_list:
                check_prefix(prefix)
                if occurrences[prefix] > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2453
2454
def get_autogennote_suffix(parser, args):
    """Reconstruct the non-default command-line options from *args* as a
    string suffix for the autogenerated note, so that re-running the update
    script with the recorded options reproduces the same checks.

    Returns "" when every relevant option has its default value, otherwise
    a string of the form " <UTC_ARGS_KEY>: <option> [<value>] ...".
    """
    autogenerated_note_args = ""
    # Walk argparse's internal action list to recover option spellings.
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals grew a value ('none'/'all'/'smart') in version 4
            # (see the version changelog at the top of the file); its default
            # therefore depends on the test's declared version.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                # Pre-version-4 scripts accept a bare --check-globals flag.
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Quote each element of a multi-valued option.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space accumulated by the "+= ... " pattern above.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2515    return autogenerated_note_args
2516
2517
def check_for_command(line, parser, args, argv, argparse_callback):
    """If *line* carries a UTC_ARGS directive, fold its options into *argv*
    and re-parse.  Returns the (possibly updated) (args, argv) pair."""
    match = UTC_ARGS_CMD.match(line)
    if match is None:
        return args, argv
    # Append each non-empty token from the directive to the running argv.
    argv.extend(opt for opt in shlex.split(match.group("cmd").strip()) if opt)
    # Re-parse everything except the test paths themselves.
    non_test_args = (arg for arg in argv if arg not in args.tests)
    args = parse_args(parser, non_test_args)
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
2527    return args, argv
2528
2529
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option's value for a test.

    First checks the test's own parsed args via *get_arg_to_check*.  If that
    yields nothing and *is_global* is true, scans every line's UTC_ARGS for
    the option, since a "global" option affects generation of the whole test
    regardless of where it appears.  *arg_string* is only used in warning
    messages.  Returns the option value, or the falsy lookup result if the
    option was not found anywhere.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global" option
        # that affects the entire generation of test checks.  If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            # Non-comment, non-blank lines mark the start of real test input.
            # NOTE(review): ";" is assumed to be the comment leader here —
            # appropriate for LLVM IR tests; confirm for other dialects.
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # BUG FIX: this was "if warn and saw_line", but `warn` is the
                # module-level warning *function* and is therefore always
                # truthy, so the guard reduces to just `saw_line`.
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2558    return result
2559
2560
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into *output_lines*, dropping previously
    autogenerated content.

    Skipped lines are: bare comment markers, the autogenerated separator
    line, and existing CHECK lines whose prefix is in *prefix_set* (those
    will be regenerated).  Trailing newlines are stripped from kept lines.

    Fixes: removed the unused local `args` (the line's parsed args were read
    but never used) and computes `line.strip()` only once.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        stripped = line.strip()
        # Drop bare comment markers and the autogenerated separator line.
        if stripped == comment_string or stripped == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            # Drop stale check lines for the prefixes we are regenerating.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2574
2575
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Append checks for all functions, grouped by prefix, at the end of the
    output.

    *prefix_list* items are (prefixes, tool_args, ...) tuples; *func_order*
    maps each prefix to the functions seen for it (possibly with repeats).
    *check_generator* is invoked per (prefix, function) pair and yields the
    prefixes it actually emitted checks for.  Returns the set of generated
    prefixes.

    Fixes: the outer loop variable was named `prefix` and then immediately
    shadowed by the inner `for prefix in prefixes:` loop; the run-list entry
    is now named `run_entry` to remove the shadowing.
    """
    added = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        prefixes = run_entry[0]
        tool_args = run_entry[1]
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple times.
                # If we see one again we are done.
                if (func, prefix) in added:
                    continue
                if added:
                    # Separate successive functions with a bare comment line.
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element.  They output
                # checks for each prefix in the list of prefixes.  By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function.  That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in.  This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix.  So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
2613