xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 5315f3f8cb8f562ec39f57f2fce79c8e017595f9)
1import argparse
2import bisect
3import collections
4import copy
5import glob
6import os
7import re
8import subprocess
9import sys
10import shlex
11
12from typing import List, Mapping, Set
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-level state mirrored from the parsed command line by
# parse_commandline_args()/parse_args(); _verbose gates debug() below, and
# _prefix_filecheck_ir_name is set from TestInfo.__init__.
_verbose = False
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
"""
# Version used for newly generated tests (see itertests); regenerated tests
# default to 1 unless they carry an explicit --version.
DEFAULT_VERSION = 5


# Analysis printer outputs the update scripts know how to parse; any other
# analysis name triggers a warning in FunctionTestBuilder.process_run_line.
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
41
42
class Regex(object):
    """A deep-copyable wrapper around a compiled regular expression.

    do_scrub deep-copies its arguments; compiled pattern objects are shared
    between copies via __deepcopy__ instead of being duplicated.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Duplicate only the wrapper; the compiled pattern is shared.
        duplicate = copy.copy(self)
        duplicate.regex = self.regex
        return duplicate

    def search(self, line):
        """Delegate to the wrapped pattern's search()."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Delegate to the wrapped pattern's sub()."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string."""
        return self.regex.pattern

    def flags(self):
        """Return the flags the pattern was compiled with."""
        return self.regex.flags
68
69
class Filter(Regex):
    """A Regex plus a polarity flag: a match is kept in the generated checks
    when is_filter_out is False and removed when it is True.
    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Copy via the base class (shares the compiled pattern), then
        # duplicate the polarity flag.
        duplicate = copy.deepcopy(super(Filter, self), memo)
        duplicate.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return duplicate
84
85
def parse_commandline_args(parser):
    """Attach the options shared by all update_*_test_checks.py scripts to
    parser, parse sys.argv, and mirror the verbosity/global-regex options into
    module globals. Returns the parsed argparse namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # option_string is threaded in from __call__; previously it was
                # referenced here without being in scope, so a bad regex raised
                # NameError instead of this ValueError.
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                ) from error

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # Rewrap the Regex just appended by RegexAction as a Filter,
            # recording whether this was --filter or --filter-out.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
230
231
def parse_args(parser, argv):
    """Parse argv with parser and resolve the version-dependent defaults.

    From format version 2 on, --function-signature is forced on; an
    unresolved 'default' --check-globals becomes 'none' (pre-4) or 'smart'.
    Verbosity and the global-regex options are mirrored into module globals.
    """
    args = parser.parse_args(argv)
    if args.version >= 2:
        # Version 2 made --function-signature unconditional.
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    if "check_globals" in args and args.check_globals == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
244
245
class InputLineInfo(object):
    """One raw input line together with its index and the UTC_ARGS state
    (args namespace and argv list) in effect at that line."""

    def __init__(self, line, line_number, args, argv):
        self.line_number = line_number
        self.line = line
        self.argv = argv
        self.args = args
252
253
class TestInfo(object):
    """Per-test-file state: parsed options, RUN lines, and the autogenerated
    note lines used to mark and recognize generated checks."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Mirror the option into the module-level setting.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # Pick a comment leader from the file extension: '#' for MIR and
            # text files, '//' for assembly, ';' otherwise (LLVM IR).
            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                self.comment_prefix = "#"
            elif self.path.endswith(".s"):
                self.comment_prefix = "//"
            else:
                self.comment_prefix = ";"
        # e.g. '; NOTE: Assertions have been autogenerated by <script>...'
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Doubled comment leader marks the unused-prefix section at file end.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without mutating self.

        UTC_ARGS commands found on a line are folded into the per-line
        args/argv via check_for_command.
        """
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines to generate checks for, copying pass-through lines
        into output_lines.

        The fresh autogenerated note is emitted first; old note lines are
        discarded; UTC_ARGS state is adopted as lines are consumed; while
        generation is disabled (--disable) lines are copied verbatim.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            # Adopt any UTC_ARGS update that took effect on this line.
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # Generation disabled: pass the line through unchanged.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the unused-prefix note plus one match-anything check line
        per prefix in run_list that never produced a check."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            # e.g. '; FOO: {{.*}}' — matches anything so FileCheck does not
            # error on a prefix with no real checks.
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
333
334
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching test_patterns.

    Skips files whose first line carries UTC_AVOID, files autogenerated by a
    different script (unless --force-update), and — under --update-only —
    files that were not autogenerated at all. Lines from the UNUSED_NOTE
    marker onward are dropped so the unused-prefix section is regenerated.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Re-apply any UTC_ARGS stored on the existing note line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Truncate at the unused-prefix note; that section is rebuilt by
            # TestInfo.get_checks_for_unused_prefixes().
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
390
391
# Matches FileCheck variable references ('[[') or IR global references ('@')
# on a check line; used to recognize checks for global values. Compiled once
# at module load rather than on every call — should_add_line_to_output runs
# for every line of every test file.
_GLOBAL_IR_VALUE_RE = re.compile(r"(\[\[|@)")


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Return True if input_line should be copied into the regenerated test.

    Drops blank comment lines, the special separator line, and existing check
    lines for any prefix in prefix_set (subject to the skip_* keywords).
    """
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #  return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions
            return not _GLOBAL_IR_VALUE_RE.search(input_line)
        return False

    return True
423
424
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: a CHECK-LABEL naming a function opens a collection list;
    # later checks with the same prefix append to it until the prefix changes
    # or a non-check line is reached.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A different prefix ends the current run of checks.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # -SAME continues the LABEL line; its text is not collected.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_LABEL_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Reaching here (no continue above) means this line did not extend a
        # run of checks, so close any open collection.
        current_function = None

    return result
476
477
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return lit-style (pattern, replacement) pairs for a test file path:
    %s -> the file, %S/%p -> its directory, %{pathsep} -> os.pathsep."""
    directory = os.path.dirname(sourcepath)
    pairs = [("%s", sourcepath)]
    pairs.append(("%S", directory))
    pairs.append(("%p", directory))
    pairs.append(("%{pathsep}", os.pathsep))
    return pairs
487
488
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to s in order; return the result."""
    result = s
    for pattern, replacement in substitutions:
        result = result.replace(pattern, replacement)
    return result
493
494
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run exe over the input file ir and return its stdout as text.

    cmd_args is either a list of arguments (run without a shell) or a single
    string (run through the shell). If preprocess_cmd is given it is run as a
    shell command and its stdout replaces the raw file as the tool's stdin.
    Lit-style substitutions (%s, %S, %p, %{pathsep}) are applied to commands.
    Raises subprocess.CalledProcessError if the tool exits non-zero.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            pp = subprocess.Popen(
                preprocess_cmd,
                shell=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
            )
            # The tool reads the preprocessor's stdout instead of the file.
            ir_file = pp.stdout

        if isinstance(cmd_args, list):
            args = [applySubstitutions(a, substitutions) for a in cmd_args]
            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
        else:
            stdout = subprocess.check_output(
                exe + " " + applySubstitutions(cmd_args, substitutions),
                shell=True,
                stdin=ir_file,
            )
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Fix line endings to unix CR style.
    return stdout.replace("\r\n", "\n")
543
544
##### LLVM IR parser
# A RUN: line under a //, ; or # comment leader; group 1 is the command.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# A FileCheck --check-prefix/--check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A well-formed check prefix name.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# An existing check line; group 1 = prefix, group 2 = optional kind suffix.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# A check line that may carry a -SAME suffix.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# Markers for in-test option state and the autogenerated-note lines.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
UTC_AVOID = "NOTE: Do not autogenerate"
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# 'target datalayout = "..."'; named group 'layout' is the layout string.
DATA_LAYOUT_RE = re.compile(
    r"target\s+datalayout\s+=\s+\"(?P<layout>.+)\"$", flags=(re.M | re.S)
)

# An IR function definition, optionally preceded by a '; Function Attrs:'
# comment; captures attrs, funcdef_attrs_and_ret, func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# "'<analysis>' for function '<func>':" header of analysis printer output.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# "'<func>' ..." header line of loop-pass debug output.
LOOP_PASS_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# Function-name extraction from 'define' lines / LABEL right-hand sides.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
IR_FUNCTION_LABEL_RE = re.compile(
    r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\('
)
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace/comment scrubbers applied to tool output before emitting checks.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Double-comment separator line recognized by should_add_line_to_output.
SEPARATOR = "."
604
605
def error(msg, test_file=None):
    """Print msg to stderr with an 'ERROR: ' prefix, optionally suffixed
    with ': test_file'."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
610
611
def warn(msg, test_file=None):
    """Print msg to stderr with a 'WARNING: ' prefix, optionally suffixed
    with ': test_file'."""
    suffix = ": {}".format(test_file) if test_file else ""
    print("WARNING: {}{}".format(msg, suffix), file=sys.stderr)
616
617
def debug(*args, **kwargs):
    """print(*args, **kwargs) to stderr by default, only when --verbose
    (module-level _verbose) is set."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
624
625
def find_run_lines(test, lines):
    """Collect the RUN: command lines of a test file, merging trailing
    backslash continuations into single logical commands."""
    debug("Scanning for RUN lines in test file:", test)
    matches = (RUN_LINE_RE.match(l) for l in lines)
    raw_lines = [m.group(1) for m in matches if m]
    run_lines = []
    for raw in raw_lines:
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation: drop the trailing backslash(es) and join with a space.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw
        else:
            run_lines.append(raw)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines
639
640
def get_triple_from_march(march):
    """Map a -march value to a target triple by prefix match; fall back to
    'x86' (with a note on stderr) when no known prefix matches."""
    march_to_triple = {
        "amdgcn": "amdgcn",
        "r600": "r600",
        "mips": "mips",
        "nvptx64": "nvptx64",
        "sparc": "sparc",
        "hexagon": "hexagon",
        "ve": "ve",
    }
    for prefix in march_to_triple:
        if march.startswith(prefix):
            return march_to_triple[prefix]
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
656
657
def get_globals_name_prefix(raw_tool_output):
    """Return '_' when the target datalayout's mangling mode ('m:<ch>') is
    'o' or 'x' (presumably Mach-O / Windows-x86 underscore mangling — confirm
    against the LLVM data layout docs), otherwise None."""
    m = DATA_LAYOUT_RE.search(raw_tool_output)
    if not m:
        return None
    data_layout = m.group("layout")
    idx = data_layout.find("m:")
    if idx < 0:
        # No explicit mangling specifier in the layout string.
        return None
    mangling = data_layout[idx + 2]
    return "_" if mangling in ("o", "x") else None
668
669
def apply_filters(line, filters):
    """Return True if line survives filters.

    The first filter whose pattern matches decides: keep for --filter,
    drop for --filter-out. A line matching nothing is kept only when no
    positive --filter was given.
    """
    any_positive = False
    for f in filters:
        if not f.is_filter_out:
            any_positive = True
        if f.search(line):
            return not f.is_filter_out
    # No filter matched: keep the line unless a positive filter existed.
    return not any_positive
680
681
def do_filter(body, filters):
    """Apply the --filter/--filter-out list to body line by line; with no
    filters the body is returned untouched."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
690
691
def scrub_body(body):
    """Normalize assembly whitespace: collapse interior runs of spaces/tabs
    after a token, expand indentation tabs to two spaces, and strip trailing
    whitespace."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body)
702
703
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run scrubber(body, *scrubber_args).

    When scrubber_args is non-empty it is deep-copied and extra_scrub is set
    on its first element, so the caller's argument objects are never mutated.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
710
711
# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed body and signature fragments of one function.

    Attributes:
      scrub: scrubbed body text (what __str__ returns).
      extrascrub: body with the extra scrubbing pass, used for comparing
        functions across RUN lines.
      funcdef_attrs_and_ret / args_and_sig / attrs: signature fragments
        captured from the definition line.
      func_name_separator: text emitted right after the function name.
      _ginfo: generalizer info object supplying the value-matching regexp
        and match helpers.
    """

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
        ginfo,
    ):
        self.scrub = string
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator
        self._ginfo = ginfo

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
    ):
        """Return True if this function matches the given fragments modulo
        renaming of '%'-keyed (argument/local) values."""
        arg_names = set()

        def drop_arg_names(match):
            # Remove the names of '%'-keyed values from the signature,
            # remembering them; other value kinds are kept as matched.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if nameless_value.check_key == "%":
                arg_names.add(self._ginfo.get_name_from_match(match))
                substitute = ""
            else:
                substitute = match.group(2)
            return match.group(1) + substitute + match.group(match.lastindex)

        def repl_arg_names(match):
            # Drop uses of the collected argument names from the body so
            # functions differing only in those names compare equal.
            nameless_value = self._ginfo.get_nameless_value_from_match(match)
            if (
                nameless_value.check_key == "%"
                and self._ginfo.get_name_from_match(match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False

        regexp = self._ginfo.get_regexp()
        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
        ans1 = regexp.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if self._ginfo.is_asm():
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = regexp.sub(repl_arg_names, self.extrascrub)
        es1 = regexp.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
778
779
class FunctionTestBuilder:
    """Accumulates scrubbed function bodies per check prefix across RUN lines.

    run_list entries are tuples whose element 0 is the list of check prefixes
    for that RUN line (or None); flags is the parsed argparse namespace.
    """

    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        self._ginfo = ginfo
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        self._func_dict = {}
        self._func_order = {}
        self._global_var_dict = {}
        self._processed_prefixes = set()
        # Seed one empty entry per prefix. Named 'run_tuple' rather than the
        # original 'tuple', which shadowed the builtin.
        for run_tuple in run_list:
            for prefix in run_tuple[0]:
                self._func_dict[prefix] = {}
                self._func_order[prefix] = []
                self._global_var_dict[prefix] = {}
814
    def finish_and_get_func_dict(self):
        """Warn (once per prefix from get_failed_prefixes()) about prefixes
        whose output conflicted across RUN lines for every function, then
        return the prefix -> function -> body dictionary."""
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict

    def func_order(self):
        # prefix -> list of function names in the order they were seen.
        return self._func_order

    def global_var_dict(self):
        # prefix -> dict populated by build_global_values_dictionary
        # (see process_run_line).
        return self._global_var_dict

    def is_filtered(self):
        # True when any --filter/--filter-out option is in effect.
        return bool(self._filters)
834
    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
        """Scrub and record every function found in one RUN line's output.

        Args:
            function_re: compiled regex with named groups (at least 'func' and
                'body'; optionally 'attrs', 'args_and_sig', 'analysis',
                'funcdef_attrs_and_ret', 'func_name_separator') that finds
                function definitions in the tool output.
            scrubber: scrubbing regex/callback applied to each function body.
            raw_tool_output: unprocessed output of the RUN line's tool.
            prefixes: the check prefixes this RUN line contributes to.

        Updates self._global_var_dict, self._func_dict and self._func_order.
        When two RUN lines that share a prefix produce conflicting bodies for
        the same function, the body for that (prefix, function) pair is set
        to None to mark the conflict.
        """
        build_global_values_dictionary(
            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
        )
        for m in function_re.finditer(raw_tool_output):
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if self._ginfo.is_asm():
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            # Two scrub passes: the "extra" variant is used when comparing
            # bodies that may differ only in argument names.
            filtered_body = do_filter(body, self._filters)
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis not in SUPPORTED_ANALYSES:
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    # We have seen this function under this prefix before
                    # (from a previous RUN line). Reconcile the two bodies.
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                        ):
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                            self._ginfo,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used this check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocesser directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None
958
959    def processed_prefixes(self, prefixes):
960        """
961        Mark a set of prefixes as having had at least one applicable RUN line fully
962        processed. This is used to filter out function bodies that don't have
963        outputs for all RUN lines.
964        """
965        self._processed_prefixes.update(prefixes)
966
967    def get_failed_prefixes(self):
968        # This returns the list of those prefixes that failed to match any function,
969        # because there were conflicting bodies produced by different RUN lines, in
970        # all instances of the prefix.
971        for prefix in self._func_dict:
972            if self._func_dict[prefix] and (
973                not [
974                    fct
975                    for fct in self._func_dict[prefix]
976                    if self._func_dict[prefix][fct] is not None
977                ]
978            ):
979                yield prefix
980
981
982##### Generator of LLVM IR CHECK lines
983
# Matches a trailing LLVM IR comment (from ';' to end of line) together with
# any whitespace preceding it, for scrubbing.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
985
986# TODO: We should also derive check lines for global, debug, loop declarations, etc..
987
988
class NamelessValue:
    """
    Describes one kind of IR value whose "name" is generalized in the
    generated check lines. The "name" may be an actual identifier (as in
    `@some_global` or `%x`) or just a number (as in `%12` or `!4`).
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False,
        ir_suffix=r"",
    ):
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.ir_suffix = ir_suffix
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) change with unrelated LLVM
        # modifications; such numbers are replaced by an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        self.interlaced_with_previous = interlaced_with_previous
        # Maps an original number to its assigned counter value (only used
        # when replace_number_with_counter is set).
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """Return True iff this kind of IR value is defined locally to a
        function, which is precisely the case for LLVM IR local values."""
        return self.check_key == "%"

    def get_ir_regex(self):
        """Return the regexp used to match this kind of IR value."""
        # For backwards compatibility, locals are matched with '.*'.
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Create a FileCheck variable name based on the IR name *var*."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            counter = self.variable_mapping.get(var, None)
            if counter is None:
                # First occurrence of this number: assign the next counter.
                counter = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = counter
            var = counter
        if var.isdigit():
            # Nameless value: prepend the check prefix.
            var = check_prefix + var
        elif _prefix_filecheck_ir_name and may_clash_with_default_check_prefix_name(
            check_prefix, var
        ):
            # Named value that clashes with the check prefix: disambiguate it
            # with _prefix_filecheck_ir_name, if it has been defined.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_affixes_from_match(self, match):
        """Return the (prefix, suffix) text surrounding the matched value."""
        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
        return prefix, suffix
1068        return prefix, suffix
1069
1070
class GeneralizerInfo:
    """
    A GeneralizerInfo object holds information about how check lines should be generalized
    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
    state (e.g. information about IR global variables).
    """

    # The three kinds of tool output we know how to generalize.
    MODE_IR = 0
    MODE_ASM = 1
    MODE_ANALYZE = 2

    def __init__(
        self,
        version,
        mode,
        nameless_values: List[NamelessValue],
        regexp_prefix,
        regexp_suffix,
    ):
        self._version = version
        self._mode = mode
        self._nameless_values = nameless_values

        self._regexp_prefix = regexp_prefix
        self._regexp_suffix = regexp_suffix

        # Regexp matching any kind of nameless value.
        self._regexp, _ = self._build_regexp(False, False)
        # Regexp matching only globals with unstable (non-literal) ids,
        # together with the NamelessValues its alternatives correspond to.
        (
            self._unstable_globals_regexp,
            self._unstable_globals_values,
        ) = self._build_regexp(True, True)

    def _build_regexp(self, globals_only, unstable_only):
        """Build one alternation regexp over the selected nameless values.

        Returns (compiled_regexp, selected_values) where selected_values is in
        the same order as the regexp's alternatives (used by get_match_info).
        """
        matches = []
        values = []
        for nameless_value in self._nameless_values:
            is_global = nameless_value.global_ir_rhs_regexp is not None
            if globals_only and not is_global:
                continue
            if unstable_only and nameless_value.match_literally:
                continue

            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
            if self.is_ir() and not globals_only and is_global:
                # IR global definitions start at the beginning of a line.
                match = "^" + match
            matches.append(match)
            values.append(nameless_value)

        regexp_string = r"|".join(matches)

        return (
            re.compile(
                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
            ),
            values,
        )

    def get_version(self):
        return self._version

    def is_ir(self):
        return self._mode == GeneralizerInfo.MODE_IR

    def is_asm(self):
        return self._mode == GeneralizerInfo.MODE_ASM

    def is_analyze(self):
        return self._mode == GeneralizerInfo.MODE_ANALYZE

    def get_nameless_values(self):
        return self._nameless_values

    def get_regexp(self):
        return self._regexp

    def get_unstable_globals_regexp(self):
        return self._unstable_globals_regexp

    # The entire match is group 0, the prefix has one group (=1), the entire
    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
    FIRST_NAMELESS_GROUP_IN_MATCH = 3

    def get_match_info(self, match):
        """
        Returns (name, nameless_value) for the given match object
        """
        if match.re == self._regexp:
            values = self._nameless_values
        else:
            # The match must come from the only other regexp we build.
            # (This was previously a no-op comparison expression whose result
            # was discarded; it was clearly intended to be an assertion.)
            assert match.re == self._unstable_globals_regexp
            values = self._unstable_globals_values
        for i in range(len(values)):
            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
            if g is not None:
                return g, values[i]
        error("Unable to identify the kind of IR value from the match!")
        return None, None

    # See get_idx_from_match
    def get_name_from_match(self, match):
        return self.get_match_info(match)[0]

    def get_nameless_value_from_match(self, match) -> NamelessValue:
        return self.get_match_info(match)[1]
1175
1176
def make_ir_generalizer(version):
    """Build the GeneralizerInfo used for LLVM IR output.

    The order of entries in `values` matters: they become the alternatives of
    a single alternation regexp built by GeneralizerInfo._build_regexp, so
    earlier entries take precedence when several could match.
    """
    values = []

    if version >= 5:
        # From version 5 on, basic block labels are matched by FileCheck
        # expressions: both label uses (`label %bb`) and label definitions
        # (`bb:` at the start of a line).
        values += [
            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
        ]

    values += [
        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
        NamelessValue(
            r"GLOBNAMED",
            "@",
            r"@",
            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
            r".+",
            is_before_functions=True,
            match_literally=True,
            interlaced_with_previous=True,
        ),
        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
    ]

    # Values are surrounded by leading whitespace and a delimiter (or end of
    # input); the filtering previously sketched in commented-out code here is
    # implemented in GeneralizerInfo._build_regexp.
    prefix = r"(\s*)"
    suffix = r"([,\s\(\)\}]|\Z)"

    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
1227
1228
def make_asm_generalizer(version):
    """Build the GeneralizerInfo used for backend (assembly) output."""
    mcinst = NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )
    mcreg = NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    )

    # Values appear inside '#' or '//' comments and are terminated by '>',
    # whitespace, or end of input.
    return GeneralizerInfo(
        version,
        GeneralizerInfo.MODE_ASM,
        [mcinst, mcreg],
        r"((?:#|//)\s*)",
        r"([>\s]|\Z)",
    )
1255
1256
def make_analyze_generalizer(version):
    """Build the GeneralizerInfo used for analysis-pass output."""
    group_value = NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    )

    # Values are lowercase hex numbers preceded by whitespace and followed by
    # an optional ')' plus ':'.
    return GeneralizerInfo(
        version, GeneralizerInfo.MODE_ANALYZE, [group_value], r"(\s*)", r"(\)?:)"
    )
1275
1276
1277# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value when *var* could be mistaken (case-insensitively)
    for an auto-generated variable of the form <check_prefix><number>."""
    if not check_prefix:
        return check_prefix
    pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(pattern, var, re.IGNORECASE)
1282
1283
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Compute a large ordered (crossing-free) matching between the strings of
    lhs and those of rhs — i.e. the lines a diff would report as *unchanged*
    if lhs and rhs were the two files being compared.

    Returns the matched (lhs_idx, rhs_idx) pairs in increasing order.
    """
    if not lhs or not rhs:
        return []

    # Bucket the occurrence indices of every distinct line on either side.
    occurrences = collections.defaultdict(lambda: ([], []))
    for i, line in enumerate(lhs):
        occurrences[line][0].append(i)
    for i, line in enumerate(rhs):
        occurrences[line][1].append(i)

    # Candidate edges of the bipartite matching. Lines occurring exactly once
    # on both sides give an unambiguous edge; ambiguous lines are expanded
    # afterwards, cheapest bucket first, under a budget that keeps the total
    # number of edges linear in the input size (avoiding quadratic runtime).
    edges = []
    ambiguous = []
    for left_ids, right_ids in occurrences.values():
        if not left_ids or not right_ids:
            continue
        if len(left_ids) == 1 == len(right_ids):
            edges.append((left_ids[0], right_ids[0]))
        else:
            ambiguous.append((left_ids, right_ids))

    ambiguous.sort(key=lambda bucket: len(bucket[0]) * len(bucket[1]))

    budget = 2 * (len(lhs) + len(rhs))
    for left_ids, right_ids in ambiguous:
        if len(edges) + len(left_ids) * len(right_ids) > budget:
            break
        for li in left_ids:
            for ri in right_ids:
                edges.append((li, ri))

    if not edges:
        # The two sides share nothing, or the shared lines are all too
        # repetitive to be useful. Give up and match nothing.
        return []

    # Compute a maximal crossing-free matching with a dynamic-programming
    # line sweep.
    #
    # Geometric picture: plot each edge as a point in the plane, with the
    # lhs index on the X axis and the rhs index on the Y axis. We want the
    # longest chain of points in which each point is strictly above and to
    # the right of its predecessor. Sweep a vertical line left to right,
    # maintaining a table whose entry N records, for the points left of the
    # sweep line, the lowest Y at which a chain of N+1 points can end
    # (together with the index of that last point). The table's Y values
    # are monotonically increasing, so a binary search finds the longest
    # chain a new point can extend; a per-point backlink remembers the
    # predecessor on such a best chain. One binary search per point gives
    # O(M log M) overall.
    #
    # Sorting with a descending secondary key processes points that share an
    # X coordinate top-down, so no two edges on the same lhs line can end up
    # chained together.
    edges.sort(key=lambda edge: (edge[0], -edge[1]))

    backlinks = []
    table_y = []
    table_edge = []
    for _, ri in edges:
        edge_idx = len(backlinks)
        pos = bisect.bisect_left(table_y, ri)

        # Record a best chain ending at the current point. If the table
        # already holds this exact Y (possible only when one lhs line is
        # repeated), overwriting merely biases which duplicate wins and has
        # no systematic impact on the result's quality.
        if pos == len(table_y):
            table_y.append(ri)
            table_edge.append(edge_idx)
        else:
            table_y[pos] = ri
            table_edge[pos] = edge_idx
        backlinks.append(table_edge[pos - 1] if pos > 0 else None)

    # Commit to the matching by walking backlinks from the longest chain's
    # final point, then restore left-to-right order.
    chain = []
    cursor = table_edge[-1]
    while cursor is not None:
        chain.append(edges[cursor])
        cursor = backlinks[cursor]
    chain.reverse()
    return chain
1424
1425
# Placeholder substituted for FileCheck variable names when comparing lines.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable occurrence, e.g. `[[NAME]]` or `[[NAME:regex]]`.
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of digits at the end of a string.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1429
1430
class TestVar:
    """Tracks one FileCheck variable together with the prefix/suffix text
    that consistently surrounds every occurrence of it."""

    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        self._nameless_value = nameless_value
        self._prefix = prefix
        self._suffix = suffix

    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
        """Record another occurrence; an affix that differs from the one seen
        so far is no longer considered consistent and is dropped."""
        if self._prefix != prefix:
            self._prefix = ""
        if self._suffix != suffix:
            self._suffix = ""

    def get_variable_name(self, text):
        """Map the matched text to its FileCheck variable name."""
        value = self._nameless_value
        return value.get_value_name(text, value.check_prefix)

    def get_def(self, name, prefix, suffix):
        """Return the FileCheck pattern that defines this variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}:]]{suffix}"
        # Affixes that are consistent across occurrences are folded into the
        # variable definition itself.
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"

    def get_use(self, name, prefix, suffix):
        """Return the FileCheck pattern that uses this variable."""
        if self._nameless_value.is_number:
            return f"{prefix}[[#{name}]]{suffix}"
        if self._prefix:
            assert self._prefix == prefix
            prefix = ""
        if self._suffix:
            assert self._suffix == suffix
            suffix = ""
        return f"{prefix}[[{name}]]{suffix}"
1470
1471
class CheckValueInfo:
    """Describes one FileCheck-variable occurrence inside a check line."""

    def __init__(
        self,
        key,
        text,
        name: str,
        prefix: str,
        suffix: str,
    ):
        # Kind of value, e.g. '%'.
        self.key = key
        # Text matched by the FileCheck variable (without prefix or suffix).
        self.text = text
        # The FileCheck variable's name.
        self.name = name
        # Affixes captured by the NamelessValue regular expression.
        self.prefix = prefix
        self.suffix = suffix
1493
1494
class CheckLineInfo:
    """A check line in a form that allows comparing lines while ignoring some
    or all of the FileCheck variable names."""

    def __init__(self, line, values):
        # The check line with every FileCheck variable name occurrence
        # replaced by VARIABLE_TAG.
        self.line: str = line
        # One CheckValueInfo per FileCheck variable name occurrence.
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
1507
1508
1509def remap_metavar_names(
1510    old_line_infos: List[CheckLineInfo],
1511    new_line_infos: List[CheckLineInfo],
1512    committed_names: Set[str],
1513) -> Mapping[str, str]:
1514    """
1515    Map all FileCheck variable names that appear in new_line_infos to new
1516    FileCheck variable names in an attempt to reduce the diff from old_line_infos
1517    to new_line_infos.
1518
1519    This is done by:
1520    * Matching old check lines and new check lines using a diffing algorithm
1521      applied after replacing names with wildcards.
1522    * Committing to variable names such that the matched lines become equal
1523      (without wildcards) if possible
1524    * This is done recursively to handle cases where many lines are equal
1525      after wildcard replacement
1526    """
1527    # Initialize uncommitted identity mappings
1528    new_mapping = {}
1529    for line in new_line_infos:
1530        for value in line.values:
1531            new_mapping[value.name] = value.name
1532
1533    # Recursively commit to the identity mapping or find a better one
1534    def recurse(old_begin, old_end, new_begin, new_end):
1535        if old_begin == old_end or new_begin == new_end:
1536            return
1537
1538        # Find a matching of lines where uncommitted names are replaced
1539        # with a placeholder.
1540        def diffify_line(line, mapper):
1541            values = []
1542            for value in line.values:
1543                mapped = mapper(value.name)
1544                values.append(mapped if mapped in committed_names else "?")
1545            return line.line.strip() + " @@@ " + " @ ".join(values)
1546
1547        lhs_lines = [
1548            diffify_line(line, lambda x: x)
1549            for line in old_line_infos[old_begin:old_end]
1550        ]
1551        rhs_lines = [
1552            diffify_line(line, lambda x: new_mapping[x])
1553            for line in new_line_infos[new_begin:new_end]
1554        ]
1555
1556        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
1557
1558        candidate_matches = [
1559            (old_begin + lhs_idx, new_begin + rhs_idx)
1560            for lhs_idx, rhs_idx in candidate_matches
1561        ]
1562
1563        # Candidate matches may conflict if they require conflicting mappings of
1564        # names. We want to determine a large set of compatible candidates,
1565        # because that leads to a small diff.
1566        #
1567        # We think of the candidates as vertices in a conflict graph. The
1568        # conflict graph has edges between incompatible candidates. We want to
1569        # find a large independent set in this graph.
1570        #
1571        # Greedily selecting candidates and removing incompatible ones has the
1572        # disadvantage that making few bad decisions early on can have huge
1573        # consequences.
1574        #
1575        # Instead, we implicitly compute multiple independent sets by greedily
1576        # assigning a *coloring* to the conflict graph. Then, we select the
1577        # largest color class (which is the largest independent set we found),
1578        # commit to all candidates in it, and recurse.
1579        #
1580        # Note that we don't actually materialize the conflict graph. Instead,
1581        # each color class tracks the information needed to decide implicitly
1582        # whether a vertex conflicts (has an edge to) any of the vertices added
1583        # to the color class so far.
1584        class Color:
1585            def __init__(self):
1586                # (lhs_idx, rhs_idx) of matches in this color
1587                self.matches = []
1588
1589                # rhs_name -> lhs_name mappings required by this color
1590                self.mapping = {}
1591
1592                # lhs_names committed for this color
1593                self.committed = set()
1594
1595        colors = []
1596
1597        for lhs_idx, rhs_idx in candidate_matches:
1598            lhs_line = old_line_infos[lhs_idx]
1599            rhs_line = new_line_infos[rhs_idx]
1600
1601            # We scan through the uncommitted names in the candidate line and
1602            # filter out the color classes to which the candidate could be
1603            # assigned.
1604            #
1605            # Simultaneously, we prepare a new color class in case the candidate
1606            # conflicts with all colors that have been established so far.
1607            compatible_colors = colors[:]
1608            new_color = Color()
1609            new_color.matches.append((lhs_idx, rhs_idx))
1610
1611            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
1612                if new_mapping[rhs_value.name] in committed_names:
1613                    # The new value has already been committed. If it was mapped
1614                    # to the same name as the original value, we can consider
1615                    # committing other values from this line. Otherwise, we
1616                    # should ignore this line.
1617                    if new_mapping[rhs_value.name] == lhs_value.name:
1618                        continue
1619                    else:
1620                        break
1621
1622                if rhs_value.name in new_color.mapping:
1623                    # Same, but for a possible commit happening on the same line
1624                    if new_color.mapping[rhs_value.name] == lhs_value.name:
1625                        continue
1626                    else:
1627                        break
1628
1629                if (
1630                    lhs_value.name in committed_names
1631                    or lhs_value.name in new_color.committed
1632                ):
1633                    # We can't map this value because the name we would map it
1634                    # to has already been committed for something else. Give up
1635                    # on this line.
1636                    break
1637
1638                new_color.mapping[rhs_value.name] = lhs_value.name
1639                new_color.committed.add(lhs_value.name)
1640
1641                color_idx = 0
1642                while color_idx < len(compatible_colors):
1643                    color = compatible_colors[color_idx]
1644                    compatible = True
1645                    if rhs_value.name in color.mapping:
1646                        compatible = color.mapping[rhs_value.name] == lhs_value.name
1647                    else:
1648                        compatible = lhs_value.name not in color.committed
1649                    if compatible:
1650                        color_idx += 1
1651                    else:
1652                        del compatible_colors[color_idx]
1653            else:
1654                # We never broke out of the loop, which means that at a minimum,
1655                # this line is viable standalone
1656                if compatible_colors:
1657                    color = max(compatible_colors, key=lambda color: len(color.matches))
1658                    color.mapping.update(new_color.mapping)
1659                    color.committed.update(new_color.committed)
1660                    color.matches.append((lhs_idx, rhs_idx))
1661                else:
1662                    colors.append(new_color)
1663
1664        if colors:
1665            # Pick the largest color class. This gives us a large independent
1666            # (non-conflicting) set of candidate matches. Assign all names
1667            # required by the independent set and recurse.
1668            max_color = max(colors, key=lambda color: len(color.matches))
1669
1670            for rhs_var, lhs_var in max_color.mapping.items():
1671                new_mapping[rhs_var] = lhs_var
1672                committed_names.add(lhs_var)
1673
1674                if (
1675                    lhs_var != rhs_var
1676                    and lhs_var in new_mapping
1677                    and new_mapping[lhs_var] == lhs_var
1678                ):
1679                    new_mapping[lhs_var] = "conflict_" + lhs_var
1680
1681            matches = (
1682                [(old_begin - 1, new_begin - 1)]
1683                + max_color.matches
1684                + [(old_end, new_end)]
1685            )
1686
1687            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
1688                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
1689
1690    recurse(0, len(old_line_infos), 0, len(new_line_infos))
1691
1692    # Commit to remaining names and resolve conflicts
1693    for new_name, mapped_name in new_mapping.items():
1694        if mapped_name in committed_names:
1695            continue
1696        if not mapped_name.startswith("conflict_"):
1697            assert mapped_name == new_name
1698            committed_names.add(mapped_name)
1699
1700    for new_name, mapped_name in new_mapping.items():
1701        if mapped_name in committed_names:
1702            continue
1703        assert mapped_name.startswith("conflict_")
1704
1705        m = NUMERIC_SUFFIX_RE.search(new_name)
1706        base_name = new_name[: m.start()]
1707        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
1708        while True:
1709            candidate = f"{base_name}{suffix}"
1710            if candidate not in committed_names:
1711                new_mapping[new_name] = candidate
1712                committed_names.add(candidate)
1713                break
1714            suffix += 1
1715
1716    return new_mapping
1717
1718
def generalize_check_lines(
    lines,
    ginfo: GeneralizerInfo,
    vars_seen,
    global_vars_seen,
    preserve_names=False,
    original_check_lines=None,
    *,
    unstable_globals_only=False,
):
    """Generalize the given check lines by replacing value names with
    FileCheck captures.

    `lines` is modified in place and also returned. `vars_seen` and
    `global_vars_seen` map (name, check_key) -> TestVar and accumulate the
    local and global values encountered so far. When `preserve_names` is
    False, each matched value is replaced by a FileCheck [[...]] definition
    or use, and names are remapped (via remap_metavar_names) to agree with
    `original_check_lines` where possible. With `unstable_globals_only`,
    only values matched by the narrower "unstable globals" regexp are
    generalized.
    """
    if unstable_globals_only:
        regexp = ginfo.get_unstable_globals_regexp()
    else:
        regexp = ginfo.get_regexp()

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        # Wrap runs of braces in {{...}} so FileCheck treats them literally.
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if ginfo.is_ir():
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                # For selected globals, rewrite integer constants into
                # FileCheck numeric substitutions with their hex value so
                # the check is readable regardless of the printed base.
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        # FileCheck variable names already taken by known local values.
        committed_names = set(
            test_var.get_variable_name(name)
            for (name, _), test_var in vars_seen.items()
        )
        # Keys whose defining occurrence has not yet been emitted below.
        defs = set()

        # Collect information about new check lines, and generalize global reference
        new_line_infos = []
        for line in lines:
            filtered_line = ""
            values = []
            while True:
                m = regexp.search(line)
                if m is None:
                    filtered_line += line
                    break

                name = ginfo.get_name_from_match(m)
                nameless_value = ginfo.get_nameless_value_from_match(m)
                prefix, suffix = nameless_value.get_affixes_from_match(m)
                if may_clash_with_default_check_prefix_name(
                    nameless_value.check_prefix, name
                ):
                    warn(
                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                        " with scripted FileCheck name." % (name,)
                    )

                # Record the variable as seen and (for locals) accumulate
                # prefixes/suffixes
                is_local_def = nameless_value.is_local_def_ir_value()
                if is_local_def:
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                key = (name, nameless_value.check_key)

                if is_local_def:
                    test_prefix = prefix
                    test_suffix = suffix
                else:
                    test_prefix = ""
                    test_suffix = ""

                if key in vars_dict:
                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
                else:
                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
                    defs.add(key)

                var = vars_dict[key].get_variable_name(name)

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += (
                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
                )
                line = line[m.end() :]

                values.append(
                    CheckValueInfo(
                        key=nameless_value.check_key,
                        text=name,
                        name=var,
                        prefix=prefix,
                        suffix=suffix,
                    )
                )

            new_line_infos.append(CheckLineInfo(filtered_line, values))

        # Global variable names are reserved as well.
        committed_names.update(
            test_var.get_variable_name(name)
            for (name, _), test_var in global_vars_seen.items()
        )

        # Collect information about original check lines, if any.
        orig_line_infos = []
        for line in original_check_lines or []:
            filtered_line = ""
            values = []
            while True:
                m = METAVAR_RE.search(line)
                if m is None:
                    filtered_line += line
                    break

                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                filtered_line += line[: m.start()] + VARIABLE_TAG
                line = line[m.end() :]
                values.append(
                    CheckValueInfo(
                        key=None,
                        text=None,
                        name=m.group(1),
                        prefix="",
                        suffix="",
                    )
                )
            orig_line_infos.append(CheckLineInfo(filtered_line, values))

        # Compute the variable name mapping
        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)

        # Apply the variable name mapping
        for i, line_info in enumerate(new_line_infos):
            line_template = line_info.line
            line = ""

            for value in line_info.values:
                # Each VARIABLE_TAG placeholder corresponds to one entry in
                # line_info.values, in order.
                idx = line_template.find(VARIABLE_TAG)
                line += line_template[:idx]
                line_template = line_template[idx + len(VARIABLE_TAG) :]

                key = (value.text, value.key)
                # check_key '%' entries were stored in vars_seen above;
                # everything else went into global_vars_seen.
                if value.key == "%":
                    vars_dict = vars_seen
                else:
                    vars_dict = global_vars_seen

                if key in defs:
                    # First occurrence: emit a FileCheck definition.
                    line += vars_dict[key].get_def(
                        mapping[value.name], value.prefix, value.suffix
                    )
                    defs.remove(key)
                else:
                    # Subsequent occurrences: emit a FileCheck use.
                    line += vars_dict[key].get_use(
                        mapping[value.name], value.prefix, value.suffix
                    )

            line += line_template

            lines[i] = line

    if ginfo.is_analyze():
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1901
1902
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    ginfo,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Emit FileCheck check lines for `func_name` into `output_lines`.

    For each run line in `prefix_list`, the first viable check prefix is
    used to emit a -LABEL line (built from `check_label_format`) followed by
    body checks, whose flavor (ASM / analyze / IR) is chosen from `ginfo`.
    `global_vars_seen_dict` is updated with global variables first seen
    while generalizing this function. Returns the list of prefixes for
    which checks were actually printed.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Each prefix is printed at most once: if an earlier run line
            # already emitted it, this run line is fully covered.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if ginfo.is_asm():
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can tell which globals are first seen while
            # emitting this function (recorded back into the dict below).
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = {}
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            # Version 2+ additionally checks function attributes and the
            # return type on the label line (see the version changelog).
            if ginfo.get_version() > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if ginfo.is_asm():
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_check_lines(
                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif ginfo.is_analyze():
                func_body = generalize_check_lines(
                    func_body, ginfo, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    ginfo,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2149
2150
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit -LABEL and body checks for an IR function via add_checks()."""
    assert ginfo.is_ir()
    # Choose the label template: no signature check, a full "define"
    # match (version 2+), or the legacy glob-prefixed form.
    if not function_sig:
        signature_fmt = "%s"
    elif ginfo.get_version() > 1:
        signature_fmt = "define %s"
    else:
        signature_fmt = "define {{[^@]+}}%s"
    label_fmt = "{} %s-LABEL: {}@%s%s%s".format(comment_marker, signature_fmt)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_fmt,
        ginfo,
        global_vars_seen_dict,
        is_filtered,
        preserve_names,
        original_check_lines=original_check_lines,
    )
2188
2189
def add_analyze_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    ginfo: GeneralizerInfo,
    is_filtered,
):
    """Emit -LABEL and body checks for analysis-pass output via add_checks()."""
    assert ginfo.is_analyze()
    # Analysis labels quote the function name; no signature is matched.
    label_fmt = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    # Analysis output carries no global variables, so start from an empty
    # tracking dict each time.
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_fmt,
        ginfo,
        {},
        is_filtered,
    )
2213
2214
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
    """Collect global value definition lines from `raw_tool_output`.

    For every nameless value that has a global RHS regexp, scan the output
    for "<lhs> = <rhs>" definitions and record them per check prefix in
    `glob_val_dict[prefix][check_prefix]` as (start_offset, line) pairs.
    If run lines sharing a prefix produced different globals, the prefix's
    entry is invalidated (set to None), except when the conflict appears on
    the last prefix, where a warning is emitted and the new lines win.
    """
    for nameless_value in ginfo.get_nameless_values():
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        # Build a multiline regex matching a full "<lhs> = <rhs>" line.
        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp

        global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$"
        global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
        lines = []
        for m in global_ir_value_re.finditer(raw_tool_output):
            # Attach the substring's start index so that CHECK lines
            # can be sorted properly even if they are matched by different nameless values.
            # This is relevant for GLOB and GLOBNAMED since they may appear interlaced.
            lines.append((m.start(), m.group(0)))

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
                    # Identical to what another run line recorded; keep it.
                    continue
                if prefix == prefixes[-1]:
                    # Conflict on the last prefix: warn and fall through,
                    # overwriting the entry with the latest lines.
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines
2244
2245
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Filter (index, line) global definitions per the --check-globals mode.

    - "none": check no globals.
    - "all": check every global definition line.
    - "smart": check only globals transitively referenced by the variables
      already recorded in `global_vars_seen` (an iterable of
      (name, check_key) pairs), preserving the input line order.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def extract(line, nv):
        # Parse "<lhs> = <rhs>": return the defined name and all names
        # referenced in the right-hand side.
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    # Parse each line once up front (the previous version re-parsed every
    # line a second time when building the result).
    parsed_lines = []
    contains_refs_to = {}
    for i, line in global_val_lines_w_index:
        var, refs = extract(line, nameless_value)
        parsed_lines.append((i, line, var))
        contains_refs_to[var] = refs

    # Compute the set of globals transitively reachable from the variables
    # already seen, using an explicit worklist so that long reference chains
    # cannot hit the recursion limit.
    transitively_visible = set()
    worklist = [
        var
        for var, check_key in global_vars_seen
        if check_key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in transitively_visible:
            continue
        transitively_visible.add(var)
        worklist.extend(contains_refs_to.get(var, []))

    return [(i, line) for i, line, var in parsed_lines if var in transitively_visible]
2298
2299
# Regex/replacement pairs applied to metadata lines whose contents are not
# stable across compiler versions or build environments (version strings,
# source directories).
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(pat), rep) for pat, rep in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Return `line` with unstable metadata details replaced by {{.*}} globs."""
    result = line
    for pattern, replacement in METADATA_FILTERS_RE:
        result = pattern.sub(replacement, result)
    return result
2314
2315
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append the pending (index, line) checks to `output_lines` in sorted
    order, preceded by a separator comment, and empty the pending list."""
    if new_lines_w_index:
        output_lines.append(comment_marker + SEPARATOR)
        output_lines.extend(line for _, line in sorted(new_lines_w_index))
        new_lines_w_index.clear()
2324
2325
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    ginfo: GeneralizerInfo,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append CHECK lines for global values (IR globals / metadata) to output_lines.

    Args:
      glob_val_dict: Maps check prefix -> nameless-value check prefix ->
        list of (index, line) pairs of global definition lines.
      comment_marker: Comment leader for the test's language (e.g. ";").
      prefix_list: Run list; each item's first element is the list of check
        prefixes for that run line (or None).
      output_lines: Destination list; check lines are appended in place.
      ginfo: Describes the nameless value kinds to consider.
      global_vars_seen_dict: Maps check prefix -> globals already emitted for
        it; updated in place with newly seen globals.
      preserve_names: Forwarded to generalize_check_lines.
      is_before_functions: Only emit value kinds whose is_before_functions
        flag matches this (globals are checked either before or after the
        function bodies).
      global_check_setting: --check-globals mode ('none'/'smart'/'all'),
        used to filter which globals get checks.

    Returns:
      The set of (check prefix, value check prefix) pairs actually printed.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in ginfo.get_nameless_values():
        # Only value kinds with a global RHS pattern represent global checks.
        if nameless_value.global_ir_rhs_regexp is None:
            continue
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                # Seed the per-run view of already-seen globals from earlier
                # passes over the same prefix.
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                # Each (prefix, value-kind) pair is emitted at most once.
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                # Honor the --check-globals preference (may drop lines).
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # With --global-value-regex, only keep matching
                    # globals (@name) or metadata (!name) definitions.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    # Replace unstable global names with FileCheck patterns;
                    # global_vars_seen is updated with what was matched.
                    [new_line] = generalize_check_lines(
                        [line],
                        ginfo,
                        {},
                        global_vars_seen,
                        preserve_names,
                        unstable_globals_only=True,
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                # A value kind not interlaced with the previous one starts a
                # fresh group: flush (sorted) buffered checks first.
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush any still-buffered checks (once per run line), then close the
        # global-checks section with a separator.
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
2427
2428
def check_prefix(prefix):
    """Warn when *prefix* is not a syntactically valid FileCheck prefix."""
    if PREFIX_RE.match(prefix):
        return
    # A comma in the prefix usually means the user meant --check-prefixes=.
    hint = ""
    if "," in prefix:
        hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        + hint
    ) % (prefix)
    warn(message)
2441
2442
def get_check_prefixes(filecheck_cmd):
    """Collect all prefixes named by --check-prefix(es) in a FileCheck command."""
    prefixes = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        prefixes.extend(match.group(1).split(","))
    # FileCheck's implicit default prefix is CHECK.
    if not prefixes:
        return ["CHECK"]
    return prefixes
2452
2453
def verify_filecheck_prefixes(fc_cmd):
    """Validate every check prefix mentioned on a FileCheck command line."""
    for token in fc_cmd.split():
        if "check-prefix=" in token:
            check_prefix(token.split("=", 1)[1])
        elif "check-prefixes=" in token:
            prefix_names = token.split("=", 1)[1].split(",")
            for name in prefix_names:
                check_prefix(name)
                # A prefix repeated within one list is almost certainly a typo.
                if prefix_names.count(name) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (name,)
                    )
2469
2470
def get_autogennote_suffix(parser, args):
    """Serialize non-default options into a suffix for the autogenerated note.

    Walks every action registered on *parser* and appends the option string
    (and its value, where applicable) for each option in *args* whose value
    differs from the default, so a later run can reproduce this invocation
    from the note line.

    Args:
      parser: The argparse parser the options were registered on.
      args: The parsed options namespace (must provide args.version).

    Returns:
      A string of the form " <UTC_ARGS_KEY> <options>", or "" if every
      recorded option is at its default.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
            "llvm_mc_binary",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # --check-globals changed in version 4 (see the version changelog
            # at the top of this file): the default moved from 'none' to
            # 'smart', and before version 4 passing the bare flag meant 'all'
            # (so no explicit value is emitted in that case).
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Multi-value option: quote each element individually.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space and prepend the UTC_ARGS marker.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2532
2533
def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse options when *line* carries a UTC_ARGS command.

    Options found on the line are appended to *argv* (mutated in place) and
    the full argv — minus test paths — is re-parsed.  Returns the (possibly
    updated) args and argv.
    """
    m = UTC_ARGS_CMD.match(line)
    if m is None:
        return args, argv
    extra_options = [opt for opt in shlex.split(m.group("cmd").strip()) if opt]
    argv.extend(extra_options)
    args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
2544
2545
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option for a test, falling back to UTC_ARGS for global options.

    Args:
      test_info: Exposes .args (the test's parsed options) and
        .ro_iterlines(), a read-only iteration of line-info objects with
        .line and .args attributes.
      get_arg_to_check: Callable extracting the option of interest from an
        args namespace.
      arg_string: Human-readable option name, used only in warning messages.
      is_global: Whether this is a "global" option that may be picked up
        from a UTC_ARGS line anywhere in the test.

    Returns:
      The option's value, or a falsy value if not found.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global" option
        # that affects the entire generation of test checks.  If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # Fix: the original condition was "warn and saw_line", but
                # "warn" is the module-level warning function and therefore
                # always truthy — the intended test is just "saw_line".
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2575
2576
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into output_lines, dropping stale checks."""
    separator_line = comment_string + SEPARATOR
    for info in test_info.iterlines(output_lines):
        line = info.line
        args = info.args
        stripped = line.strip()
        # Skip bare comment markers and previously generated separators.
        if stripped == comment_string or stripped == separator_line:
            continue
        if line.lstrip().startswith(comment_string):
            match = CHECK_RE.match(line)
            # Drop existing check lines for prefixes we are regenerating.
            if match and match.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2590
2591
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks at the end of the file, grouped by prefix.

    Returns the set of prefixes for which checks were actually generated.
    """
    added = set()
    generated_prefixes = set()
    for run_item in prefix_list:
        run_prefixes = run_item[0]
        tool_args = run_item[1]
        for run_prefix in run_prefixes:
            for func in func_order[run_prefix]:
                # The function order may repeat a function; once a
                # (function, prefix) pair is handled we skip repeats.
                if (func, run_prefix) in added:
                    continue
                # Separate consecutive groups with a bare comment line.
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples of (prefix list, tool args string) and emit checks
                # for every prefix in the list, implicitly assuming each run
                # line produced output for each function.  Generated
                # functions break that assumption (e.g. -fopenmp vs. no
                # -fopenmp), so hand over a single-prefix run list instead.
                # As a result checks are ordered by prefix rather than by
                # function as in "normal" mode.
                for generated_prefix in check_generator(
                    output_lines, [([run_prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
2629