xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision f4737a2edd900df661750116821806bb45e4086a)
1from __future__ import print_function
2
3import argparse
4import bisect
5import collections
6import copy
7import glob
8import itertools
9import os
10import re
11import subprocess
12import sys
13import shlex
14
15from typing import List, Mapping, Set
16
17##### Common utilities for update_*test_checks.py
18
19
# Module-wide verbosity flag; set from --verbose by parse_commandline_args()
# / parse_args() and read by debug().
_verbose = False
# Prefix prepended to FileCheck IR value names; set from
# --prefix-filecheck-ir-name (see TestInfo.__init__).
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: Opening parenthesis of function args is kept on the first LABEL line
   in case arguments are split to a separate SAME line.
4: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
"""
# Output-format version used when generating a brand-new test; existing tests
# keep the --version recorded on their autogenerated note (see itertests).
DEFAULT_VERSION = 4
35
36
# Analysis printer headers that the update scripts know how to parse; any
# other analysis name triggers a warning in FunctionTestBuilder.process_run_line.
SUPPORTED_ANALYSES = {
    "Branch Probability Analysis",
    "Cost Model Analysis",
    "Loop Access Analysis",
    "Scalar Evolution Analysis",
}
43
44
class Regex(object):
    """Wrapper around a compiled regular expression that supports deep copy.

    Compiled pattern objects are shared rather than copied, which is what the
    deep copy performed in do_scrub requires.
    """

    def __init__(self, regex):
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper and share the compiled pattern; compiled
        # patterns are immutable, so sharing them between copies is safe.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Return a match object if the pattern occurs anywhere in line."""
        return self.regex.search(line)

    def sub(self, repl, line):
        """Return line with every occurrence of the pattern replaced by repl."""
        return self.regex.sub(repl, line)

    def pattern(self):
        """Return the source pattern string."""
        return self.regex.pattern

    def flags(self):
        """Return the flags the pattern was compiled with."""
        return self.regex.flags
70
71
class Filter(Regex):
    """A Regex plus a direction flag: matches are kept in the generated
    checks when is_filter_out is False and removed when it is True."""

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Delegate the pattern handling to Regex.__deepcopy__, then carry the
        # direction flag over to the copy.
        clone = copy.deepcopy(super(Filter, self), memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
86
87
def parse_commandline_args(parser):
    """Register the options shared by all update_*test_checks.py scripts on
    parser, parse the real command line (sys.argv), mirror selected values
    into module globals, and return the parsed namespace."""

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            # Accumulate into the existing list so the option can be given
            # multiple times.
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            try:
                value_list.append(Regex(re.compile(values, flags)))
            except re.error as error:
                # option_string is forwarded from __call__ so the message can
                # name the offending flag. (Previously this referenced an
                # undefined name here and raised NameError instead of the
                # intended ValueError.)
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                )

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            # Rewrap the Regex just appended as a Filter carrying the
            # keep/drop direction implied by the option name.
            is_filter_out = option_string == "--filter-out"

            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
232
233
def parse_args(parser, argv):
    """Parse argv with parser, apply version-dependent defaults, and mirror
    selected options into the module-level globals."""
    args = parser.parse_args(argv)
    if args.version >= 2:
        # --function-signature became the default in format version 2.
        args.function_signature = True
    # TODO: This should not be handled differently from the other options
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose, _global_value_regex, _global_hex_value_regex = (
        args.verbose,
        args.global_value_regex,
        args.global_hex_value_regex,
    )
    # Resolve the 'default' placeholder for --check-globals: 'smart' was
    # introduced (and made the default) in format version 4.
    if getattr(args, "check_globals", None) == "default":
        args.check_globals = "smart" if args.version >= 4 else "none"
    return args
246
247
class InputLineInfo(object):
    """Plain data holder pairing one input line and its line number with the
    UTC_ARGS state (parsed args and raw argv) in effect at that line."""

    def __init__(self, line, line_number, args, argv):
        self.line, self.line_number = line, line_number
        self.args, self.argv = args, argv
254
255
class TestInfo(object):
    """Per-test-file state: input lines, RUN lines, comment style, and the
    autogenerated-note strings used when rewriting the test."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        if args.prefix_filecheck_ir_name:
            # Publish the requested IR-name prefix via the module-level global.
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        self.comment_prefix = comment_prefix
        if self.comment_prefix is None:
            # MIR tests use '#' comments; everything else defaults to ';'.
            if self.path.endswith(".mir"):
                self.comment_prefix = "#"
            else:
                self.comment_prefix = ";"
        # e.g. "; NOTE: Assertions have been autogenerated by <script><args>"
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
        self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)
        # Double-comment note preceding the generated unused-prefix block.
        self.test_unused_note = (
            self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE
        )

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without mutating self.

        UTC_ARGS comments are parsed line by line so each yielded entry
        carries the args/argv in effect at that point.
        """
        for line_num, input_line in enumerate(self.input_lines):
            args, argv = check_for_command(
                input_line, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(input_line, line_num, args, argv)

    def iterlines(self, output_lines):
        """Yield the lines checks should be generated for, appending the new
        autogenerated note and any pass-through lines to output_lines.

        NOTE: updates self.args/self.argv as UTC_ARGS commands are seen.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            input_line = line_info.line
            # Discard any previous script advertising.
            if input_line.startswith(self.autogenerated_note_prefix):
                continue
            self.args = line_info.args
            self.argv = line_info.argv
            if not self.args.enabled:
                # CHECK generation disabled here: copy the line verbatim.
                output_lines.append(input_line)
                continue
            yield line_info

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the trailing comment block that makes every unused check
        prefix match anything, so FileCheck does not fail on it."""
        run_list = [element for element in run_list if element[0] is not None]
        unused_prefixes = set(
            [prefix for sublist in run_list for prefix in sublist[0]]
        ).difference(set(used_prefixes))

        ret = []
        if not unused_prefixes:
            return ret
        ret.append(self.test_unused_note)
        for unused in sorted(unused_prefixes):
            ret.append(
                "{comment} {prefix}: {match_everything}".format(
                    comment=self.comment_prefix,
                    prefix=unused,
                    match_everything=r"""{{.*}}""",
                )
            )
        return ret
333
334
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matching test_patterns.

    Handles glob expansion, the UTC_AVOID/UTC_ADVERT first-line notes,
    per-test argument (re)parsing, and truncation at the UNUSED_NOTE marker.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        tests_list = glob.glob(pattern)
        if not tests_list:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in tests_list:
            with open(test) as f:
                input_lines = [l.rstrip() for l in f]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                # Refuse to clobber a test generated by a different script
                # unless --force-update was given.
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Honor any UTC_ARGS recorded on the existing note line.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Drop everything from the autogenerated unused-prefix block
            # onward; it will be regenerated.
            final_input_lines = []
            for l in input_lines:
                if UNUSED_NOTE in l:
                    break
                final_input_lines.append(l)
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
390
391
# Matches FileCheck variable references ('[[') or IR global names ('@').
# Compiled once at module load instead of on every call (it was previously
# recompiled inside the function for each CHECK line inspected).
_GLOBAL_IR_VALUE_RE = re.compile(r"(\[\[|@)", flags=(re.M))


def should_add_line_to_output(
    input_line,
    prefix_set,
    *,
    skip_global_checks=False,
    skip_same_checks=False,
    comment_marker=";",
):
    """Decide whether input_line from the existing test is copied into the
    regenerated output.

    Returns False for lines the script owns and regenerates itself: blank
    comment lines, the comment_marker+SEPARATOR separator line, and CHECK
    lines whose prefix is in prefix_set (refined by the skip_* flags).
    """
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and input_line.strip() == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if input_line.strip() == comment_marker + SEPARATOR:
        return False
    # Skip any blank lines in the IR.
    # if input_line.strip() == '':
    #  return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        if skip_same_checks and CHECK_SAME_RE.match(input_line):
            # The previous CHECK line was removed, so don't leave this dangling
            return False
        if skip_global_checks:
            # Skip checks only if they are of global value definitions
            is_global = _GLOBAL_IR_VALUE_RE.search(input_line)
            return not is_global
        return False

    return True
423
424
def collect_original_check_lines(ti: TestInfo, prefix_set: set):
    """
    Collect pre-existing check lines into a dictionary `result` which is
    returned.

    result[func_name][prefix] is filled with a list of right-hand-sides of check
    lines.
    """
    result = collections.defaultdict(lambda: {})

    # State machine: current_function is the list being filled for the
    # (function, prefix) pair opened by the most recent CHECK-LABEL.
    current_prefix = None
    current_function = None
    for input_line_info in ti.ro_iterlines():
        input_line = input_line_info.line
        if input_line.lstrip().startswith(";"):
            m = CHECK_RE.match(input_line)
            if m is not None:
                prefix = m.group(1)
                check_kind = m.group(2)
                line = input_line[m.end() :].strip()

                # A different prefix ends the current run of check lines.
                if prefix != current_prefix:
                    current_function = None
                    current_prefix = None

                # Ordinary check kinds (NEXT/NOT/DAG/plain) are appended to
                # the currently open function, if any.
                if check_kind not in ["LABEL", "SAME"]:
                    if current_function is not None:
                        current_function.append(line)
                    continue

                # SAME continues the preceding LABEL line; nothing to record.
                if check_kind == "SAME":
                    continue

                if check_kind == "LABEL":
                    m = IR_FUNCTION_RE.match(line)
                    if m is not None:
                        func_name = m.group(1)
                        if (
                            ti.args.function is not None
                            and func_name != ti.args.function
                        ):
                            # When filtering on a specific function, skip all others.
                            continue

                        # Open a fresh list for this (function, prefix) pair.
                        current_prefix = prefix
                        current_function = result[func_name][prefix] = []
                        continue

        # Any line that is not a matching check line (including plain
        # comments and a LABEL that is not an IR function) closes the
        # current block of check lines.
        current_function = None

    return result
476
477
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return (placeholder, replacement) pairs mimicking lit's %s, %S, %p
    and %{pathsep} substitutions for the given test file path."""
    sourcedir = os.path.dirname(sourcepath)
    pairs = [("%s", sourcepath)]
    pairs.append(("%S", sourcedir))
    pairs.append(("%p", sourcedir))
    pairs.append(("%{pathsep}", os.pathsep))
    return pairs
487
488
def applySubstitutions(s, substitutions):
    """Apply each (placeholder, replacement) pair to s in order and return
    the substituted string."""
    result = s
    for placeholder, replacement in substitutions:
        result = result.replace(placeholder, replacement)
    return result
493
494
495# Invoke the tool that is being tested.
496def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
497    with open(ir) as ir_file:
498        substitutions = getSubstitutions(ir)
499
500        # TODO Remove the str form which is used by update_test_checks.py and
501        # update_llc_test_checks.py
502        # The safer list form is used by update_cc_test_checks.py
503        if preprocess_cmd:
504            # Allow pre-processing the IR file (e.g. using sed):
505            assert isinstance(
506                preprocess_cmd, str
507            )  # TODO: use a list instead of using shell
508            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
509            if verbose:
510                print(
511                    "Pre-processing input file: ",
512                    ir,
513                    " with command '",
514                    preprocess_cmd,
515                    "'",
516                    sep="",
517                    file=sys.stderr,
518                )
519            # Python 2.7 doesn't have subprocess.DEVNULL:
520            with open(os.devnull, "w") as devnull:
521                pp = subprocess.Popen(
522                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
523                )
524                ir_file = pp.stdout
525
526        if isinstance(cmd_args, list):
527            args = [applySubstitutions(a, substitutions) for a in cmd_args]
528            stdout = subprocess.check_output([exe] + args, stdin=ir_file)
529        else:
530            stdout = subprocess.check_output(
531                exe + " " + applySubstitutions(cmd_args, substitutions),
532                shell=True,
533                stdin=ir_file,
534            )
535        if sys.version_info[0] > 2:
536            # FYI, if you crashed here with a decode error, your run line probably
537            # results in bitcode or other binary format being written to the pipe.
538            # For an opt test, you probably want to add -S or -disable-output.
539            stdout = stdout.decode()
540    # Fix line endings to unix CR style.
541    return stdout.replace("\r\n", "\n")
542
543
##### LLVM IR parser
# A RUN: line commented with //, ; or #; group 1 captures the command text.
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# FileCheck's --check-prefix/--check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A syntactically valid check-prefix name.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# A FileCheck directive: group 1 is the prefix, group 2 the optional kind.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
)
# Variant used to recognize (possible) -SAME continuation lines.
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")

# Marker for per-test argument overrides embedded in test comments.
UTC_ARGS_KEY = "UTC_ARGS:"
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# First-line note identifying an autogenerated test.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
# First-line note forbidding autogeneration of a test.
UTC_AVOID = "NOTE: Do not autogenerate"
# Note preceding the generated unused-prefix block at the end of a test.
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"

# An IR function definition in opt output; captures attrs,
# funcdef_attrs_and_ret, func, args_and_sig and body.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# Header+body of one analysis printer block:
# "'<analysis>' for function '<func>':".
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Per-function section of loop-vectorizer debug output.
LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# An IR 'define' line; group 1 is the (possibly quoted) function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")

# Whitespace/comment scrubbers applied when normalizing tool output.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+")
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to a comment marker to form the separator line that
# should_add_line_to_output skips.
SEPARATOR = "."
596
597
def error(msg, test_file=None):
    """Print msg to stderr with an ERROR prefix, naming test_file if given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
602
603
def warn(msg, test_file=None):
    """Print msg to stderr with a WARNING prefix, naming test_file if given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
608
609
def debug(*args, **kwargs):
    """print(*args) gated on the module-level _verbose flag; output goes to
    stderr unless the caller passes an explicit file=."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
616
617
def find_run_lines(test, lines):
    """Return the RUN: commands found in lines, joining backslash
    continuations into single logical commands."""
    debug("Scanning for RUN lines in test file:", test)
    run_lines = []
    for candidate in lines:
        m = RUN_LINE_RE.match(candidate)
        if not m:
            continue
        cmd = m.group(1)
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation of the previous RUN line: drop the trailing
            # backslash(es) and splice the two halves together.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + cmd
        else:
            run_lines.append(cmd)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for l in run_lines:
        debug("  RUN: {}".format(l))
    return run_lines
631
632
def get_triple_from_march(march):
    """Map a -march value to a target triple by prefix; fall back to 'x86'
    with a warning when the architecture is not recognized."""
    # Every known -march prefix maps to a triple of the same name.
    known_prefixes = ("amdgcn", "r600", "mips", "sparc", "hexagon", "ve")
    for prefix in known_prefixes:
        if march.startswith(prefix):
            return prefix
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
647
648
def apply_filters(line, filters):
    """Return True if line survives the filter chain.

    The first filter whose regex matches decides the outcome: keep for
    --filter, drop for --filter-out. When nothing matches, the line is kept
    only if no positive (--filter) filters were given.
    """
    saw_positive_filter = False
    for f in filters:
        if not f.is_filter_out:
            saw_positive_filter = True
        if f.search(line):
            return not f.is_filter_out
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return not saw_positive_filter
659
660
def do_filter(body, filters):
    """Apply the --filter/--filter-out chain to body, line by line; with no
    filters, body is returned unchanged."""
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
669
670
def scrub_body(body):
    """Normalize whitespace in tool output: collapse runs of blanks while
    keeping leading indentation, expand tabs to two spaces, and strip
    trailing whitespace."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    collapsed = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(
        lambda m: m.group(2) + " ", body
    )
    # Expand the tabs used for indentation.
    expanded = collapsed.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", expanded)
681
682
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run scrubber over body. When scrubber_args is non-empty, a deep copy
    is used and the extra flag is recorded on its first element, leaving the
    caller's arguments untouched."""
    if scrubber_args:
        args_copy = copy.deepcopy(scrubber_args)
        args_copy[0].extra_scrub = extra
        return scrubber(body, *args_copy)
    return scrubber(body, *scrubber_args)
689
690
# Build up a dictionary of all the function bodies.
class function_body(object):
    """One scrubbed function body captured from tool output, together with
    the signature/attribute fragments needed to emit its check lines."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
    ):
        # Body after prefix-specific scrubbing (what the checks are built from).
        self.scrub = string
        # Body after the extra scrubbing pass; used for cross-run comparison.
        self.extrascrub = extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        # Text following the function name on its definition line (':' for
        # most backends, '' when not a backend — see process_run_line).
        self.func_name_separator = func_name_separator

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
    ):
        """Return True if the given fragments describe the same function as
        this one, ignoring differences in IR argument value names."""
        arg_names = set()

        def drop_arg_names(match):
            # Record the value name and strip it (keeping any attribute
            # group), so signatures differing only in names compare equal.
            arg_names.add(match.group(variable_group_in_ir_value_match))
            if match.group(attribute_group_in_ir_value_match):
                attr = match.group(attribute_group_in_ir_value_match)
            else:
                attr = ""
            return match.group(1) + attr + match.group(match.lastindex)

        def repl_arg_names(match):
            # Drop only the value names collected from the signature; other
            # IR values keep their names.
            if (
                match.group(variable_group_in_ir_value_match) is not None
                and match.group(variable_group_in_ir_value_match) in arg_names
            ):
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret:
            return False
        if self.attrs != attrs:
            return False
        ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
        ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
        if ans0 != ans1:
            return False
        if is_backend:
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
        es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
        es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
        es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
        return es0 == es1

    def __str__(self):
        return self.scrub
751
752
753class FunctionTestBuilder:
    def __init__(self, run_list, flags, scrubber_args, path):
        """Set up empty per-prefix dictionaries for every check prefix named
        in run_list; flags carries the parsed command-line options."""
        self._verbose = flags.verbose
        self._record_args = flags.function_signature
        self._check_attributes = flags.check_attributes
        # Strip double-quotes if input was read by UTC_ARGS
        self._filters = (
            list(
                map(
                    lambda f: Filter(
                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
                    ),
                    flags.filters,
                )
            )
            if flags.filters
            else []
        )
        self._scrubber_args = scrubber_args
        self._path = path
        # Strip double-quotes if input was read by UTC_ARGS
        self._replace_value_regex = list(
            map(lambda x: x.strip('"'), flags.replace_value_regex)
        )
        # Per-prefix maps filled in by process_run_line:
        self._func_dict = {}        # prefix -> {function name -> recorded body}
        self._func_order = {}       # prefix -> [function names, in order seen]
        self._global_var_dict = {}  # prefix -> global-value dictionary
        self._processed_prefixes = set()
        for tuple in run_list:
            for prefix in tuple[0]:
                self._func_dict.update({prefix: dict()})
                self._func_order.update({prefix: []})
                self._global_var_dict.update({prefix: dict()})
786
    def finish_and_get_func_dict(self):
        """Warn about prefixes that produced no usable output, then return
        the per-prefix function dictionary."""
        for prefix in self.get_failed_prefixes():
            warn(
                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
                % (
                    prefix,
                    self._path,
                )
            )
        return self._func_dict
797
    def func_order(self):
        # Accessor for the per-prefix function-name ordering.
        return self._func_order
800
    def global_var_dict(self):
        # Accessor for the per-prefix global-value dictionary.
        return self._global_var_dict
803
    def is_filtered(self):
        # True when any --filter/--filter-out option is active.
        return bool(self._filters)
806
807    def process_run_line(
808        self, function_re, scrubber, raw_tool_output, prefixes, is_backend
809    ):
810        build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
811        for m in function_re.finditer(raw_tool_output):
812            if not m:
813                continue
814            func = m.group("func")
815            body = m.group("body")
816            # func_name_separator is the string that is placed right after function name at the
817            # beginning of assembly function definition. In most assemblies, that is just a
818            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
819            # False, just assume that separator is an empty string.
820            if is_backend:
821                # Use ':' as default separator.
822                func_name_separator = (
823                    m.group("func_name_separator")
824                    if "func_name_separator" in m.groupdict()
825                    else ":"
826                )
827            else:
828                func_name_separator = ""
829            attrs = m.group("attrs") if self._check_attributes else ""
830            funcdef_attrs_and_ret = (
831                m.group("funcdef_attrs_and_ret") if self._record_args else ""
832            )
833            # Determine if we print arguments, the opening brace, or nothing after the
834            # function name
835            if self._record_args and "args_and_sig" in m.groupdict():
836                args_and_sig = scrub_body(m.group("args_and_sig").strip())
837            elif "args_and_sig" in m.groupdict():
838                args_and_sig = "("
839            else:
840                args_and_sig = ""
841            filtered_body = do_filter(body, self._filters)
842            scrubbed_body = do_scrub(
843                filtered_body, scrubber, self._scrubber_args, extra=False
844            )
845            scrubbed_extra = do_scrub(
846                filtered_body, scrubber, self._scrubber_args, extra=True
847            )
848            if "analysis" in m.groupdict():
849                analysis = m.group("analysis")
850                if analysis not in SUPPORTED_ANALYSES:
851                    warn("Unsupported analysis mode: %r!" % (analysis,))
852            if func.startswith("stress"):
853                # We only use the last line of the function body for stress tests.
854                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
855            if self._verbose:
856                print("Processing function: " + func, file=sys.stderr)
857                for l in scrubbed_body.splitlines():
858                    print("  " + l, file=sys.stderr)
859            for prefix in prefixes:
860                # Replace function names matching the regex.
861                for regex in self._replace_value_regex:
862                    # Pattern that matches capture groups in the regex in leftmost order.
863                    group_regex = re.compile(r"\(.*?\)")
864                    # Replace function name with regex.
865                    match = re.match(regex, func)
866                    if match:
867                        func_repl = regex
868                        # Replace any capture groups with their matched strings.
869                        for g in match.groups():
870                            func_repl = group_regex.sub(
871                                re.escape(g), func_repl, count=1
872                            )
873                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)
874
875                    # Replace all calls to regex matching functions.
876                    matches = re.finditer(regex, scrubbed_body)
877                    for match in matches:
878                        func_repl = regex
879                        # Replace any capture groups with their matched strings.
880                        for g in match.groups():
881                            func_repl = group_regex.sub(
882                                re.escape(g), func_repl, count=1
883                            )
884                        # Substitute function call names that match the regex with the same
885                        # capture groups set.
886                        scrubbed_body = re.sub(
887                            func_repl, "{{" + func_repl + "}}", scrubbed_body
888                        )
889
890                if func in self._func_dict[prefix]:
891                    if self._func_dict[prefix][func] is not None and (
892                        str(self._func_dict[prefix][func]) != scrubbed_body
893                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
894                        or self._func_dict[prefix][func].attrs != attrs
895                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
896                        != funcdef_attrs_and_ret
897                    ):
898                        if self._func_dict[prefix][func].is_same_except_arg_names(
899                            scrubbed_extra,
900                            funcdef_attrs_and_ret,
901                            args_and_sig,
902                            attrs,
903                            is_backend,
904                        ):
905                            self._func_dict[prefix][func].scrub = scrubbed_extra
906                            self._func_dict[prefix][func].args_and_sig = args_and_sig
907                        else:
908                            # This means a previous RUN line produced a body for this function
909                            # that is different from the one produced by this current RUN line,
910                            # so the body can't be common across RUN lines. We use None to
911                            # indicate that.
912                            self._func_dict[prefix][func] = None
913                else:
914                    if prefix not in self._processed_prefixes:
915                        self._func_dict[prefix][func] = function_body(
916                            scrubbed_body,
917                            scrubbed_extra,
918                            funcdef_attrs_and_ret,
919                            args_and_sig,
920                            attrs,
921                            func_name_separator,
922                        )
923                        self._func_order[prefix].append(func)
924                    else:
925                        # An earlier RUN line used this check prefixes but didn't produce
926                        # a body for this function. This happens in Clang tests that use
927                        # preprocesser directives to exclude individual functions from some
928                        # RUN lines.
929                        self._func_dict[prefix][func] = None
930
931    def processed_prefixes(self, prefixes):
932        """
933        Mark a set of prefixes as having had at least one applicable RUN line fully
934        processed. This is used to filter out function bodies that don't have
935        outputs for all RUN lines.
936        """
937        self._processed_prefixes.update(prefixes)
938
939    def get_failed_prefixes(self):
940        # This returns the list of those prefixes that failed to match any function,
941        # because there were conflicting bodies produced by different RUN lines, in
942        # all instances of the prefix.
943        for prefix in self._func_dict:
944            if self._func_dict[prefix] and (
945                not [
946                    fct
947                    for fct in self._func_dict[prefix]
948                    if self._func_dict[prefix][fct] is not None
949                ]
950            ):
951                yield prefix
952
953
954##### Generator of LLVM IR CHECK lines
955
# Matches a trailing IR line comment (';' through end of line), including any
# whitespace before it, so comments can be stripped from generated check lines.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
957
958# TODO: We should also derive check lines for global, debug, loop declarations, etc..
959
960
class NamelessValue:
    """Describes one kind of "unnamed" value matched in tool output (SSA
    locals, attribute groups, unnamed globals, metadata nodes, ...) and how
    occurrences of it are turned into FileCheck captures.
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
        interlaced_with_previous=False
    ):
        # FileCheck variable prefix used for captures, e.g. 'TMP' for locals.
        self.check_prefix = check_prefix
        # Short key identifying the value class ('%', '@', '!', ...).
        self.check_key = check_key
        # Text (or regex) that precedes the value in the IR, e.g. '%', '!dbg '.
        self.ir_prefix = ir_prefix
        # Regex matching the value name itself.
        self.ir_regexp = ir_regexp
        # Regex for the right-hand side of a definition, or None when this
        # kind has no global definition to match.
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        # Whether definitions of this kind appear before function bodies.
        self.is_before_functions = is_before_functions
        # Whether the value is captured numerically ('[[#...]]').
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        # Whether the name should be matched literally rather than generalized.
        self.match_literally = match_literally
        # Whether checks for this kind are emitted interlaced with the
        # previous kind's checks.
        self.interlaced_with_previous = interlaced_with_previous
        # State for replace_number_with_counter: original number -> counter.
        self.variable_mapping = {}

    def is_local_def_ir_value(self):
        """Return true if this kind of IR value is "local", i.e. it matches '%{{.*}}'."""
        return "%" == self.ir_prefix

    def get_ir_prefix_from_ir_value_match(self, match):
        """Return the IR prefix and check prefix we use for this kind of IR value,
        e.g., (%, TMP) for locals. If the IR prefix is a regex, return the prefix
        as it appeared in the IR output."""
        prefix = re.search(self.ir_prefix, match[0])[0]
        return prefix, self.check_prefix

    def get_ir_regex(self):
        """Return the IR regexp used for this kind of IR value, e.g. [\\w.-]+? for
        locals. For backwards compatibility, locals are checked with '.*'."""
        return ".*" if self.is_local_def_ir_value() else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Create a FileCheck variable name based on an IR name."""
        var = var.replace("!", "")
        if self.replace_number_with_counter:
            assert var
            counter = self.variable_mapping.get(var, None)
            if counter is None:
                # First time we see this number: assign the next counter value.
                counter = str(len(self.variable_mapping) + 1)
                self.variable_mapping[var] = counter
            var = counter
        if var.isdigit():
            # Nameless value: prepend check_prefix.
            var = check_prefix + var
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, var)
            and _prefix_filecheck_ir_name
        ):
            # Named value that clashes with the check_prefix: prepend
            # _prefix_filecheck_ir_name, since it has been defined.
            var = _prefix_filecheck_ir_name + var
        return var.replace(".", "_").replace("-", "_").upper()

    def get_value_definition(self, var, match):
        """Create a FileCheck capture definition from the match, e.g. '[[TMP1:%.*]]'."""
        varname = self.get_value_name(var, self.check_prefix)
        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        if self.is_number:
            # Always capture a number in the default format.
            capture_start, regex = "[[#", ""
        else:
            capture_start, regex = "[[", self.get_ir_regex()
        if self.is_local_def_ir_value():
            return "".join([capture_start, varname, ":", prefix, regex, "]]"])
        return "".join([prefix, capture_start, varname, ":", regex, "]]"])

    def get_value_use(self, var, match, var_prefix=None):
        """Create a FileCheck use of a previously captured variable, e.g. '[[TMP1]]'."""
        if var_prefix is None:
            var_prefix = self.check_prefix
        opener = "[[#" if self.is_number else "[["
        name = self.get_value_name(var, var_prefix)
        if self.is_local_def_ir_value():
            return opener + name + "]]"
        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        return prefix + opener + name + "]]"
1057
1058
1059# Description of the different "unnamed" values we match in the IR, e.g.,
1060# (local) ssa values, (debug) metadata, etc.
ir_nameless_values = [
    #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
    # SSA locals: %0, %tmp.1, ...
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    # Attribute group references (#0) and their definitions (attributes #0 = { ... }).
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    # Unnamed globals (@0) and their definitions.
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
    # Named globals; their names are matched literally rather than generalized.
    NamelessValue(
        r"GLOBNAMED",
        "@",
        r"@",
        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
        r".+",
        is_before_functions=True,
        match_literally=True,
        interlaced_with_previous=True,
    ),
    # Metadata references attached to instructions, e.g. '!dbg !7', '!prof !3'.
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    # Metadata node definitions: '!7 = !{...}' or '!7 = distinct !{...}'.
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
    # Catch-all for other '!<kind> !N' metadata attachments.
    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
]
1090
# Subset of values that have a right-hand-side regex, i.e. whose definitions
# can be matched as globals.
global_nameless_values = [
    value
    for value in ir_nameless_values
    if value.global_ir_rhs_regexp is not None
]
# Global variable names should be matched literally, so exclude those entries
# when looking for globals with unstable numeric ids.
global_nameless_values_w_unstable_ids = [
    value for value in global_nameless_values if not value.match_literally
]
1102
asm_nameless_values = [
    # '<MCInst #NNN ...>' numbers from --asm-show-inst comments; the raw
    # numbers change with unrelated LLVM modifications, so they are replaced
    # with an incrementing counter.
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    # '<MCOperand Reg:NNN>' register numbers, likewise replaced by a counter.
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]

analyze_nameless_values = [
    # Hexadecimal pointer-like ids in analysis output (e.g. group addresses),
    # replaced with an incrementing counter since they are unstable.
    NamelessValue(
        r"GRP",
        "#",
        r"",
        r"0x[0-9a-f]+",
        None,
        replace_number_with_counter=True,
    ),
]
1134
1135
def createOrRegexp(old, new):
    """Join two regex alternatives with '|', treating an empty string as absent."""
    if old and new:
        return old + "|" + new
    return old or new
1142
1143
def createPrefixMatch(prefix_str, prefix_re):
    """Build a non-capturing group matching the prefix, with the value captured."""
    return f"(?:{prefix_str}({prefix_re}))"
1146
1147
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
IR_VALUE_REGEXP_STRING = r""
for nameless_value in ir_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    # Kinds with a right-hand-side regex are definitions, which only appear
    # at the start of a line.
    if nameless_value.global_ir_rhs_regexp is not None:
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
IR_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + IR_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Regex matching global values whose numeric ids are unstable (named globals
# are excluded since they are matched literally).
GLOBAL_VALUE_REGEXP_STRING = r""
for nameless_value in global_nameless_values_w_unstable_ids:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
GLOBAL_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + GLOBAL_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
# ASM values appear inside '#' or '//' assembly comments.
ASM_VALUE_RE = re.compile(
    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
)

# Build the regexp for values in analysis output (see analyze_nameless_values).
ANALYZE_VALUE_REGEXP_PREFIX = r"(\s*)"
ANALYZE_VALUE_REGEXP_STRING = r""
for nameless_value in analyze_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ANALYZE_VALUE_REGEXP_STRING = createOrRegexp(ANALYZE_VALUE_REGEXP_STRING, match)
ANALYZE_VALUE_REGEXP_SUFFIX = r"(\)?:)"
ANALYZE_VALUE_RE = re.compile(
    ANALYZE_VALUE_REGEXP_PREFIX
    + r"("
    + ANALYZE_VALUE_REGEXP_STRING
    + r")"
    + ANALYZE_VALUE_REGEXP_SUFFIX
)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
1211
1212
def get_idx_from_ir_value_match(match):
    """Inspect a match of IR_VALUE_RE and determine which kind of IR value it
    was: a local value %..., global @..., debug number !dbg !..., etc.
    Returns the index into the nameless value list used to build the regex
    (see the PREFIXES above)."""
    first = first_nameless_group_in_ir_value_match
    for group_idx in range(first, match.lastindex):
        if match.group(group_idx) is not None:
            return group_idx - first
    error("Unable to identify the kind of IR value from the match!")
    return 0
1221
1222
def get_name_from_ir_value_match(match):
    """Return the name captured for the matched IR value (see
    get_idx_from_ir_value_match)."""
    idx = get_idx_from_ir_value_match(match)
    return match.group(idx + first_nameless_group_in_ir_value_match)
1228
1229
def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
    """Return the NamelessValue descriptor corresponding to the matched kind."""
    idx = get_idx_from_ir_value_match(match)
    return nameless_values[idx]
1232
1233
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return truthy if var could collide with a scripted FileCheck name of
    the form '<check_prefix><number>' (compared case-insensitively)."""
    if not check_prefix:
        return check_prefix
    return re.match(r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE)
1239
1240
def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
    """
    Find a large ordered matching between strings in lhs and rhs.

    Think of this as finding the *unchanged* lines in a diff, where the entries
    of lhs and rhs are lines of the files being diffed.

    Returns a list of matched (lhs_idx, rhs_idx) pairs, in increasing order on
    both components (the matching is crossing-free).
    """

    if not lhs or not rhs:
        return []

    # Collect matches in reverse order.
    matches = []

    # First, collect a set of candidate matching edges. We limit this to a
    # constant multiple of the input size to avoid quadratic runtime.
    # patterns maps a line's text to ([lhs indices], [rhs indices]).
    patterns = collections.defaultdict(lambda: ([], []))

    for idx in range(len(lhs)):
        patterns[lhs[idx]][0].append(idx)
    for idx in range(len(rhs)):
        patterns[rhs[idx]][1].append(idx)

    multiple_patterns = []

    candidates = []
    for pattern in patterns.values():
        if not pattern[0] or not pattern[1]:
            continue

        # Lines that appear exactly once on each side give an unambiguous
        # candidate edge; repeated lines are handled below under a budget.
        if len(pattern[0]) == len(pattern[1]) == 1:
            candidates.append((pattern[0][0], pattern[1][0]))
        else:
            multiple_patterns.append(pattern)

    # Add candidates for repeated lines, cheapest (fewest combinations) first.
    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))

    for pattern in multiple_patterns:
        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
            len(lhs) + len(rhs)
        ):
            break
        for lhs_idx in pattern[0]:
            for rhs_idx in pattern[1]:
                candidates.append((lhs_idx, rhs_idx))

    if not candidates:
        # The LHS and RHS either share nothing in common, or lines are just too
        # identical. In that case, let's give up and not match anything.
        return []

    # Compute a maximal crossing-free matching via an algorithm that is
    # inspired by a mixture of dynamic programming and line-sweeping in
    # discrete geometry.
    #
    # I would be surprised if this algorithm didn't exist somewhere in the
    # literature, but I found it without consciously recalling any
    # references, so you'll have to make do with the explanation below.
    # Sorry.
    #
    # The underlying graph is bipartite:
    #  - nodes on the LHS represent lines in the original check
    #  - nodes on the RHS represent lines in the new (updated) check
    #
    # Nodes are implicitly sorted by the corresponding line number.
    # Edges (candidates) are sorted by the line number on the LHS.
    #
    # Here's the geometric intuition for the algorithm.
    #
    #  * Plot the edges as points in the plane, with the original line
    #    number on the X axis and the updated line number on the Y axis.
    #  * The goal is to find a longest "chain" of points where each point
    #    is strictly above and to the right of the previous point.
    #  * The algorithm proceeds by sweeping a vertical line from left to
    #    right.
    #  * The algorithm maintains a table where `table[N]` answers the
    #    question "What is currently the 'best' way to build a chain of N+1
    #    points to the left of the vertical line". Here, 'best' means
    #    that the last point of the chain is a as low as possible (minimal
    #    Y coordinate).
    #   * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
    #     the last point in the chain and `y` is its Y coordinate
    #   * A key invariant is that the Y values in the table are
    #     monotonically increasing
    #  * Thanks to these properties, the table can be used to answer the
    #    question "What is the longest chain that can be built to the left
    #    of the vertical line using only points below a certain Y value",
    #    using a binary search over the table.
    #  * The algorithm also builds a backlink structure in which every point
    #    links back to the previous point on a best (longest) chain ending
    #    at that point
    #
    # The core loop of the algorithm sweeps the line and updates the table
    # and backlink structure for every point that we cross during the sweep.
    # Therefore, the algorithm is trivially O(M log M) in the number of
    # points.
    #
    # Sorting by descending rhs_idx for equal lhs_idx ensures that points
    # sharing an X coordinate can never be chained to each other.
    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))

    # The table described above, split into parallel arrays: table_rhs_idx
    # holds the Y coordinates, table_candidate_idx the point indices.
    backlinks = []
    table_rhs_idx = []
    table_candidate_idx = []
    for _, rhs_idx in candidates:
        candidate_idx = len(backlinks)
        # ti is the length of the best chain using only points strictly
        # below rhs_idx, found via binary search over the table.
        ti = bisect.bisect_left(table_rhs_idx, rhs_idx)

        # Update the table to record a best chain ending in the current point.
        # There always is one, and if any of the previously visited points had
        # a higher Y coordinate, then there is always a previously recorded best
        # chain that can be improved upon by using the current point.
        #
        # There is only one case where there is some ambiguity. If the
        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
        # the current point (this can only happen if the same line appeared
        # multiple times on the LHS), then we could choose to keep the
        # previously recorded best chain instead. That would bias the algorithm
        # differently but should have no systematic impact on the quality of the
        # result.
        if ti < len(table_rhs_idx):
            table_rhs_idx[ti] = rhs_idx
            table_candidate_idx[ti] = candidate_idx
        else:
            table_rhs_idx.append(rhs_idx)
            table_candidate_idx.append(candidate_idx)
        if ti > 0:
            backlinks.append(table_candidate_idx[ti - 1])
        else:
            backlinks.append(None)

    # Commit to the matching by walking the backlinks from the endpoint of a
    # longest chain. Callers recursively attempt to fill in more matches
    # in-between.
    match_idx = table_candidate_idx[-1]
    while match_idx is not None:
        current = candidates[match_idx]
        matches.append(current)
        match_idx = backlinks[match_idx]

    matches.reverse()
    return matches
1381
1382
# Placeholder substituted for FileCheck variable names when comparing check
# lines while ignoring naming differences.
VARIABLE_TAG = "[[@@]]"
# Matches a FileCheck metavariable definition or use, e.g. '[[TMP1:%.*]]' or
# '[[TMP1]]'; group 1 is the variable name.
METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
# Matches the (possibly empty) run of trailing digits of a name.
NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
1386
1387
class CheckValueInfo:
    # Records one FileCheck variable occurrence within a check line.
    def __init__(
        self,
        nameless_value: NamelessValue,
        var: str,
        prefix: str,
    ):
        # Descriptor of the value's kind (local, global, metadata, ...).
        self.nameless_value = nameless_value
        # Name of the variable as it appears in the IR.
        self.var = var
        # Check prefix under which the variable was recorded.
        self.prefix = prefix
1398
1399
class CheckLineInfo:
    """Represent a check line in a way that allows comparing check lines while
    ignoring some or all of the FileCheck variable names.
    """

    def __init__(self, line, values):
        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
        self.line: str = line

        # Information on each FileCheck variable name occurrence in the line
        self.values: List[CheckValueInfo] = values

    def __repr__(self):
        return "CheckLineInfo(line={}, self.values={})".format(self.line, self.values)
1412
1413
def remap_metavar_names(
    old_line_infos: List[CheckLineInfo],
    new_line_infos: List[CheckLineInfo],
    committed_names: Set[str],
) -> Mapping[str, str]:
    """
    Map all FileCheck variable names that appear in new_line_infos to new
    FileCheck variable names in an attempt to reduce the diff from old_line_infos
    to new_line_infos.

    This is done by:
    * Matching old check lines and new check lines using a diffing algorithm
      applied after replacing names with wildcards.
    * Committing to variable names such that the matched lines become equal
      (without wildcards) if possible
    * This is done recursively to handle cases where many lines are equal
      after wildcard replacement

    committed_names is updated in place with every name this function commits
    to. Returns the mapping from each variable name in new_line_infos to its
    chosen replacement name.
    """
    # Initialize uncommitted identity mappings
    new_mapping = {}
    for line in new_line_infos:
        for value in line.values:
            new_mapping[value.var] = value.var

    # Recursively commit to the identity mapping or find a better one
    def recurse(old_begin, old_end, new_begin, new_end):
        if old_begin == old_end or new_begin == new_end:
            return

        # Find a matching of lines where uncommitted names are replaced
        # with a placeholder.
        def diffify_line(line, mapper):
            values = []
            for value in line.values:
                mapped = mapper(value.var)
                values.append(mapped if mapped in committed_names else "?")
            return line.line.strip() + " @@@ " + " @ ".join(values)

        lhs_lines = [
            diffify_line(line, lambda x: x)
            for line in old_line_infos[old_begin:old_end]
        ]
        rhs_lines = [
            diffify_line(line, lambda x: new_mapping[x])
            for line in new_line_infos[new_begin:new_end]
        ]

        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)

        # Apply commits greedily on a match-by-match basis
        matches = [(-1, -1)]
        committed_anything = False
        for lhs_idx, rhs_idx in candidate_matches:
            lhs_line = old_line_infos[lhs_idx]
            rhs_line = new_line_infos[rhs_idx]

            # Tentative commitments for this line; only applied if the whole
            # line can be made consistent (see the for-else below).
            local_commits = {}

            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
                if new_mapping[rhs_value.var] in committed_names:
                    # The new value has already been committed. If it was mapped
                    # to the same name as the original value, we can consider
                    # committing other values from this line. Otherwise, we
                    # should ignore this line.
                    if new_mapping[rhs_value.var] == lhs_value.var:
                        continue
                    else:
                        break

                if rhs_value.var in local_commits:
                    # Same, but for a possible commit happening on the same line
                    if local_commits[rhs_value.var] == lhs_value.var:
                        continue
                    else:
                        break

                if lhs_value.var in committed_names:
                    # We can't map this value because the name we would map it to has already been
                    # committed for something else. Give up on this line.
                    break

                local_commits[rhs_value.var] = lhs_value.var
            else:
                # No reason not to add any commitments for this line
                for rhs_var, lhs_var in local_commits.items():
                    new_mapping[rhs_var] = lhs_var
                    committed_names.add(lhs_var)
                    committed_anything = True

                    # If the name we just committed to was itself an uncommitted
                    # identity mapping elsewhere, mark it as conflicting so it
                    # gets renamed in the resolution pass below.
                    if (
                        lhs_var != rhs_var
                        and lhs_var in new_mapping
                        and new_mapping[lhs_var] == lhs_var
                    ):
                        new_mapping[lhs_var] = "conflict_" + lhs_var

                matches.append((lhs_idx, rhs_idx))

        matches.append((old_end, new_end))

        # Recursively handle sequences between matches
        if committed_anything:
            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)

    recurse(0, len(old_line_infos), 0, len(new_line_infos))

    # Commit to remaining names and resolve conflicts.
    # First pass: non-conflicting names keep their identity mapping.
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        if not mapped_name.startswith("conflict_"):
            assert mapped_name == new_name
            committed_names.add(mapped_name)

    # Second pass: names marked as conflicting get a fresh numeric suffix.
    for new_name, mapped_name in new_mapping.items():
        if mapped_name in committed_names:
            continue
        assert mapped_name.startswith("conflict_")

        m = NUMERIC_SUFFIX_RE.search(new_name)
        base_name = new_name[: m.start()]
        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
        while True:
            candidate = f"{base_name}{suffix}"
            if candidate not in committed_names:
                new_mapping[new_name] = candidate
                committed_names.add(candidate)
                break
            suffix += 1

    return new_mapping
1546
1547
def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
    preserve_names,
    original_check_lines=None,
):
    """Replace IR/asm value definitions and uses in `lines` with FileCheck captures.

    Args:
      lines: raw tool-output lines; modified in place and also returned.
      is_analyze: True when scrubbing analysis-pass output (braces get escaped).
      vars_seen: set tracking local values already captured.
      global_vars_seen: dict mapping (name, check_key) of non-local values to
        the check prefix they were first captured under.
      nameless_values: descriptors of the value kinds to recognize.
      nameless_value_regex: compiled regex matching any of `nameless_values`.
      is_asm: True for backend (asm) output.
      preserve_names: if True, do not rewrite value names at all.
      original_check_lines: pre-existing check lines whose metavariable names
        should be reused (via remap_metavar_names) to minimize test diffs.
    """
    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match, transform_locals=True):
        var = get_name_from_ir_value_match(match)
        nameless_value = get_nameless_value_from_match(match, nameless_values)
        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (var,)
            )
        key = (var, nameless_value.check_key)
        is_local_def = nameless_value.is_local_def_ir_value()
        if is_local_def and not transform_locals:
            return None
        if is_local_def and key in vars_seen:
            rv = nameless_value.get_value_use(var, match)
        elif not is_local_def and key in global_vars_seen:
            # We could have seen a different prefix for the global variables first,
            # ensure we use that one instead of the prefix for the current match.
            rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
        else:
            if is_local_def:
                vars_seen.add(key)
            else:
                global_vars_seen[key] = nameless_value.check_prefix
            rv = nameless_value.get_value_definition(var, match)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(match.lastindex)

    def transform_non_local_line_vars(match):
        return transform_line_vars(match, False)

    multiple_braces_re = re.compile(r"({{+)|(}}+)")

    def escape_braces(match_obj):
        # {{...}} is FileCheck regex syntax; wrap literal brace runs so they
        # match verbatim instead of being interpreted as a regex.
        return "{{" + re.escape(match_obj.group(0)) + "}}"

    if not is_asm and not is_analyze:
        for i, line in enumerate(lines):
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    # Rewrite integer constants as hex [[#...]] numeric checks.
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line

    if not preserve_names:
        if is_asm:
            for i, _ in enumerate(lines):
                # It can happen that two matches are back-to-back and for some reason sub
                # will not replace both of them. For now we work around this by
                # substituting until there is no more match.
                changed = True
                while changed:
                    (lines[i], changed) = nameless_value_regex.subn(
                        transform_line_vars, lines[i], count=1
                    )
        else:
            # LLVM IR case. Start by handling global meta variables (global IR variables,
            # metadata, attributes)
            for i, _ in enumerate(lines):
                start = 0
                while True:
                    m = nameless_value_regex.search(lines[i][start:])
                    if m is None:
                        break
                    start += m.start()
                    sub = transform_non_local_line_vars(m)
                    if sub is not None:
                        lines[i] = (
                            lines[i][:start] + sub + lines[i][start + len(m.group(0)) :]
                        )
                    start += 1

            # Collect information about new check lines and original check lines (if any)
            new_line_infos = []
            for line in lines:
                filtered_line = ""
                values = []
                while True:
                    m = nameless_value_regex.search(line)
                    if m is None:
                        filtered_line += line
                        break

                    var = get_name_from_ir_value_match(m)
                    nameless_value = get_nameless_value_from_match(m, nameless_values)
                    var = nameless_value.get_value_name(
                        var, nameless_value.check_prefix
                    )

                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                    filtered_line += (
                        line[: m.start()]
                        + m.group(1)
                        + VARIABLE_TAG
                        + m.group(m.lastindex)
                    )
                    line = line[m.end() :]
                    values.append(
                        CheckValueInfo(
                            nameless_value=nameless_value,
                            var=var,
                            prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[
                                0
                            ],
                        )
                    )
                new_line_infos.append(CheckLineInfo(filtered_line, values))

            orig_line_infos = []
            for line in original_check_lines or []:
                filtered_line = ""
                values = []
                while True:
                    m = METAVAR_RE.search(line)
                    if m is None:
                        filtered_line += line
                        break

                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
                    filtered_line += line[: m.start()] + VARIABLE_TAG
                    line = line[m.end() :]
                    values.append(
                        CheckValueInfo(
                            nameless_value=None,
                            var=m.group(1),
                            prefix=None,
                        )
                    )
                orig_line_infos.append(CheckLineInfo(filtered_line, values))

            # Compute the variable name mapping
            committed_names = set(vars_seen)

            mapping = remap_metavar_names(
                orig_line_infos, new_line_infos, committed_names
            )

            for i, line_info in enumerate(new_line_infos):
                line_template = line_info.line
                line = ""

                for value in line_info.values:
                    idx = line_template.find(VARIABLE_TAG)
                    line += line_template[:idx]
                    line_template = line_template[idx + len(VARIABLE_TAG) :]

                    # Bug fix: consult this value's own descriptor. The bare
                    # `nameless_value` previously used here was the variable
                    # leaked from the collection loop above (i.e. the last
                    # match processed), which is not necessarily the
                    # descriptor for this value. The unused `key` tuple that
                    # was computed here has been dropped.
                    is_local_def = value.nameless_value.is_local_def_ir_value()
                    if is_local_def:
                        if mapping[value.var] in vars_seen:
                            line += f"[[{mapping[value.var]}]]"
                        else:
                            line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]"
                            vars_seen.add(mapping[value.var])
                    else:
                        raise RuntimeError("not implemented")

                line += line_template

                lines[i] = line

    if is_analyze:
        for i, _ in enumerate(lines):
            # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
            scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
            lines[i] = scrubbed_line

    return lines
1743
1744
1745# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(
    lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines
):
    """Generalize IR output lines using the IR nameless-value descriptors."""
    return generalize_check_lines_common(
        lines,
        is_analyze,
        vars_seen,
        global_vars_seen,
        ir_nameless_values,
        IR_VALUE_RE,
        is_asm=False,
        preserve_names=preserve_names,
        original_check_lines=original_check_lines,
    )
1760
1761
def generalize_global_check_line(line, preserve_names, global_vars_seen):
    """Generalize a single global-value definition line and return it."""
    # Locals are not tracked for globals, hence the throwaway set; the unpack
    # asserts exactly one line comes back.
    (generalized,) = generalize_check_lines_common(
        [line],
        False,
        set(),
        global_vars_seen,
        global_nameless_values_w_unstable_ids,
        GLOBAL_VALUE_RE,
        False,
        preserve_names,
    )
    return generalized
1774
1775
def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    """Generalize backend (asm) output lines using the asm descriptors."""
    return generalize_check_lines_common(
        lines,
        False,
        vars_seen,
        global_vars_seen,
        asm_nameless_values,
        ASM_VALUE_RE,
        is_asm=True,
        preserve_names=False,
    )
1787
1788
def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen):
    """Generalize analysis-pass output lines using the analysis descriptors."""
    return generalize_check_lines_common(
        lines,
        True,
        vars_seen,
        global_vars_seen,
        analyze_nameless_values,
        ANALYZE_VALUE_RE,
        is_asm=False,
        preserve_names=False,
    )
1800
1801
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
    preserve_names=False,
    original_check_lines: Mapping[str, List[str]] = {},
):
    """Append FileCheck lines for `func_name` to `output_lines`.

    For each run line in `prefix_list` this emits a -LABEL line (built from
    `check_label_format`), optional attribute/-SAME lines, and the function
    body checks, generalized according to the output kind (backend asm,
    analysis output, or IR). Returns the list of prefixes actually printed.

    Args:
      output_lines: destination list; check lines are appended in place.
      comment_marker: comment leader for the test's language (e.g. ';').
      prefix_list: run-line entries whose first element is the prefix list.
      func_dict: maps check prefix -> func name -> scrubbed function info.
      func_name: the function to emit checks for.
      check_label_format: format string for the -LABEL line; its '%s' slots
        are filled with (prefix, funcdef_attrs_and_ret, func_name,
        args_and_sig, func_name_separator).
      is_backend: True for llc-style asm output.
      is_analyze: True for analysis-pass output.
      version: script version; >1 adds funcdef attrs/return, >=3 keeps the
        opening '(' on the label line.
      global_vars_seen_dict: per-prefix dict of globals captured so far;
        updated in place with newly seen globals.
      is_filtered: True when output is filtered (suppresses -NEXT suffixes).
      preserve_names: if True, do not rewrite IR value names.
      original_check_lines: per-prefix existing check lines, used to keep
        metavariable names stable across regeneration.
        NOTE(review): mutable default is safe here because it is only read
        (.get), never mutated.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            # Seed this run line's view of globals from what this prefix has
            # already captured.
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot so we can tell which globals are new after this prefix.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if version > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig],
                    is_analyze,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=[],
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # Captures in label lines are not supported, thus split into a -LABEL
                # and a separate -SAME line that contains the arguments with captures.
                args_and_sig_prefix = ""
                if version >= 3 and args_and_sig.startswith("("):
                    # Ensure the "(" separating function name and arguments is in the
                    # label line. This is required in case of function names that are
                    # prefixes of each other. Otherwise, the label line for "foo" might
                    # incorrectly match on "foo.specialized".
                    args_and_sig_prefix = args_and_sig[0]
                    args_and_sig = args_and_sig[1:]

                # Removing args_and_sig from the label match line requires
                # func_name_separator to be empty. Otherwise, the match will not work.
                assert func_name_separator == ""
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig_prefix,
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
            elif is_analyze:
                func_body = generalize_analyze_check_lines(
                    func_body, vars_seen, global_vars_seen
                )
                for func_line in func_body:
                    if func_line.strip() == "":
                        output_lines.append(
                            "{} {}-EMPTY:".format(comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            else:
                func_body = generalize_check_lines(
                    func_body,
                    False,
                    vars_seen,
                    global_vars_seen,
                    preserve_names,
                    original_check_lines=original_check_lines.get(checkprefix),
                )

                # This could be selectively enabled with an optional invocation argument.
                # Disabled for now: better to check everything. Be safe rather than sorry.

                # Handle the first line of the function body as a special case because
                # it's often just noise (a useless asm comment or entry label).
                # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
                #  is_blank_line = True
                # else:
                #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
                #  is_blank_line = False

                is_blank_line = False

                for func_line in func_body:
                    if func_line.strip() == "":
                        is_blank_line = True
                        continue
                    # Do not waste time checking IR comments.
                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                    # Skip blank lines instead of checking them.
                    if is_blank_line:
                        output_lines.append(
                            "{} {}:       {}".format(
                                comment_marker, checkprefix, func_line
                            )
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "{} {}{}:  {}".format(
                                comment_marker, checkprefix, check_suffix, func_line
                            )
                        )
                    is_blank_line = False

                # Add space between different check prefixes and also before the first
                # line of code in the test function.
                output_lines.append(comment_marker)

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break
    return printed_prefixes
2050
2051
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
    original_check_lines={},
):
    """Emit FileCheck lines for the IR output of `func_name` via add_checks."""
    # Label format is based on IR string.
    if not function_sig:
        function_def_regex = "%s"
    elif version > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        is_backend=False,
        is_analyze=False,
        version=version,
        global_vars_seen_dict=global_vars_seen_dict,
        is_filtered=is_filtered,
        preserve_names=preserve_names,
        original_check_lines=original_check_lines,
    )
2090
2091
def add_analyze_checks(
    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
):
    """Emit FileCheck lines for the analysis output of `func_name`."""
    check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        is_backend=False,
        is_analyze=True,
        version=1,
        global_vars_seen_dict={},
        is_filtered=is_filtered,
    )
2110
2111
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
    """Record every global value definition found in `raw_tool_output` into
    glob_val_dict[prefix][check_prefix] as (start_index, line) pairs."""
    for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values):
        if nameless_value.global_ir_rhs_regexp is None:
            continue

        # Match a whole "<lhs> = <rhs>" definition line for this value kind.
        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        rhs_re_str = nameless_value.global_ir_rhs_regexp
        global_ir_value_re = re.compile(
            r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$", flags=(re.M)
        )

        # Attach the substring's start index so that CHECK lines
        # can be sorted properly even if they are matched by different nameless values.
        # This is relevant for GLOB and GLOBNAMED since they may appear interlaced.
        lines = [
            (match.start(), match.group(0))
            for match in global_ir_value_re.finditer(raw_tool_output)
        ]

        for prefix in prefixes:
            if glob_val_dict[prefix] is None:
                continue
            if nameless_value.check_prefix in glob_val_dict[prefix]:
                if glob_val_dict[prefix][nameless_value.check_prefix] == lines:
                    continue
                if prefix == prefixes[-1]:
                    warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
                else:
                    # Conflicting output under a shared prefix: poison the entry.
                    glob_val_dict[prefix][nameless_value.check_prefix] = None
                    continue
            glob_val_dict[prefix][nameless_value.check_prefix] = lines
2141
2142
def filter_globals_according_to_preference(
    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
):
    """Filter (index, line) global definitions per the --check-globals setting.

    'none' drops all globals and 'all' keeps everything. 'smart' keeps only
    the globals that are transitively referenced from globals already
    captured in `global_vars_seen` (an iterable of (name, check_key) pairs).

    Returns the filtered list of (index, line) pairs.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines_w_index
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    def extract(line, nv):
        # Split a definition line into its name and the names referenced on
        # its right-hand side.
        p = (
            "^"
            + nv.ir_prefix
            + "("
            + nv.ir_regexp
            + ") = ("
            + nv.global_ir_rhs_regexp
            + ")"
        )
        match = re.match(p, line)
        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))

    transitively_visible = set()
    contains_refs_to = {}

    def add(var):
        # Mark `var` and everything it references as visible.
        # (The previous `nonlocal` declarations were unnecessary: the sets and
        # dicts are mutated, never rebound.)
        if var in transitively_visible:
            return
        transitively_visible.add(var)
        if var not in contains_refs_to:  # was the non-idiomatic `not var in`
            return
        for x in contains_refs_to[var]:
            add(x)

    # Extract each line exactly once; remember its name for the final filter
    # below instead of re-running the regexes on every line.
    names_w_index = []
    for i, line in global_val_lines_w_index:
        (var, refs) = extract(line, nameless_value)
        contains_refs_to[var] = refs
        names_w_index.append((i, line, var))
    for var, check_key in global_vars_seen:
        if check_key != nameless_value.check_key:
            continue
        add(var)
    return [
        (i, line) for i, line, var in names_w_index if var in transitively_visible
    ]
2195
2196
# Regex/replacement pairs used to scrub metadata values that vary between
# builds (tool version strings, build directories) out of check lines.
METADATA_FILTERS = [
    (
        r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),  # preface with glob also, to capture optional CLANG_VENDOR
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]


def filter_unstable_metadata(line):
    """Apply every unstable-metadata filter to `line` and return the result."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
2211
2212
def flush_current_checks(output_lines, new_lines_w_index, comment_marker):
    """Append the pending (index, line) entries to output_lines in index
    order, preceded by a separator, then clear the pending list."""
    if not new_lines_w_index:
        return
    output_lines.append(comment_marker + SEPARATOR)
    output_lines.extend(check_line for _, check_line in sorted(new_lines_w_index))
    new_lines_w_index.clear()
2221
2222
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    preserve_names,
    is_before_functions,
    global_check_setting,
):
    """Append FileCheck lines for global value definitions to `output_lines`.

    Walks every global nameless value whose placement matches
    `is_before_functions`, filters its recorded (index, line) entries
    according to `global_check_setting` (none/all/smart), generalizes and
    scrubs each line, and flushes the result in source order per check
    prefix. Returns the set of (checkprefix, value check prefix) pairs that
    were printed. `global_vars_seen_dict` is updated in place.
    """
    printed_prefixes = set()
    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
    for nameless_value in global_nameless_values:
        if nameless_value.is_before_functions != is_before_functions:
            continue
        for p in prefix_list:
            global_vars_seen = {}
            checkprefixes = p[0]
            if checkprefixes is None:
                continue
            for checkprefix in checkprefixes:
                # Seed from globals already captured under this prefix.
                if checkprefix in global_vars_seen_dict:
                    global_vars_seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}
                if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
                    break
                if not glob_val_dict[checkprefix]:
                    continue
                if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
                    continue
                if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
                    continue

                check_lines = []
                # Snapshot so we can tell which globals are new afterwards.
                global_vars_seen_before = [key for key in global_vars_seen.keys()]
                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
                lines_w_index = filter_globals_according_to_preference(
                    lines_w_index,
                    global_vars_seen_before,
                    nameless_value,
                    global_check_setting,
                )
                for i, line in lines_w_index:
                    # Honor a user-supplied --global-value-regex whitelist.
                    if _global_value_regex:
                        matched = False
                        for regex in _global_value_regex:
                            if re.match("^@" + regex + " = ", line) or re.match(
                                "^!" + regex + " = ", line
                            ):
                                matched = True
                                break
                        if not matched:
                            continue
                    new_line = generalize_global_check_line(
                        line, preserve_names, global_vars_seen
                    )
                    new_line = filter_unstable_metadata(new_line)
                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
                    check_lines.append((i, check_line))
                if not check_lines:
                    continue

                if not checkprefix in output_lines_loc:
                    output_lines_loc[checkprefix] = []
                # Non-interlaced kinds flush what came before so kinds stay
                # grouped; interlaced ones accumulate and sort together.
                if not nameless_value.interlaced_with_previous:
                    flush_current_checks(
                        output_lines, output_lines_loc[checkprefix], comment_marker
                    )
                for check_line in check_lines:
                    output_lines_loc[checkprefix].append(check_line)

                printed_prefixes.add((checkprefix, nameless_value.check_prefix))

                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                break

    if printed_prefixes:
        # Flush the first populated prefix of each run line and close with a
        # separator line.
        for p in prefix_list:
            if p[0] is None:
                continue
            for checkprefix in p[0]:
                if checkprefix not in output_lines_loc:
                    continue
                flush_current_checks(
                    output_lines, output_lines_loc[checkprefix], comment_marker
                )
                break
        output_lines.append(comment_marker + SEPARATOR)
    return printed_prefixes
2316
2317
def check_prefix(prefix):
    """Warn if `prefix` is not a valid FileCheck prefix."""
    if PREFIX_RE.match(prefix):
        return
    hint = ""
    if "," in prefix:
        hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(
        (
            "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
            + hint
        )
        % (prefix)
    )
2330
2331
def get_check_prefixes(filecheck_cmd):
    """Return all check prefixes named on a FileCheck command line, or the
    default ["CHECK"] when none are given."""
    check_prefixes = []
    for m in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        check_prefixes.extend(m.group(1).split(","))
    return check_prefixes if check_prefixes else ["CHECK"]
2341
2342
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix mentioned in a FileCheck command, warning about
    invalid names and duplicates."""
    for part in fc_cmd.split():
        if "check-prefix=" in part:
            check_prefix(part.split("=", 1)[1])
        elif "check-prefixes=" in part:
            prefixes = part.split("=", 1)[1].split(",")
            for prefix in prefixes:
                check_prefix(prefix)
                if prefixes.count(prefix) > 1:
                    warn(
                        "Supplied prefix '%s' is not unique in the prefix list."
                        % (prefix,)
                    )
2358
2359
def get_autogennote_suffix(parser, args):
    """Reconstruct the UTC_ARGS option suffix for the autogenerated note line.

    Walks the parser's registered actions and rebuilds the command-line
    options (with values) from *args* that differ from their defaults, so the
    note embedded in the test records exactly how checks were generated.
    Returns " <UTC_ARGS_KEY> opt1 opt2 ..." or "" when everything is at its
    default.
    """
    autogenerated_note_args = ""
    for action in parser._actions:
        if not hasattr(args, action.dest):
            continue  # Ignore options such as --help that aren't included in args
        # Ignore parameters such as paths to the binary or the list of tests
        if action.dest in (
            "tests",
            "update_only",
            "tool_binary",
            "opt_binary",
            "llc_binary",
            "clang",
            "opt",
            "llvm_bin",
            "verbose",
            "force_update",
            "reset_variable_names",
        ):
            continue
        value = getattr(args, action.dest)
        if action.dest == "check_globals":
            # The default for --check-globals changed in version 4 (see the
            # version changelog at the top of the file), so the "omit if
            # default" logic is version-dependent.
            default_value = "none" if args.version < 4 else "smart"
            if value == default_value:
                continue
            autogenerated_note_args += action.option_strings[0] + " "
            # Pre-v4, bare "--check-globals" meant "all", so the value can be
            # left implicit.
            if args.version < 4 and value == "all":
                continue
            autogenerated_note_args += "%s " % value
            continue
        if action.const is not None:  # action stores a constant (usually True/False)
            # Skip actions with different constant values (this happens with boolean
            # --foo/--no-foo options)
            if value != action.const:
                continue
        if parser.get_default(action.dest) == value:
            continue  # Don't add default values
        if action.dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2
        if action.dest == "filters":
            # Create a separate option for each filter element.  The value is a list
            # of Filter objects.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                opt_value = elem.pattern()
                new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"'))
                if new_arg not in autogenerated_note_args:
                    autogenerated_note_args += new_arg
        else:
            autogenerated_note_args += action.option_strings[0] + " "
            if action.const is None:  # action takes a parameter
                if action.nargs == "+":
                    # Re-quote each element of a multi-value option.
                    value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value))
                autogenerated_note_args += "%s " % value
    if autogenerated_note_args:
        # Drop the trailing space and prepend the UTC_ARGS marker.
        autogenerated_note_args = " %s %s" % (
            UTC_ARGS_KEY,
            autogenerated_note_args[:-1],
        )
    return autogenerated_note_args
2419    return autogenerated_note_args
2420
2421
def check_for_command(line, parser, args, argv, argparse_callback):
    """Re-parse arguments when *line* carries an in-test UTC_ARGS command.

    If the line matches UTC_ARGS_CMD, its options are shlex-split, appended
    to *argv*, and the combined option list (minus test file paths) is
    re-parsed.  Returns the (possibly updated) args namespace and argv list.
    """
    match = UTC_ARGS_CMD.match(line)
    if match:
        argv.extend(opt for opt in shlex.split(match.group("cmd").strip()) if opt)
        non_test_argv = [arg for arg in argv if arg not in args.tests]
        args = parse_args(parser, non_test_argv)
        if argparse_callback is not None:
            argparse_callback(args)
    return args, argv
2431    return args, argv
2432
2433
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option value for a test, falling back to in-test UTC_ARGS.

    First queries the test's own args via *get_arg_to_check*.  If that yields
    nothing and *is_global* is set, scans the test's lines for the option
    (a "global" option affects the whole generation and is applied to
    everything if it appears anywhere).  Returns the value found, or the
    original (falsy) result.
    """
    result = get_arg_to_check(test_info.args)
    if not result and is_global:
        # See if this has been specified via UTC_ARGS.  This is a "global" option
        # that affects the entire generation of test checks.  If it exists anywhere
        # in the test, apply it to everything.
        saw_line = False
        for line_info in test_info.ro_iterlines():
            line = line_info.line
            if not line.startswith(";") and line.strip() != "":
                saw_line = True
            result = get_arg_to_check(line_info.args)
            if result:
                # The original condition was "warn and saw_line", but "warn"
                # refers to the module-level function, which is always truthy,
                # so the guard reduces to just "saw_line".
                if saw_line:
                    # We saw the option after already reading some test input lines.
                    # Warn about it.
                    print(
                        "WARNING: Found {} in line following test start: ".format(
                            arg_string
                        )
                        + line,
                        file=sys.stderr,
                    )
                    print(
                        "WARNING: Consider moving {} to top of file".format(arg_string),
                        file=sys.stderr,
                    )
                break
    return result
2462    return result
2463
2464
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the test's input lines into *output_lines*, dropping old checks.

    Skips bare comment-marker lines, the separator line, and any comment
    line that is a FileCheck directive (per CHECK_RE) for one of the
    prefixes in *prefix_set* — those checks are regenerated elsewhere.
    Appended lines have their trailing newline stripped.
    """
    # Hoist the loop-invariant separator string out of the loop.  (The
    # original also read input_line_info.args into an unused local, removed
    # here.)
    separator = comment_string + SEPARATOR
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        stripped = line.strip()
        if stripped == comment_string:
            continue
        if stripped == separator:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
2477        output_lines.append(line.rstrip("\n"))
2478
2479
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for every function, grouped by prefix, at end of file.

    The add_*_checks routines expect a run list whose items are tuples with
    a list of prefixes first and a tool command args string second, and they
    implicitly assume every run line generates output for every function.
    That does not hold for generated functions (e.g. -fopenmp vs. no
    -fopenmp), so each run-line entry is handed to *check_generator* one
    prefix at a time.  A side effect is that checks are ordered by prefix
    instead of by function as in "normal" mode.  Returns the set of prefixes
    actually generated.
    """
    emitted = set()
    generated_prefixes = set()
    for entry in prefix_list:
        run_prefixes = entry[0]
        tool_args = entry[1]
        for run_prefix in run_prefixes:
            for func in func_order[run_prefix]:
                # The func order can contain the same function several times;
                # once a (function, prefix) pair has been handled, skip it.
                if (func, run_prefix) in emitted:
                    continue
                # Separate consecutive groups of checks with a bare comment
                # marker line.
                if emitted:
                    output_lines.append(comment_string)

                # Pass just the single prefix we're interested in, so all
                # checks for one prefix are generated before moving on to the
                # next one.
                for produced_prefix in check_generator(
                    output_lines, [([run_prefix], tool_args)], func
                ):
                    emitted.add((func, produced_prefix))
                    generated_prefixes.add(produced_prefix)
    return generated_prefixes
2517