xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision c2bed2a1703a8187c231fdb2926714e877221d85)
1from __future__ import print_function
2
3import argparse
4import copy
5import glob
6import itertools
7import os
8import re
9import subprocess
10import sys
11import shlex
12
13from typing import List
14
15##### Common utilities for update_*test_checks.py
16
17
# Module-wide verbosity switch: assigned from --verbose by
# parse_commandline_args() and consulted by debug().
_verbose = False
# Assigned by TestInfo.__init__ when --prefix-filecheck-ir-name is non-empty.
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
"""
# Version that itertests() injects (as --version=N) for tests that are not
# regenerations of an already autogenerated file.
DEFAULT_VERSION = 3
31
32
class Regex(object):
    """Thin wrapper around a compiled regular expression.

    The wrapper exists so that objects holding regular expressions can be
    deep-copied (see do_scrub): the copy shares the compiled pattern rather
    than trying to duplicate it.

    """

    def __init__(self, regex):
        # `regex` is the already compiled pattern object.
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper and keep pointing at the same compiled
        # pattern object.
        clone = copy.copy(self)
        clone.regex = self.regex
        return clone

    def search(self, line):
        """Forward to the compiled pattern's search()."""
        compiled = self.regex
        return compiled.search(line)

    def sub(self, repl, line):
        """Forward to the compiled pattern's sub()."""
        compiled = self.regex
        return compiled.sub(repl, line)

    def pattern(self):
        """Return the source string of the wrapped pattern."""
        compiled = self.regex
        return compiled.pattern

    def flags(self):
        """Return the flags of the wrapped pattern."""
        compiled = self.regex
        return compiled.flags
58
59
class Filter(Regex):
    """Augment a Regex object with a flag indicating whether a match should be
    added (!is_filter_out) or removed (is_filter_out) from the generated checks.

    """

    def __init__(self, regex, is_filter_out):
        super(Filter, self).__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Ask the parent class for the copy directly. Handing a `super` proxy
        # object to copy.deepcopy() only worked because the proxy happens to
        # forward the __deepcopy__ attribute lookup to the parent class.
        result = super(Filter, self).__deepcopy__(memo)
        result.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return result
74
75
def parse_commandline_args(parser):
    """Add the options common to the update scripts to `parser` and parse sys.argv.

    Besides returning the parsed namespace, this stores --verbose,
    --global-value-regex and --global-hex-value-regex in the module globals
    _verbose, _global_value_regex and _global_hex_value_regex.

    Raises ValueError if a --filter/--filter-out value is not a valid regular
    expression.
    """

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            # option_string is only used for the error message; it has to be
            # passed in explicitly because it is not otherwise visible here.
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            # Keep the try body down to the one call that can raise re.error.
            try:
                compiled = re.compile(values, flags)
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                ) from error

            value_list.append(Regex(compiled))
            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            # The parent appended a plain Regex; replace it with a Filter that
            # also records whether this was --filter or --filter-out.
            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    # Both options share one destination so that their relative order on the
    # command line is preserved.
    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
219
220
def parse_args(parser, argv):
    """Parse `argv` with `parser` and apply the version-dependent defaults.

    From version 2 on, function_signature is forced to True. If the parser
    defines check_globals and it is still "default", it becomes "none" for
    versions below 3 and "smart" otherwise.
    """
    parsed = parser.parse_args(argv)
    version = parsed.version
    if version >= 2:
        parsed.function_signature = True
    # getattr() with a fallback covers parsers that do not define the option.
    if getattr(parsed, "check_globals", None) == "default":
        if version < 3:
            parsed.check_globals = "none"
        else:
            parsed.check_globals = "smart"
    return parsed
228
229
class InputLineInfo(object):
    """Plain record for one input line together with the options that apply to it."""

    def __init__(self, line, line_number, args, argv):
        # Option state associated with this line.
        self.args, self.argv = args, argv
        # The line text and its index within the input.
        self.line, self.line_number = line, line_number
236
237
class TestInfo(object):
    """Per-test state: path, input lines, RUN lines, options and the note strings
    written into the generated file."""

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        global _prefix_filecheck_ir_name

        self.path = test
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.args = args
        self.argv = argv
        # Only a non-empty prefix overrides the module-level setting.
        if args.prefix_filecheck_ir_name:
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)

        # Without an explicit comment prefix, .mir files use '#' and
        # everything else uses ';'.
        if comment_prefix is None:
            comment_prefix = "#" if self.path.endswith(".mir") else ";"
        self.comment_prefix = comment_prefix

        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = (
            self.autogenerated_note_prefix
            + script_name
            + get_autogennote_suffix(parser, self.args)
        )
        self.test_unused_note = self.comment_prefix * 2 + " " + UNUSED_NOTE

    def ro_iterlines(self):
        """Yield an InputLineInfo for every input line without touching self.args/argv."""
        for index, text in enumerate(self.input_lines):
            line_args, line_argv = check_for_command(
                text, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(text, index, line_args, line_argv)

    def iterlines(self, output_lines):
        """Yield the lines that need processing, updating self.args/argv as it goes.

        The autogenerated note is appended to `output_lines` first. Existing
        note lines are dropped, and lines seen while generation is disabled
        are copied to `output_lines` unchanged instead of being yielded.
        """
        output_lines.append(self.test_autogenerated_note)
        for line_info in self.ro_iterlines():
            text = line_info.line
            # Discard any previous script advertising.
            if text.startswith(self.autogenerated_note_prefix):
                continue
            self.args, self.argv = line_info.args, line_info.argv
            if self.args.enabled:
                yield line_info
            else:
                output_lines.append(text)

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the unused-prefix note followed by one match-everything check
        line per prefix that appears in `run_list` but not in `used_prefixes`.

        Returns an empty list when every prefix is used.
        """
        declared = set()
        for entry in run_list:
            # Entries whose prefix list is None are ignored.
            if entry[0] is not None:
                declared.update(entry[0])
        leftover = declared - set(used_prefixes)
        if not leftover:
            return []

        checks = [self.test_unused_note]
        checks.extend(
            "{comment} {prefix}: {match_everything}".format(
                comment=self.comment_prefix,
                prefix=unused,
                match_everything=r"""{{.*}}""",
            )
            for unused in sorted(leftover)
        )
        return checks
315
316
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Expand `test_patterns` and yield a TestInfo for each test to process.

    A test is skipped with a warning when its first line carries the
    do-not-autogenerate note, when it was autogenerated by a different script
    (unless --force-update is given), or when --update-only is given and the
    test is not autogenerated.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        matched_tests = glob.glob(pattern)
        if not matched_tests:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in matched_tests:
            with open(test) as test_file:
                input_lines = [text.rstrip() for text in test_file]
            first_line = input_lines[0] if input_lines else ""
            if UTC_AVOID in first_line:
                warn("Skipping test that must not be autogenerated: " + test)
                continue
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)

            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                # Pick up options recorded on the first line of the test.
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue

            # Everything from the unused-prefixes note onwards is dropped.
            final_input_lines = list(
                itertools.takewhile(
                    lambda text: UNUSED_NOTE not in text, input_lines
                )
            )
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
372
373
def should_add_line_to_output(
    input_line, prefix_set, skip_global_checks=False, comment_marker=";"
):
    """Return True if `input_line` should be copied to the output unchanged.

    Rejected are: a bare comment marker (unless skip_global_checks is set),
    the separator comment line, and CHECK lines for a prefix in `prefix_set`.
    With skip_global_checks set, such a CHECK line is rejected only if it
    contains "[[".
    """
    stripped = input_line.strip()
    # Skip any blank comment lines in the IR.
    if stripped == comment_marker and not skip_global_checks:
        return False
    # Skip a special double comment line we use as a separator.
    if stripped == comment_marker + SEPARATOR:
        return False
    # And skip any CHECK lines. We're building our own.
    check_match = CHECK_RE.match(input_line)
    if not (check_match and check_match.group(1) in prefix_set):
        return True
    if not skip_global_checks:
        return False
    return "[[" not in input_line
395
396
397# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return the (pattern, replacement) pairs for a test located at `sourcepath`."""
    directory = os.path.dirname(sourcepath)
    # Insertion order is the order in which the substitutions are listed.
    table = {
        "%s": sourcepath,
        "%S": directory,
        "%p": directory,
        "%{pathsep}": os.pathsep,
    }
    return list(table.items())
406
407
def applySubstitutions(s, substitutions):
    """Apply each (pattern, replacement) pair to `s` in order and return the result."""
    expanded = s
    for pair in substitutions:
        pattern, replacement = pair
        expanded = expanded.replace(pattern, replacement)
    return expanded
412
413
414# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run `exe` with `cmd_args` on the file `ir` and return its stdout as text.

    cmd_args may be a list (run without a shell) or a string (appended to
    `exe` and run through the shell). Substitutions from getSubstitutions(ir)
    are applied to the arguments and to preprocess_cmd. When preprocess_cmd
    is given, it is run through the shell and its stdout is fed to the tool
    instead of the file itself.

    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status. The exit status of the preprocess command is not checked.
    """
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)
        tool_stdin = ir_file
        pp = None

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                pp = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
            tool_stdin = pp.stdout

        try:
            if isinstance(cmd_args, list):
                args = [applySubstitutions(a, substitutions) for a in cmd_args]
                stdout = subprocess.check_output([exe] + args, stdin=tool_stdin)
            else:
                stdout = subprocess.check_output(
                    exe + " " + applySubstitutions(cmd_args, substitutions),
                    shell=True,
                    stdin=tool_stdin,
                )
        finally:
            # Release the preprocess pipe and reap the child so neither is
            # leaked. The pipe is closed before waiting so that a preprocess
            # command still writing to it cannot keep wait() blocked.
            if pp is not None:
                pp.stdout.close()
                pp.wait()
        if sys.version_info[0] > 2:
            # FYI, if you crashed here with a decode error, your run line probably
            # results in bitcode or other binary format being written to the pipe.
            # For an opt test, you probably want to add -S or -disable-output.
            stdout = stdout.decode()
    # Normalize Windows CRLF line endings to unix LF style.
    return stdout.replace("\r\n", "\n")
461
462
463##### LLVM IR parser
# A "RUN:" line inside a `//`, `;` or `#` comment; group 1 is the text after
# "RUN:".
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# A -check-prefix / --check-prefix / -check-prefixes / --check-prefixes option
# followed by '=' or a space; group 1 is the option value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A string made only of letters, digits, '_' and '-'.
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# A commented "<prefix>:" directive, optionally with one of the listed
# suffixes (-NEXT, -NOT, ...); group 1 is the text before the suffix/colon.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
)
470
# Marker that introduces script options recorded inside a test file.
UTC_ARGS_KEY = "UTC_ARGS:"
# Captures everything after the marker as the named group "cmd". Both pattern
# fragments are raw strings so that `\s` reaches the regex engine untouched
# instead of being an invalid string-literal escape.
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# Start of the note that marks a test as autogenerated; the script name follows.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
# Note that marks a test which must not be autogenerated.
UTC_AVOID = "NOTE: Do not autogenerate"
# Note that precedes the autogenerated list of unused prefixes.
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"
476
# A whole `define ... { ... }` function in textual IR, optionally preceded by
# a "; Function Attrs:" comment. Named groups: attrs, funcdef_attrs_and_ret,
# func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# "'<analysis>' for function '<func>':" followed by the remaining text as the
# body. Named groups: analysis, func, body.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# A quoted function name at the start of the text, the rest of that line, and
# then the remaining text as the body. Named groups: func, body.
LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# Start of a `define` line; group 1 is the (optionally double-quoted) function
# name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A `target triple = "..."` line; group 1 is the quoted value.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march / -debug-only options followed by '=' or a space; group 1
# is the option value.
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
498
# Whitespace at the very start of the string (no re.M, so not per line).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
# Runs of spaces/tabs, except at positions rejected by the leading negative
# lookahead (line starts); scrub_body() replaces matches with a single space.
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
# Alias of the pattern above; this is the name scrub_body() uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Trailing spaces/tabs and "#<digits>" tokens at the end of any line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
# A "# kill:" comment line including its newline; without re.M it can only
# match at the start of the string.
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
# "# =>This Inner Loop Header:" and "# in Loop:" comments through end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
# A '#' at the end of a line, together with the spaces/tabs before it, when
# preceded by a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output() skips.
SEPARATOR = "."
513
514
def error(msg, test_file=None):
    """Print `msg` to stderr prefixed with "ERROR: ", followed by ": <test_file>"
    when a test file is given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
519
520
def warn(msg, test_file=None):
    """Print `msg` to stderr prefixed with "WARNING: ", followed by
    ": <test_file>" when a test file is given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
525
526
def debug(*args, **kwargs):
    """print() the arguments when the module is in verbose mode.

    Output goes to stderr unless the caller supplies its own `file`.
    """
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if not _verbose:
        return
    print(*args, **kwargs)
533
534
def find_run_lines(test, lines):
    """Collect the RUN commands from `lines`, joining backslash continuations.

    `test` is only used in the debug output. Returns one string per logical
    RUN line.
    """
    debug("Scanning for RUN lines in test file:", test)
    run_lines = []
    for text in lines:
        run_match = RUN_LINE_RE.match(text)
        if not run_match:
            continue
        command = run_match.group(1)
        # A trailing backslash on the previous RUN line means this one
        # continues it.
        if run_lines and run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + command
        else:
            run_lines.append(command)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for command in run_lines:
        debug("  RUN: {}".format(command))
    return run_lines
548
549
def get_triple_from_march(march):
    """Map an -march value to a triple by prefix, falling back to 'x86'.

    The fallback is reported on stderr.
    """
    # Every known prefix maps to a triple of the same name.
    known_prefixes = ("amdgcn", "r600", "mips", "sparc", "hexagon", "ve")
    for candidate in known_prefixes:
        if march.startswith(candidate):
            return candidate
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
564
565
def apply_filters(line, filters):
    """Return True if `line` survives `filters`.

    The first filter that matches decides: an include filter keeps the line,
    a filter-out drops it. If nothing matches, the line is kept only when no
    include filter was present.
    """
    saw_include_filter = False
    for current in filters:
        saw_include_filter = saw_include_filter or not current.is_filter_out
        if current.search(line):
            return not current.is_filter_out
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return not saw_include_filter
576
577
def do_filter(body, filters):
    """Return `body` reduced to the lines accepted by apply_filters().

    With no filters the body is returned untouched.
    """
    if not filters:
        return body
    kept = [text for text in body.splitlines() if apply_filters(text, filters)]
    return "\n".join(kept)
586
587
def scrub_body(body):
    """Normalize whitespace in `body` and return the result."""
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    collapsed = SCRUB_WHITESPACE_RE.sub(r" ", body)
    # Expand the tabs used for indentation.
    expanded = collapsed.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", expanded)
597
598
def do_scrub(body, scrubber, scrubber_args, extra):
    """Call `scrubber` on `body` with `extra` stored as extra_scrub on the first
    scrubber argument.

    The arguments are deep-copied first so the caller's objects are not
    modified. With no scrubber arguments the scrubber is called with the body
    alone.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    private_args = copy.deepcopy(scrubber_args)
    private_args[0].extra_scrub = extra
    return scrubber(body, *private_args)
605
606
607# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed body of one function together with its signature pieces."""

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
    ):
        # `scrub` is what __str__ returns; `extrascrub` is what
        # is_same_except_arg_names() compares.
        self.scrub, self.extrascrub = string, extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
    ):
        """Return True if the given function pieces equal the stored ones once
        argument names are ignored.

        For backend checks the extra-scrubbed bodies must be equal as-is.
        """
        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret or self.attrs != attrs:
            return False

        # Names seen in either signature; filled in by strip_name_from_sig.
        seen_arg_names = set()

        def strip_name_from_sig(match):
            # Remember the value name, then rebuild the text without it.
            seen_arg_names.add(match.group(variable_group_in_ir_value_match))
            attr = match.group(attribute_group_in_ir_value_match) or ""
            return match.group(1) + attr + match.group(match.lastindex)

        def strip_known_name_from_body(match):
            # Only uses of previously recorded argument names are rewritten.
            name = match.group(variable_group_in_ir_value_match)
            if name is not None and name in seen_arg_names:
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        own_sig = IR_VALUE_RE.sub(strip_name_from_sig, self.args_and_sig)
        other_sig = IR_VALUE_RE.sub(strip_name_from_sig, args_and_sig)
        if own_sig != other_sig:
            return False
        if is_backend:
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        own_body, other_body = (
            SCRUB_IR_COMMENT_RE.sub(r"", IR_VALUE_RE.sub(strip_known_name_from_body, text))
            for text in (self.extrascrub, extrascrub)
        )
        return own_body == other_body

    def __str__(self):
        return self.scrub
667
668
669class FunctionTestBuilder:
670    def __init__(self, run_list, flags, scrubber_args, path):
671        self._verbose = flags.verbose
672        self._record_args = flags.function_signature
673        self._check_attributes = flags.check_attributes
674        # Strip double-quotes if input was read by UTC_ARGS
675        self._filters = (
676            list(
677                map(
678                    lambda f: Filter(
679                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
680                    ),
681                    flags.filters,
682                )
683            )
684            if flags.filters
685            else []
686        )
687        self._scrubber_args = scrubber_args
688        self._path = path
689        # Strip double-quotes if input was read by UTC_ARGS
690        self._replace_value_regex = list(
691            map(lambda x: x.strip('"'), flags.replace_value_regex)
692        )
693        self._func_dict = {}
694        self._func_order = {}
695        self._global_var_dict = {}
696        self._processed_prefixes = set()
697        for tuple in run_list:
698            for prefix in tuple[0]:
699                self._func_dict.update({prefix: dict()})
700                self._func_order.update({prefix: []})
701                self._global_var_dict.update({prefix: dict()})
702
703    def finish_and_get_func_dict(self):
704        for prefix in self.get_failed_prefixes():
705            warn(
706                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
707                % (
708                    prefix,
709                    self._path,
710                )
711            )
712        return self._func_dict
713
    def func_order(self):
        """Return the dict mapping each prefix to its list of function names, in
        the order process_run_line() first recorded them."""
        return self._func_order
716
    def global_var_dict(self):
        """Return the per-prefix dict that process_run_line() passes to
        build_global_values_dictionary()."""
        return self._global_var_dict
719
720    def is_filtered(self):
721        return bool(self._filters)
722
723    def process_run_line(
724        self, function_re, scrubber, raw_tool_output, prefixes, is_backend
725    ):
726        build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
727        for m in function_re.finditer(raw_tool_output):
728            if not m:
729                continue
730            func = m.group("func")
731            body = m.group("body")
732            # func_name_separator is the string that is placed right after function name at the
733            # beginning of assembly function definition. In most assemblies, that is just a
734            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
735            # False, just assume that separator is an empty string.
736            if is_backend:
737                # Use ':' as default separator.
738                func_name_separator = (
739                    m.group("func_name_separator")
740                    if "func_name_separator" in m.groupdict()
741                    else ":"
742                )
743            else:
744                func_name_separator = ""
745            attrs = m.group("attrs") if self._check_attributes else ""
746            funcdef_attrs_and_ret = (
747                m.group("funcdef_attrs_and_ret") if self._record_args else ""
748            )
749            # Determine if we print arguments, the opening brace, or nothing after the
750            # function name
751            if self._record_args and "args_and_sig" in m.groupdict():
752                args_and_sig = scrub_body(m.group("args_and_sig").strip())
753            elif "args_and_sig" in m.groupdict():
754                args_and_sig = "("
755            else:
756                args_and_sig = ""
757            filtered_body = do_filter(body, self._filters)
758            scrubbed_body = do_scrub(
759                filtered_body, scrubber, self._scrubber_args, extra=False
760            )
761            scrubbed_extra = do_scrub(
762                filtered_body, scrubber, self._scrubber_args, extra=True
763            )
764            if "analysis" in m.groupdict():
765                analysis = m.group("analysis")
766                if analysis.lower() != "cost model analysis":
767                    warn("Unsupported analysis mode: %r!" % (analysis,))
768            if func.startswith("stress"):
769                # We only use the last line of the function body for stress tests.
770                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
771            if self._verbose:
772                print("Processing function: " + func, file=sys.stderr)
773                for l in scrubbed_body.splitlines():
774                    print("  " + l, file=sys.stderr)
775            for prefix in prefixes:
776                # Replace function names matching the regex.
777                for regex in self._replace_value_regex:
778                    # Pattern that matches capture groups in the regex in leftmost order.
779                    group_regex = re.compile(r"\(.*?\)")
780                    # Replace function name with regex.
781                    match = re.match(regex, func)
782                    if match:
783                        func_repl = regex
784                        # Replace any capture groups with their matched strings.
785                        for g in match.groups():
786                            func_repl = group_regex.sub(
787                                re.escape(g), func_repl, count=1
788                            )
789                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)
790
791                    # Replace all calls to regex matching functions.
792                    matches = re.finditer(regex, scrubbed_body)
793                    for match in matches:
794                        func_repl = regex
795                        # Replace any capture groups with their matched strings.
796                        for g in match.groups():
797                            func_repl = group_regex.sub(
798                                re.escape(g), func_repl, count=1
799                            )
800                        # Substitute function call names that match the regex with the same
801                        # capture groups set.
802                        scrubbed_body = re.sub(
803                            func_repl, "{{" + func_repl + "}}", scrubbed_body
804                        )
805
806                if func in self._func_dict[prefix]:
807                    if self._func_dict[prefix][func] is not None and (
808                        str(self._func_dict[prefix][func]) != scrubbed_body
809                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
810                        or self._func_dict[prefix][func].attrs != attrs
811                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
812                        != funcdef_attrs_and_ret
813                    ):
814                        if self._func_dict[prefix][func].is_same_except_arg_names(
815                            scrubbed_extra,
816                            funcdef_attrs_and_ret,
817                            args_and_sig,
818                            attrs,
819                            is_backend,
820                        ):
821                            self._func_dict[prefix][func].scrub = scrubbed_extra
822                            self._func_dict[prefix][func].args_and_sig = args_and_sig
823                        else:
824                            # This means a previous RUN line produced a body for this function
825                            # that is different from the one produced by this current RUN line,
826                            # so the body can't be common across RUN lines. We use None to
827                            # indicate that.
828                            self._func_dict[prefix][func] = None
829                else:
830                    if prefix not in self._processed_prefixes:
831                        self._func_dict[prefix][func] = function_body(
832                            scrubbed_body,
833                            scrubbed_extra,
834                            funcdef_attrs_and_ret,
835                            args_and_sig,
836                            attrs,
837                            func_name_separator,
838                        )
839                        self._func_order[prefix].append(func)
840                    else:
                        # An earlier RUN line used this check prefix but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
845                        self._func_dict[prefix][func] = None
846
    def processed_prefixes(self, prefixes):
        """
        Mark a set of prefixes as having had at least one applicable RUN line fully
        processed. This is used to filter out function bodies that don't have
        outputs for all RUN lines.

        prefixes: iterable of check prefixes to record.
        """
        # Adds to the prefixes recorded by earlier calls rather than replacing
        # them.
        self._processed_prefixes.update(prefixes)
854
855    def get_failed_prefixes(self):
856        # This returns the list of those prefixes that failed to match any function,
857        # because there were conflicting bodies produced by different RUN lines, in
858        # all instances of the prefix.
859        for prefix in self._func_dict:
860            if self._func_dict[prefix] and (
861                not [
862                    fct
863                    for fct in self._func_dict[prefix]
864                    if self._func_dict[prefix][fct] is not None
865                ]
866            ):
867                yield prefix
868
869
870##### Generator of LLVM IR CHECK lines
871
# Matches an IR comment (from ';' to the end of the line) together with any
# whitespace directly in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc.
875
876
class NamelessValue:
    """Describe one kind of "unnamed" value and how to spell it in FileCheck.

    check_prefix is the stem of the generated FileCheck variable names,
    check_key is stored as given, ir_prefix is a regexp for the text that
    precedes the value, ir_regexp matches the value itself, and
    global_ir_rhs_regexp (or None) matches the right-hand side of a
    module-level definition of such a value.
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
    ):
        # How the value is named and recognised.
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp

        # Behavioural switches.
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        self.match_literally = match_literally
        # Some variable numbers (e.g. MCINST1234) will change based on unrelated
        # modifications to LLVM, replace those with an incrementing counter.
        self.replace_number_with_counter = replace_number_with_counter
        # Original number (as a string) -> counter value (as a string).
        self.variable_mapping = {}

    def is_local_def_ir_value_match(self, match):
        """Return True if this kind of IR value is "local" (its IR prefix is '%')."""
        return self.ir_prefix == "%"

    def is_global_scope_ir_value_match(self, match):
        """Return True if this kind of IR value has a module-level definition form."""
        return self.global_ir_rhs_regexp is not None

    def get_ir_prefix_from_ir_value_match(self, match):
        """Return (IR prefix as it appears in the output, check prefix).

        E.g. ("%", "TMP") for locals. Because ir_prefix may be a regexp, the
        returned IR prefix is the text it actually matched in match[0].
        """
        ir_prefix_in_output = re.search(self.ir_prefix, match[0])[0]
        return ir_prefix_in_output, self.check_prefix

    def get_ir_regex_from_ir_value_re_match(self, match):
        """Return the regexp used to capture this kind of value in a check line."""
        # For backwards compatibility we check locals with '.*'.
        return ".*" if self.is_local_def_ir_value_match(match) else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Create a FileCheck variable name based on an IR name."""
        name = var.replace("!", "")
        if self.replace_number_with_counter:
            assert name.isdigit(), name
            if name not in self.variable_mapping:
                # First sighting: hand out the next counter value.
                self.variable_mapping[name] = str(len(self.variable_mapping) + 1)
            name = self.variable_mapping[name]
        if name.isdigit():
            # This is a nameless value, prepend check_prefix.
            name = check_prefix + name
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, name)
            and _prefix_filecheck_ir_name
        ):
            # This is a named value that clashes with the check_prefix, prepend
            # _prefix_filecheck_ir_name (only done when it has been defined).
            name = _prefix_filecheck_ir_name + name
        return name.replace(".", "_").replace("-", "_").upper()

    def get_value_definition(self, var, match):
        """Create the FileCheck text that defines (captures) a variable."""
        varname = self.get_value_name(var, self.check_prefix)
        ir_prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        if self.is_number:
            # Always capture a number in the default format.
            capture_regex, opener = "", "[[#"
        else:
            capture_regex = self.get_ir_regex_from_ir_value_re_match(match)
            opener = "[["
        if self.is_local_def_ir_value_match(match):
            # Locals keep the IR prefix inside the capture.
            pieces = (opener, varname, ":", ir_prefix, capture_regex, "]]")
        else:
            pieces = (ir_prefix, opener, varname, ":", capture_regex, "]]")
        return "".join(pieces)

    def get_value_use(self, var, match, var_prefix=None):
        """Create the FileCheck text that uses a previously defined variable."""
        if var_prefix is None:
            var_prefix = self.check_prefix
        opener = "[[#" if self.is_number else "[["
        if self.is_local_def_ir_value_match(match):
            ir_prefix = ""
        else:
            ir_prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        return ir_prefix + opener + self.get_value_name(var, var_prefix) + "]]"
975
976
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
#
# NOTE: the order of the entries matters. IR_VALUE_RE is assembled by iterating
# over this list and adding one capture group per entry, and
# get_nameless_value_from_match() turns the position of the matched group back
# into an index into the list that was used to build the regexp.
#
# Entries with a non-None global_ir_rhs_regexp additionally describe
# module-level definitions of the form "<ir_prefix><ir_regexp> = <rhs>".
ir_nameless_values = [
    #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
    NamelessValue(
        r"GLOBNAMED",
        "@",
        r"@",
        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
        r".+",
        is_before_functions=True,
        match_literally=True,
    ),
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
]

# The subset of ir_nameless_values that has a module-level definition form
# (a right-hand-side regexp), in the same relative order.
global_nameless_values = [
    nameless_value
    for nameless_value in ir_nameless_values
    if nameless_value.global_ir_rhs_regexp is not None
]
# global variable names should be matched literally
global_nameless_values_w_unstable_ids = [
    nameless_value
    for nameless_value in global_nameless_values
    if not nameless_value.match_literally
]

# Values matched by ASM_VALUE_RE. Both entries are numeric (is_number) and
# have their numbers replaced by an incrementing counter, see
# NamelessValue.get_value_name.
asm_nameless_values = [
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]
1040
1041
def createOrRegexp(old, new):
    """Join two regexp alternatives with '|', skipping whichever one is empty.

    If `old` is empty `new` is returned unchanged; otherwise, if `new` is
    empty, `old` is returned unchanged.
    """
    if old and new:
        return old + "|" + new
    return old if old else new
1048
1049
def createPrefixMatch(prefix_str, prefix_re):
    """Build '(?:<prefix_str>(<prefix_re>))'.

    The prefix sits inside a non-capturing group, so the only capture group
    this function adds is the one wrapped around prefix_re.
    """
    return "".join(("(?:", prefix_str, "(", prefix_re, "))"))
1052
1053
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
IR_VALUE_REGEXP_STRING = r""
# One alternative (with one capture group for the value itself) is added per
# entry of ir_nameless_values, in list order.
for nameless_value in ir_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    if nameless_value.global_ir_rhs_regexp is not None:
        # Kinds that describe a module-level definition are anchored with '^'.
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
IR_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + IR_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Same construction, restricted to global_nameless_values_w_unstable_ids and
# without the '^' anchor.
GLOBAL_VALUE_REGEXP_STRING = r""
for nameless_value in global_nameless_values_w_unstable_ids:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
GLOBAL_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + GLOBAL_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)

# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
# The leading group requires a comment marker ('#' or '//') followed by
# optional whitespace directly in front of the value.
ASM_VALUE_RE = re.compile(
    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
1103
1104
def get_idx_from_ir_value_match(match):
    """Return the index of the nameless-value kind that `match` belongs to.

    Inspects a match for IR_VALUE_RE (or a regexp built the same way) to
    determine if it was a local value, %..., global @..., debug number
    !dbg !..., etc. The index is the zero-based position, among the per-kind
    capture groups, of the first group that took part in the match. The last
    matched group (the suffix) is not considered. Reports an error and
    returns 0 if no per-kind group matched.
    """
    candidate_groups = range(first_nameless_group_in_ir_value_match, match.lastindex)
    matched_group = next(
        (group for group in candidate_groups if match.group(group) is not None),
        None,
    )
    if matched_group is None:
        error("Unable to identify the kind of IR value from the match!")
        return 0
    return matched_group - first_nameless_group_in_ir_value_match
1113
1114
def get_name_from_ir_value_match(match):
    """Return the text captured by the per-kind group that matched.

    See get_idx_from_ir_value_match for how that group is located.
    """
    group_id = first_nameless_group_in_ir_value_match + get_idx_from_ir_value_match(
        match
    )
    return match.group(group_id)
1120
1121
def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
    """Return the entry of `nameless_values` that corresponds to `match`.

    `nameless_values` must be the list the matching regexp was built from, so
    that list positions line up with capture-group positions.
    """
    kind_index = get_idx_from_ir_value_match(match)
    return nameless_values[kind_index]
1124
1125
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value if var clashes with the scripted FileCheck check_prefix.

    A clash means `var` is `check_prefix` followed by one or more digits,
    compared case-insensitively. The result is the match object on a clash,
    None when there is none, and the (falsy) check_prefix itself when no
    prefix was given.
    """
    if not check_prefix:
        return check_prefix
    clash_pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_pattern, var, re.IGNORECASE)
1131
1132
def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
):
    """Replace value names in `lines` with FileCheck variable defs and uses.

    lines: list of strings; it is modified in place and also returned.
    is_analyze: if true (and is_asm is false) no variable substitution is
      done, only the IR-specific scrubbing below.
    vars_seen: set of (name, check_key) keys of local values already defined;
      new definitions are added to it.
    global_vars_seen: dict mapping (name, check_key) of non-local values to
      the check prefix they were first defined with; new definitions are
      added to it.
    nameless_values: list of NamelessValue that nameless_value_regex was
      built from.
    nameless_value_regex: compiled regexp whose matches are rewritten.
    is_asm: if true, the IR-specific scrubbing is skipped.
    """

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = get_name_from_ir_value_match(match)
        nameless_value = get_nameless_value_from_match(match, nameless_values)
        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (var,)
            )
        key = (var, nameless_value.check_key)
        is_local_def = nameless_value.is_local_def_ir_value_match(match)
        if is_local_def and key in vars_seen:
            rv = nameless_value.get_value_use(var, match)
        elif not is_local_def and key in global_vars_seen:
            # We could have seen a different prefix for the global variables first,
            # ensure we use that one instead of the prefix for the current match.
            rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
        else:
            # First sighting: record it, then emit a definition.
            if is_local_def:
                vars_seen.add(key)
            else:
                global_vars_seen[key] = nameless_value.check_prefix
            rv = nameless_value.get_value_definition(var, match)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(match.lastindex)

    lines_with_def = []

    for i, line in enumerate(lines):
        if not is_asm:
            # An IR variable named '%.' matches the FileCheck regex string.
            line = line.replace("%.", "%dot")
            # For a definition of a global whose name matches one of the
            # _global_hex_value_regex entries, rewrite every "iN <decimal>"
            # as "iN [[#0x...]]", i.e. with the value in hexadecimal.
            for regex in _global_hex_value_regex:
                if re.match("^@" + regex + " = ", line):
                    line = re.sub(
                        r"\bi([0-9]+) ([0-9]+)",
                        lambda m: "i"
                        + m.group(1)
                        + " [[#"
                        + hex(int(m.group(2)))
                        + "]]",
                        line,
                    )
                    break
            # Ignore any comments, since the check lines will too.
            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line
        if is_asm or not is_analyze:
            # It can happen that two matches are back-to-back and for some reason sub
            # will not replace both of them. For now we work around this by
            # substituting until there is no more match.
            changed = True
            while changed:
                # subn returns the number of replacements made (0 or 1 here),
                # which doubles as the loop condition.
                (lines[i], changed) = nameless_value_regex.subn(
                    transform_line_vars, lines[i], count=1
                )
    return lines
1204
1205
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
    """Replace IR value defs and uses in `lines` with FileCheck variables.

    Uses the full set of IR nameless values (ir_nameless_values / IR_VALUE_RE)
    and treats the lines as IR rather than asm.
    """
    return generalize_check_lines_common(
        lines,
        is_analyze,
        vars_seen,
        global_vars_seen,
        nameless_values=ir_nameless_values,
        nameless_value_regex=IR_VALUE_RE,
        is_asm=False,
    )
1217
1218
def generalize_global_check_line(line, is_analyze, global_vars_seen):
    """Generalize a single line describing a global value and return it.

    Only the global kinds that are not matched literally are rewritten
    (global_nameless_values_w_unstable_ids / GLOBAL_VALUE_RE). A fresh, empty
    set of local variables is used for the call.
    """
    (generalized_line,) = generalize_check_lines_common(
        [line],
        is_analyze,
        set(),
        global_vars_seen,
        nameless_values=global_nameless_values_w_unstable_ids,
        nameless_value_regex=GLOBAL_VALUE_RE,
        is_asm=False,
    )
    return generalized_line
1230
1231
def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    """Replace asm value defs and uses in `lines` with FileCheck variables.

    Uses asm_nameless_values / ASM_VALUE_RE and treats the lines as asm, so
    no IR-specific scrubbing is applied.
    """
    return generalize_check_lines_common(
        lines,
        False,
        vars_seen,
        global_vars_seen,
        nameless_values=asm_nameless_values,
        nameless_value_regex=ASM_VALUE_RE,
        is_asm=True,
    )
1242
1243
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Append the check lines for function `func_name` to `output_lines`.

    output_lines: list of strings that the generated lines are appended to.
    comment_marker: string that starts every generated line.
    prefix_list: one entry per RUN line; only the first item of each entry
      (its list of check prefixes) is used here.
    func_dict: func_dict[prefix][func_name] is the recorded function body
      object, or a falsy value (e.g. None) when there is no usable output.
    func_name: name of the function to emit checks for.
    check_label_format: %-format string taking (check prefix, function
      attributes and return type, function name, arguments and signature,
      function name separator).
    is_backend: if true, the body is emitted as asm check lines.
    is_analyze: forwarded to generalize_check_lines.
    version: for versions above 1 the recorded funcdef_attrs_and_ret is put
      into the label line, otherwise an empty string is used.
    global_vars_seen_dict: dict from check prefix to the global variables
      already defined under that prefix; it is updated with newly defined
      ones.
    is_filtered: if true, "-NEXT" is not added to body check lines.

    Returns the list of check prefixes for which checks were emitted.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            map(
                lambda checkprefix: func_name not in func_dict[checkprefix],
                checkprefixes,
            )
        ):
            prefix_exclusions |= set(checkprefixes)
            continue

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        # Globals known for this RUN line: the union of what was recorded for
        # each of its prefixes that has been visited so far.
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            # Checks for this prefix were already emitted, so this RUN line
            # needs nothing further.
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            if not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend:
                if len(printed_prefixes) != 0:
                    output_lines.append(comment_marker)

            if checkprefix not in global_vars_seen_dict:
                global_vars_seen_dict[checkprefix] = {}

            # Snapshot of the keys known before generalizing, used at the end
            # to find the globals that were newly defined by this function.
            global_vars_seen_before = [key for key in global_vars_seen.keys()]

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_dict[checkprefix][func_name].attrs)
            attrs = "" if attrs == "None" else attrs
            if version > 1:
                funcdef_attrs_and_ret = func_dict[checkprefix][
                    func_name
                ].funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig], is_analyze, vars_seen, global_vars_seen
                )[0]
            func_name_separator = func_dict[checkprefix][func_name].func_name_separator
            if "[[" in args_and_sig:
                # The signature contains FileCheck variables, so it is moved
                # out of the -LABEL line into a separate -SAME line.
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        "",
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_dict[checkprefix][func_name]).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    # The first body line is emitted as a plain check, without
                    # going through generalize_asm_check_lines.
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key in global_vars_seen:
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                # Checks were emitted under this prefix; move on to the next
                # RUN line.
                break

            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            func_body = generalize_check_lines(
                func_body, is_analyze, vars_seen, global_vars_seen
            )

            # This could be selectively enabled with an optional invocation argument.
            # Disabled for now: better to check everything. Be safe rather than sorry.

            # Handle the first line of the function body as a special case because
            # it's often just noise (a useless asm comment or entry label).
            # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
            #  is_blank_line = True
            # else:
            #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
            #  is_blank_line = False

            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == "":
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                # Skip blank lines instead of checking them.
                if is_blank_line:
                    # The first line after a blank one gets a plain check
                    # (no "-NEXT"), since the lines in between were dropped.
                    output_lines.append(
                        "{} {}:       {}".format(comment_marker, checkprefix, func_line)
                    )
                else:
                    check_suffix = "-NEXT" if not is_filtered else ""
                    output_lines.append(
                        "{} {}{}:  {}".format(
                            comment_marker, checkprefix, check_suffix, func_line
                        )
                    )
                is_blank_line = False

            # Add space between different check prefixes and also before the first
            # line of code in the test function.
            output_lines.append(comment_marker)

            # Remember new global variables we have not seen before
            for key in global_vars_seen:
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
            # Checks were emitted under this prefix; move on to the next RUN
            # line.
            break
    return printed_prefixes
1437
1438
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Append IR check lines for `func_name` to `output_lines`.

    Builds the -LABEL format from the IR function definition syntax and
    delegates to add_checks, passing `preserve_names` as its is_analyze
    argument. Returns whatever add_checks returns.
    """
    # Label format is based on IR string.
    if not function_sig:
        function_def_regex = "%s"
    elif version > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        check_label_format,
        is_backend=False,
        is_analyze=preserve_names,
        version=version,
        global_vars_seen_dict=global_vars_seen_dict,
        is_filtered=is_filtered,
    )
1474
1475
def add_analyze_checks(
    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
):
    """Append check lines for analysis output of `func_name` to `output_lines`.

    Delegates to add_checks with is_analyze set, version 1 and a fresh, empty
    dictionary of seen global variables. Returns whatever add_checks returns.
    """
    label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
    return add_checks(
        output_lines,
        comment_marker,
        prefix_list,
        func_dict,
        func_name,
        label_format,
        is_backend=False,
        is_analyze=True,
        version=1,
        global_vars_seen_dict={},
        is_filtered=is_filtered,
    )
1494
1495
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
    """Record the global value definitions found in `raw_tool_output`.

    For every kind of nameless value that has a right-hand-side regexp, all
    lines of the form "<lhs> = <rhs>" are collected and stored in
    glob_val_dict[prefix][check_prefix] for each prefix in `prefixes`. If a
    prefix already holds different lines for that kind, the entry is set to
    None, except for the last prefix, where a warning is issued and the new
    lines replace the old ones. Prefixes whose dictionary is None are skipped.
    """
    for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values):
        rhs_re_str = nameless_value.global_ir_rhs_regexp
        if rhs_re_str is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        global_ir_value_re = re.compile(
            r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$", flags=(re.M)
        )
        lines = [m.group(0) for m in global_ir_value_re.finditer(raw_tool_output)]

        check_prefix = nameless_value.check_prefix
        for prefix in prefixes:
            recorded = glob_val_dict[prefix]
            if recorded is None:
                continue
            if check_prefix in recorded:
                if lines == recorded[check_prefix]:
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting output under a shared prefix: mark unusable.
                    recorded[check_prefix] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            recorded[check_prefix] = lines
1522
1523
def filter_globals_according_to_preference(
    global_val_lines, global_vars_seen, nameless_value, global_check_setting
):
    """Select the global definition lines that should get check lines.

    global_val_lines: list of definition lines of the form
      "<ir_prefix><name> = <rhs>" for the kind described by `nameless_value`.
    global_vars_seen: iterable of (name, check_key) pairs of the globals that
      are already referenced.
    nameless_value: object providing check_key, ir_prefix, ir_regexp and
      global_ir_rhs_regexp (a NamelessValue).
    global_check_setting: "none" (returns an empty list), "all" (returns
      `global_val_lines` itself) or "smart".

    With "smart", attribute sets (check_key "#") are dropped entirely. For
    other kinds, only the definitions reachable from `global_vars_seen`
    through references on the right-hand sides are kept, in their original
    order.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    # Built once per call instead of once per line.
    definition_re = re.compile(
        "^"
        + nameless_value.ir_prefix
        + "("
        + nameless_value.ir_regexp
        + ") = ("
        + nameless_value.global_ir_rhs_regexp
        + ")"
    )
    reference_re = re.compile(nameless_value.ir_regexp)

    # Parse every line exactly once: remember which name it defines and which
    # names its right-hand side refers to.
    defined_vars = []
    contains_refs_to = {}
    for line in global_val_lines:
        match = definition_re.match(line)
        var = match.group(1)
        defined_vars.append(var)
        contains_refs_to[var] = reference_re.findall(match.group(2))

    # Compute the transitive closure with an explicit worklist. A recursive
    # walk would exceed the interpreter's recursion limit on long reference
    # chains (e.g. deeply nested metadata).
    transitively_visible = set()
    worklist = [
        var
        for var, check_key in global_vars_seen
        if check_key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in transitively_visible:
            continue
        transitively_visible.add(var)
        worklist.extend(contains_refs_to.get(var, ()))

    return [
        line
        for line, var in zip(global_val_lines, defined_vars)
        if var in transitively_visible
    ]
1576
1577
# (pattern, replacement) pairs that replace parts of metadata lines which
# differ between builds or machines with FileCheck wildcards.
METADATA_FILTERS = [
    # Version strings that directly follow a double quote. The optional word
    # in front of "<name> version" is matched as well, to capture an optional
    # CLANG_VENDOR.
    (
        r"(?<=\")(\w+ )?(\w+ version )[\d.]+(?: \([^)]+\))?",
        r"{{.*}}\2{{.*}}",
    ),
    # The directory recorded in a !DIFile.
    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),
]
METADATA_FILTERS_RE = [
    (re.compile(pattern), replacement) for pattern, replacement in METADATA_FILTERS
]


def filter_unstable_metadata(line):
    """Return `line` with every METADATA_FILTERS replacement applied in order."""
    for pattern, replacement in METADATA_FILTERS_RE:
        line = pattern.sub(replacement, line)
    return line
1589
1590
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    is_analyze,
    is_before_functions,
    global_check_setting,
):
    """Append check lines for global values to output_lines.

    For every entry of global_nameless_values whose is_before_functions flag
    equals the is_before_functions argument, and for every run-list entry in
    prefix_list, the check prefixes of that entry are tried in order and the
    first one that yields at least one check line is emitted.  Each emitted
    group is preceded by a separator line, and one closing separator is added
    if anything was emitted at all.

    glob_val_dict is indexed first by check prefix and then by the nameless
    value's check_prefix, giving the candidate global lines.
    global_vars_seen_dict maps a check prefix to the variables already
    generalized for it; it is updated in place with the variables first seen
    here.

    Returns the set of (check prefix, nameless value check_prefix) pairs for
    which checks were written.
    """
    emitted = set()
    for nameless_value in global_nameless_values:
        if nameless_value.is_before_functions != is_before_functions:
            continue
        kind = nameless_value.check_prefix
        for run_entry in prefix_list:
            seen_vars = {}
            run_prefixes = run_entry[0]
            if run_prefixes is None:
                continue
            for checkprefix in run_prefixes:
                # Accumulate what earlier prefixes of this run already know,
                # creating the per-prefix record on first use.
                if checkprefix not in global_vars_seen_dict:
                    global_vars_seen_dict[checkprefix] = {}
                else:
                    seen_vars.update(global_vars_seen_dict[checkprefix])
                if (checkprefix, kind) in emitted:
                    break

                globals_for_prefix = glob_val_dict[checkprefix]
                if not globals_for_prefix or kind not in globals_for_prefix:
                    continue
                candidates = globals_for_prefix[kind]
                if not candidates:
                    continue

                previously_seen = list(seen_vars.keys())
                candidates = filter_globals_according_to_preference(
                    candidates, previously_seen, nameless_value, global_check_setting
                )
                check_lines = []
                for line in candidates:
                    # When value regexes were supplied, keep only the lines
                    # defining a matching @global or !metadata name.
                    if _global_value_regex and not any(
                        re.match("^@" + regex + " = ", line)
                        or re.match("^!" + regex + " = ", line)
                        for regex in _global_value_regex
                    ):
                        continue
                    generalized = filter_unstable_metadata(
                        generalize_global_check_line(line, is_analyze, seen_vars)
                    )
                    check_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, generalized)
                    )
                if not check_lines:
                    continue

                output_lines.append(comment_marker + SEPARATOR)
                output_lines.extend(check_lines)
                emitted.add((checkprefix, kind))

                # Remember new global variables we have not seen before.
                global_vars_seen_dict[checkprefix].update(
                    {
                        key: value
                        for key, value in seen_vars.items()
                        if key not in previously_seen
                    }
                )
                break

    if emitted:
        output_lines.append(comment_marker + SEPARATOR)
    return emitted
1665
1666
def check_prefix(prefix):
    """Warn if prefix does not match PREFIX_RE.

    When the rejected prefix contains a comma, the warning also suggests the
    '--check-prefixes=' spelling.  Nothing is returned and nothing is raised
    for an invalid prefix; the problem is only reported through warn().
    """
    if PREFIX_RE.match(prefix):
        return
    # Format the fixed part first and append the hint afterwards.  The hint
    # embeds the user-supplied prefix, which must never become part of a
    # format string: a '%' in it would make the '%' operator fail.
    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        % (prefix,)
    )
    if "," in prefix:
        message += " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(message)
1679
1680
def get_check_prefixes(filecheck_cmd):
    """Return the check prefixes named in filecheck_cmd.

    Every match of CHECK_PREFIX_RE contributes its first group, split on
    commas, in order of appearance.  If there is no match at all the result
    is ["CHECK"].
    """
    found = []
    for match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        found.extend(match.group(1).split(","))
    return found if found else ["CHECK"]
1690
1691
def verify_filecheck_prefixes(fc_cmd):
    """Validate every prefix given on a FileCheck command line.

    The command is split on whitespace.  A word containing 'check-prefix='
    has the text after its first '=' validated as a single prefix.  A word
    containing 'check-prefixes=' has that text split on commas; each element
    is validated and a warning is issued for each element that occurs more
    than once in the list.
    """
    for word in fc_cmd.split():
        if "check-prefix=" in word:
            check_prefix(word.split("=", 1)[1])
            continue
        if "check-prefixes=" not in word:
            continue
        names = word.split("=", 1)[1].split(",")
        for name in names:
            check_prefix(name)
            if names.count(name) > 1:
                warn(
                    "Supplied prefix '%s' is not unique in the prefix list."
                    % (name,)
                )
1707
1708
def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS_KEY suffix listing the non-default options in args.

    Walks the actions registered on parser and, for each one whose value in
    args differs from its default, appends the option string (and its value,
    when the option takes one).  Returns "" when nothing needs recording and
    otherwise " <UTC_ARGS_KEY> <options>".
    """
    # Parameters such as paths to the binary or the list of tests are never
    # recorded.
    ignored_dests = (
        "tests",
        "update_only",
        "tool_binary",
        "opt_binary",
        "llc_binary",
        "clang",
        "opt",
        "llvm_bin",
        "verbose",
        "force_update",
    )
    note = ""
    for action in parser._actions:
        dest = action.dest
        if not hasattr(args, dest):
            continue  # Options such as --help are not included in args
        if dest in ignored_dests:
            continue
        value = getattr(args, dest)

        if dest == "check_globals":
            # The default depends on the version: 'none' before 3, 'smart'
            # from 3 on.
            if value == ("none" if args.version < 3 else "smart"):
                continue
            note += action.option_strings[0] + " "
            # Before version 3 the value 'all' is written as the bare option.
            if not (args.version < 3 and value == "all"):
                note += "%s " % value
            continue

        # For an action storing a constant (usually True/False), skip the one
        # whose constant differs from the value; this happens with boolean
        # --foo/--no-foo options.
        if action.const is not None and value != action.const:
            continue
        if parser.get_default(dest) == value:
            continue  # Don't add default values
        if dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2

        if dest == "filters":
            # The value is a list of Filter objects; write a separate option
            # for each one and never repeat an option already present.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                new_arg = '--%s "%s" ' % (opt_name, elem.pattern().strip('"'))
                if new_arg not in note:
                    note += new_arg
            continue

        note += action.option_strings[0] + " "
        if action.const is None:  # action takes a parameter
            if action.nargs == "+":
                value = " ".join('"' + v.strip('"') + '"' for v in value)
            note += "%s " % value

    if note:
        # Drop the trailing space left by the last appended option.
        note = " %s %s" % (UTC_ARGS_KEY, note[:-1])
    return note
1768
1769
def check_for_command(line, parser, args, argv, argparse_callback):
    """Apply the options of a UTC_ARGS_CMD line, if line is one.

    When line matches UTC_ARGS_CMD, the shell-style words of its 'cmd' group
    are appended to argv (in place) and the arguments are parsed again from
    argv with the entries of args.tests left out.  argparse_callback, when
    not None, is called with the newly parsed arguments.

    Returns the (possibly new) args together with argv.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if not cmd_m:
        return args, argv

    argv.extend(word for word in shlex.split(cmd_m.group("cmd").strip()) if word)

    def is_not_test_path(arg):
        return arg not in args.tests

    args = parse_args(parser, filter(is_not_test_path, argv))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
1780
1781
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option for a test, optionally searching its per-line args.

    get_arg_to_check is first applied to test_info.args.  If that gives a
    falsy result and is_global is true, it is applied to the args of each
    line from test_info.ro_iterlines() and the first truthy result wins: a
    "global" option affects the entire generation of test checks, so if it
    exists anywhere in the test it applies to everything.

    A warning naming arg_string is printed to stderr when the option is found
    after a line that is neither blank nor starts with ';'.

    Returns the value found, or the last falsy value obtained if there is
    none.
    """
    result = get_arg_to_check(test_info.args)
    if result or not is_global:
        return result

    saw_line = False
    for line_info in test_info.ro_iterlines():
        line = line_info.line
        if not line.startswith(";") and line.strip() != "":
            saw_line = True
        result = get_arg_to_check(line_info.args)
        if not result:
            continue
        # The condition used to read "warn and saw_line"; "warn" is a
        # function and therefore always true, so only saw_line matters.
        if saw_line:
            # We saw the option after already reading some test input lines.
            # Warn about it.
            print(
                "WARNING: Found {} in line following test start: ".format(arg_string)
                + line,
                file=sys.stderr,
            )
            print(
                "WARNING: Consider moving {} to top of file".format(arg_string),
                file=sys.stderr,
            )
        break
    return result
1811
1812
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the lines of test_info to output_lines, dropping old check lines.

    A line from test_info.iterlines(output_lines) is dropped when, ignoring
    surrounding whitespace, it is just comment_string or comment_string
    followed by SEPARATOR, or when it is a comment matched by CHECK_RE whose
    first group is in prefix_set.  Every other line is appended to
    output_lines without its trailing newline characters.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        stripped = line.strip()
        # Bare comment markers and separator lines are not carried over.
        if stripped == comment_string or stripped == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            # An existing check line for one of the given prefixes.
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
1826
1827
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for the functions of func_order, grouped by check prefix.

    prefix_list is a run list whose items have a list of check prefixes as
    their first element and the tool command args as their second.
    func_order maps a check prefix to the function names to emit for it.
    check_generator(output_lines, run_list, func) writes the checks for one
    function and returns the prefixes it generated checks for.

    A comment_string line is appended to output_lines before every function
    except the first.  Items whose prefix list is None are skipped.

    Returns the set of all prefixes reported by check_generator.
    """
    added = set()
    generated_prefixes = set()
    for run_entry in prefix_list:
        prefixes = run_entry[0]
        tool_args = run_entry[1]
        if prefixes is None:
            # Nothing to emit for this run list item.
            continue
        for prefix in prefixes:
            for func in func_order[prefix]:
                # The func order can contain the same functions multiple
                # times.  Skip a function already handled for this prefix.
                if (func, prefix) in added:
                    continue
                if added:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element.  They output
                # checks for each prefix in the list of prefixes.  By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function.  That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in.  This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix.  So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                for generated_prefix in check_generator(
                    output_lines, [([prefix], tool_args)], func
                ):
                    added.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
1865