xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 4488ee259064532dda24024a7e96037fe9a3c0d9)
1from __future__ import print_function
2
3import argparse
4import copy
5import glob
6import itertools
7import os
8import re
9import subprocess
10import sys
11import shlex
12
13from typing import List
14
15##### Common utilities for update_*test_checks.py
16
17
# Module-wide verbosity flag. parse_commandline_args() sets it from --verbose
# and debug() consults it before printing anything.
_verbose = False
# Prefix for FileCheck IR value names. TestInfo.__init__ overwrites it when a
# non-empty --prefix-filecheck-ir-name is given.
_prefix_filecheck_ir_name = ""

"""
Version changelog:

1: Initial version, used by tests that don't specify --version explicitly.
2: --function-signature is now enabled by default and also checks return
   type/attributes.
3: --check-globals now has a third option ('smart'). The others are now called
   'none' and 'all'. 'smart' is the default.
"""
# Output format version that itertests() injects (as a --version argument)
# for tests that are not being regenerated.
DEFAULT_VERSION = 3
31
32
class Regex(object):
    """Deep-copy friendly holder for a compiled regular expression.

    do_scrub deep-copies its scrubber arguments; wrapping the compiled pattern
    lets that copy share the pattern object instead of duplicating it.
    """

    def __init__(self, regex):
        # regex is the compiled pattern every other method delegates to.
        self.regex = regex

    def __deepcopy__(self, memo):
        # Shallow-copy the wrapper (keeping any attributes a subclass added)
        # and point the copy at the very same compiled pattern.
        duplicate = copy.copy(self)
        duplicate.regex = self.regex
        return duplicate

    def search(self, line):
        """Return the result of searching *line* with the wrapped pattern."""
        compiled = self.regex
        return compiled.search(line)

    def sub(self, repl, line):
        """Return *line* with matches of the wrapped pattern replaced by *repl*."""
        compiled = self.regex
        return compiled.sub(repl, line)

    def pattern(self):
        """Return the source text of the wrapped pattern."""
        compiled = self.regex
        return compiled.pattern

    def flags(self):
        """Return the flags of the wrapped pattern."""
        compiled = self.regex
        return compiled.flags
59
class Filter(Regex):
    """A Regex that also records how a match affects the generated checks.

    When is_filter_out is false a matching line is kept; when it is true a
    matching line is removed.
    """

    def __init__(self, regex, is_filter_out):
        super().__init__(regex)
        self.is_filter_out = is_filter_out

    def __deepcopy__(self, memo):
        # Let the base class clone the wrapper, then copy the flag separately.
        clone = super().__deepcopy__(memo)
        clone.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
        return clone
74
75
def parse_commandline_args(parser):
    """Register the options shared by the update scripts and parse sys.argv.

    The common options (filtering, verbosity, update policy, value regexes,
    output version, ...) are added to *parser*, the process command line is
    parsed, and the module globals _verbose, _global_value_regex and
    _global_hex_value_regex are set from the result.

    Args:
        parser: an argparse.ArgumentParser to extend and parse with.

    Returns:
        The argparse namespace produced by parser.parse_args().

    Raises:
        ValueError: if a --filter/--filter-out value is not a valid regular
            expression.
    """

    class RegexAction(argparse.Action):
        """Add a regular expression option value to a list of regular expressions.
        This compiles the expression, wraps it in a Regex and adds it to the option
        value list."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            if nargs is not None:
                raise ValueError("nargs not allowed")
            super(RegexAction, self).__init__(option_strings, dest, **kwargs)

        def do_call(self, namespace, values, flags, option_string=None):
            value_list = getattr(namespace, self.dest)
            if value_list is None:
                value_list = []

            # Only the compilation can raise re.error, so it is the only thing
            # inside the try. option_string is passed in explicitly: it is a
            # parameter of __call__ and is not otherwise visible here.
            try:
                compiled = re.compile(values, flags)
            except re.error as error:
                raise ValueError(
                    "{}: Invalid regular expression '{}' ({})".format(
                        option_string, error.pattern, error.msg
                    )
                ) from error
            value_list.append(Regex(compiled))

            setattr(namespace, self.dest, value_list)

        def __call__(self, parser, namespace, values, option_string=None):
            self.do_call(namespace, values, 0, option_string)

    class FilterAction(RegexAction):
        """Add a filter to a list of filter option values."""

        def __init__(self, option_strings, dest, nargs=None, **kwargs):
            super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            super(FilterAction, self).__call__(parser, namespace, values, option_string)

            value_list = getattr(namespace, self.dest)

            is_filter_out = option_string == "--filter-out"

            # The base action appended a plain Regex; upgrade it to a Filter
            # that remembers which of the two options produced it.
            value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

            setattr(namespace, self.dest, value_list)

    filter_group = parser.add_argument_group(
        "filtering",
        """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""",
    )

    filter_group.add_argument(
        "--filter",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Only include lines matching REGEX (may be specified multiple times)",
    )
    filter_group.add_argument(
        "--filter-out",
        action=FilterAction,
        dest="filters",
        metavar="REGEX",
        help="Exclude lines matching REGEX",
    )

    parser.add_argument(
        "--include-generated-funcs",
        action="store_true",
        help="Output checks for functions not in source",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Show verbose output"
    )
    parser.add_argument(
        "-u",
        "--update-only",
        action="store_true",
        help="Only update test if it was already autogened",
    )
    parser.add_argument(
        "--force-update",
        action="store_true",
        help="Update test even if it was autogened by a different script",
    )
    parser.add_argument(
        "--enable",
        action="store_true",
        dest="enabled",
        default=True,
        help="Activate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--disable",
        action="store_false",
        dest="enabled",
        help="Deactivate CHECK line generation from this point forward",
    )
    parser.add_argument(
        "--replace-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions to replace matching value names",
    )
    parser.add_argument(
        "--prefix-filecheck-ir-name",
        default="",
        help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names",
    )
    parser.add_argument(
        "--global-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)",
    )
    parser.add_argument(
        "--global-hex-value-regex",
        nargs="+",
        default=[],
        help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives",
    )
    # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
    # we need to rename the flag to just -generate-body-for-unused-prefixes.
    parser.add_argument(
        "--no-generate-body-for-unused-prefixes",
        action="store_false",
        dest="gen_unused_prefix_body",
        default=True,
        help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.",
    )
    # This is the default when regenerating existing tests. The default when
    # generating new tests is determined by DEFAULT_VERSION.
    parser.add_argument(
        "--version", type=int, default=1, help="The version of output format"
    )
    args = parser.parse_args()
    global _verbose, _global_value_regex, _global_hex_value_regex
    _verbose = args.verbose
    _global_value_regex = args.global_value_regex
    _global_hex_value_regex = args.global_hex_value_regex
    return args
219
220
def parse_args(parser, argv):
    """Parse *argv* with *parser* and apply the version-dependent defaults.

    From version 2 on, function_signature is forced to True. When the parser
    defines check_globals and it still holds the placeholder "default", it is
    resolved to "none" for versions below 3 and to "smart" otherwise.
    """
    parsed = parser.parse_args(argv)
    version = parsed.version
    if version >= 2:
        parsed.function_signature = True
    if "check_globals" in parsed and parsed.check_globals == "default":
        parsed.check_globals = "smart" if version >= 3 else "none"
    return parsed
228
229
class InputLineInfo(object):
    """One input line together with its index and the args/argv given for it."""

    def __init__(self, line, line_number, args, argv):
        self.line, self.line_number = line, line_number
        self.args, self.argv = args, argv
236
237
class TestInfo(object):
    """State for one test file that is being (re)generated.

    Holds the test path, its input lines, the RUN lines found in them, the
    current args/argv and the comment-based notes written into the output.
    """

    def __init__(
        self,
        test,
        parser,
        script_name,
        input_lines,
        args,
        argv,
        comment_prefix,
        argparse_callback,
    ):
        self.parser = parser
        self.argparse_callback = argparse_callback
        self.path = test
        self.args = args
        # Only a non-empty prefix replaces the module-wide setting.
        if args.prefix_filecheck_ir_name:
            global _prefix_filecheck_ir_name
            _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
        self.argv = argv
        self.input_lines = input_lines
        self.run_lines = find_run_lines(test, self.input_lines)
        # Without an explicit comment prefix, .mir files use "#" and
        # everything else uses ";".
        if comment_prefix is None:
            comment_prefix = "#" if self.path.endswith(".mir") else ";"
        self.comment_prefix = comment_prefix
        self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
        self.test_autogenerated_note = (
            self.autogenerated_note_prefix
            + script_name
            + get_autogennote_suffix(parser, self.args)
        )
        self.test_unused_note = self.comment_prefix * 2 + " " + UNUSED_NOTE

    def ro_iterlines(self):
        """Yield an InputLineInfo per input line without touching self.args/argv."""
        for index, text in enumerate(self.input_lines):
            line_args, line_argv = check_for_command(
                text, self.parser, self.args, self.argv, self.argparse_callback
            )
            yield InputLineInfo(text, index, line_args, line_argv)

    def iterlines(self, output_lines):
        """Yield the lines to process, writing pass-through lines to *output_lines*.

        The autogenerated note is appended first. Existing note lines are
        dropped, self.args/self.argv follow each line's settings, and lines
        seen while args.enabled is false are copied to *output_lines* instead
        of being yielded.
        """
        output_lines.append(self.test_autogenerated_note)
        for info in self.ro_iterlines():
            text = info.line
            # Discard any previous script advertising.
            if text.startswith(self.autogenerated_note_prefix):
                continue
            self.args, self.argv = info.args, info.argv
            if self.args.enabled:
                yield info
            else:
                output_lines.append(text)

    def get_checks_for_unused_prefixes(
        self, run_list, used_prefixes: List[str]
    ) -> List[str]:
        """Return the always-matching check lines for prefixes that went unused.

        Entries of *run_list* whose first element is None are ignored. The
        result is empty when every remaining prefix appears in
        *used_prefixes*; otherwise it is the unused note followed by one line
        per unused prefix, in sorted order.
        """
        declared = {
            prefix
            for entry in run_list
            if entry[0] is not None
            for prefix in entry[0]
        }
        unused = declared - set(used_prefixes)
        if not unused:
            return []
        checks = [self.test_unused_note]
        checks.extend(
            "{comment} {prefix}: {match_everything}".format(
                comment=self.comment_prefix,
                prefix=name,
                match_everything=r"""{{.*}}""",
            )
            for name in sorted(unused)
        )
        return checks
315
316
def itertests(
    test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None
):
    """Yield a TestInfo for every test file matched by *test_patterns*.

    Patterns that match nothing are reported with a warning and skipped. A
    test whose first line carries the autogenerated note is treated as a
    regeneration; it is skipped when it came from a different script (unless
    --force-update is set). Tests without the note get the default version
    injected and are skipped when --update-only is set. Input lines from the
    unused-prefix note onwards are not passed on.
    """
    for pattern in test_patterns:
        # On Windows we must expand the patterns ourselves.
        matched_tests = glob.glob(pattern)
        if not matched_tests:
            warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
            continue
        for test in matched_tests:
            with open(test) as test_file:
                input_lines = [text.rstrip() for text in test_file]
            first_line = input_lines[0] if input_lines else ""
            is_regenerate = UTC_ADVERT in first_line

            # If we're generating a new test, set the default version to the latest.
            argv = sys.argv[:]
            if not is_regenerate:
                argv.insert(1, "--version=" + str(DEFAULT_VERSION))

            args = parse_args(parser, argv[1:])
            if argparse_callback is not None:
                argparse_callback(args)
            if is_regenerate:
                if script_name not in first_line and not args.force_update:
                    warn(
                        "Skipping test which wasn't autogenerated by " + script_name,
                        test,
                    )
                    continue
                args, argv = check_for_command(
                    first_line, parser, args, argv, argparse_callback
                )
            elif args.update_only:
                assert UTC_ADVERT not in first_line
                warn("Skipping test which isn't autogenerated: " + test)
                continue
            # Keep only the lines that precede the unused-prefix note.
            final_input_lines = list(
                itertools.takewhile(lambda text: UNUSED_NOTE not in text, input_lines)
            )
            yield TestInfo(
                test,
                parser,
                script_name,
                final_input_lines,
                args,
                argv,
                comment_prefix,
                argparse_callback,
            )
369
370
def should_add_line_to_output(
    input_line, prefix_set, skip_global_checks=False, comment_marker=";"
):
    """Return whether *input_line* should be copied into the regenerated test.

    Dropped lines are: a bare comment marker (unless skip_global_checks is
    set), the comment marker followed by SEPARATOR, and existing check lines
    whose prefix is in *prefix_set*. With skip_global_checks set, such a
    check line is only dropped when it contains "[[".
    """
    stripped = input_line.strip()
    # Skip any blank comment lines in the IR.
    if not skip_global_checks and stripped == comment_marker:
        return False
    # Skip a special double comment line we use as a separator.
    if stripped == comment_marker + SEPARATOR:
        return False
    # Blank lines are deliberately kept.
    check = CHECK_RE.match(input_line)
    if not (check and check.group(1) in prefix_set):
        return True
    # This is one of our own CHECK lines; we are building new ones.
    if skip_global_checks:
        return "[[" not in input_line
    return False
392
393
394# Perform lit-like substitutions
def getSubstitutions(sourcepath):
    """Return the (placeholder, value) pairs derived from *sourcepath*.

    %s expands to the path itself, %S and %p to its directory and
    %{pathsep} to os.pathsep.
    """
    sourcedir = os.path.dirname(sourcepath)
    placeholders = ("%s", "%S", "%p", "%{pathsep}")
    values = (sourcepath, sourcedir, sourcedir, os.pathsep)
    return list(zip(placeholders, values))
403
404
def applySubstitutions(s, substitutions):
    """Return *s* after applying each (old, new) replacement in order.

    Later pairs see the text produced by earlier ones.
    """
    result = s
    for placeholder, replacement in substitutions:
        result = result.replace(placeholder, replacement)
    return result
409
410
411# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
    """Run the tool under test on a test file and return its standard output.

    Args:
        exe: The executable to run.
        cmd_args: Either a list of arguments (run without a shell) or a single
            string that is appended to *exe* and run through the shell.
        ir: Path of the input file. Its contents are fed to the tool on stdin
            unless *preprocess_cmd* is given; the path also drives the
            lit-like substitutions applied to the commands.
        preprocess_cmd: Optional shell command string whose stdout is fed to
            the tool instead of the file itself.
        verbose: When true, report the pre-processing command on stderr.

    Returns:
        The tool's stdout (decoded to text on Python 3) with CRLF line endings
        converted to LF.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero
            status (raised by subprocess.check_output).
    """
    preprocess_proc = None
    with open(ir) as ir_file:
        substitutions = getSubstitutions(ir)
        tool_stdin = ir_file

        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if preprocess_cmd:
            # Allow pre-processing the IR file (e.g. using sed):
            assert isinstance(
                preprocess_cmd, str
            )  # TODO: use a list instead of using shell
            preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
            if verbose:
                print(
                    "Pre-processing input file: ",
                    ir,
                    " with command '",
                    preprocess_cmd,
                    "'",
                    sep="",
                    file=sys.stderr,
                )
            # Python 2.7 doesn't have subprocess.DEVNULL:
            with open(os.devnull, "w") as devnull:
                preprocess_proc = subprocess.Popen(
                    preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
                )
            tool_stdin = preprocess_proc.stdout

        try:
            if isinstance(cmd_args, list):
                args = [applySubstitutions(a, substitutions) for a in cmd_args]
                stdout = subprocess.check_output([exe] + args, stdin=tool_stdin)
            else:
                stdout = subprocess.check_output(
                    exe + " " + applySubstitutions(cmd_args, substitutions),
                    shell=True,
                    stdin=tool_stdin,
                )
        finally:
            if preprocess_proc is not None:
                # Close our end of the pipe first so a pre-processor that is
                # still writing stops, then reap it so no zombie is left
                # behind. Its exit status is ignored, as before.
                preprocess_proc.stdout.close()
                preprocess_proc.wait()
    if sys.version_info[0] > 2:
        # FYI, if you crashed here with a decode error, your run line probably
        # results in bitcode or other binary format being written to the pipe.
        # For an opt test, you probably want to add -S or -disable-output.
        stdout = stdout.decode()
    # Normalize Windows (CRLF) line endings to Unix (LF) style.
    return stdout.replace("\r\n", "\n")
458
459
460##### LLVM IR parser
# A RUN line behind a "//", ";" or "#" comment marker; group 1 is the text
# following "RUN:".
RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
# A -check-prefix/-check-prefixes option (one or two leading dashes, value
# separated by "=" or a space); group 1 is the option's value.
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
# A string made only of ASCII letters, digits, "_" and "-".
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
# A commented "<prefix>[-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY]:" directive;
# group 1 is the prefix without that optional suffix.
CHECK_RE = re.compile(
    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
)
467
# Marker that introduces script arguments embedded in a test file.
UTC_ARGS_KEY = "UTC_ARGS:"
# Matches a line containing the marker; the "cmd" group is the text after it.
# Both pattern pieces are raw strings so that "\s" reaches the regex engine
# unchanged instead of being an invalid string escape.
UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$")
# Start of the note identifying an autogenerated test; the script name follows.
UTC_ADVERT = "NOTE: Assertions have been autogenerated by "
# Note placed above the generated checks for unused prefixes.
UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:"
472
# An IR function definition, optionally preceded by a "; Function Attrs:"
# comment. Named groups: attrs (the attribute comment text),
# funcdef_attrs_and_ret (everything between "define" and "@"), func (the
# function name), args_and_sig (the parenthesized arguments through the
# opening "{") and body (the lines up to a line consisting of "}").
OPT_FUNCTION_RE = re.compile(
    r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*"
    r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$",
    flags=(re.M | re.S),
)

# Output of the form "'<analysis>' for function '<func>':" followed by a
# newline and the body, which extends to the end of the string.
ANALYZE_FUNCTION_RE = re.compile(
    r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':"
    r"\s*\n(?P<body>.*)$",
    flags=(re.X | re.S),
)

# Output that starts with a quoted function name on the first line; the body
# is everything after that line.
LV_DEBUG_RE = re.compile(
    r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
)

# A "define" line in IR; group 1 is the (optionally double-quoted) function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A 'target triple = "..."' line; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# The -mtriple, -march and -debug-only command-line options; group 1 is the
# option's value in each case.
TRIPLE_ARG_RE = re.compile(r"-mtriple[= ]([^ ]+)")
MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
494
# Whitespace at the very start of the string (group 1).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
# Runs of spaces/tabs; the lookahead rejects any run that begins at the start
# of a line, so leading indentation is never matched.
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
# Spaces/tabs at the end of each line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M)
# Alias used by scrub_body(); currently the same pattern object as above.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Trailing spaces/tabs and "#<number>" tokens at the end of each line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(
    r"([ \t]|(#[0-9]+))+$", flags=re.M
)
# A "# kill: ..." comment line including its newline.
# NOTE(review): compiled without re.M, so "^" only anchors at the start of the
# string -- confirm that is intended.
SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n")
# "# =>This Inner Loop Header: ..." and "# in Loop: ..." comments.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M
)
# A lone "#" at the end of a line (with any spaces/tabs before it) that
# follows a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output() drops.
SEPARATOR = "."
509
510
def error(msg, test_file=None):
    """Print "ERROR: <msg>" to stderr, appending ": <test_file>" when given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("ERROR: {}".format(text), file=sys.stderr)
515
516
def warn(msg, test_file=None):
    """Print "WARNING: <msg>" to stderr, appending ": <test_file>" when given."""
    text = "{}: {}".format(msg, test_file) if test_file else msg
    print("WARNING: {}".format(text), file=sys.stderr)
521
522
def debug(*args, **kwargs):
    """Forward to print() when the module is verbose; stderr is the default file."""
    # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
    kwargs.setdefault("file", sys.stderr)
    if _verbose:
        print(*args, **kwargs)
529
530
def find_run_lines(test, lines):
    """Collect the RUN commands from *lines*, joining continued ones.

    A RUN line whose text ends in a backslash is merged with the next RUN
    line, separated by one space. *test* is only used in debug output.
    """
    debug("Scanning for RUN lines in test file:", test)
    run_lines = []
    for text in lines:
        found = RUN_LINE_RE.match(text)
        if not found:
            continue
        command = found.group(1)
        if run_lines and run_lines[-1].endswith("\\"):
            # The previous RUN line continues onto this one.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + command
        else:
            run_lines.append(command)
    debug("Found {} RUN lines in {}:".format(len(run_lines), test))
    for command in run_lines:
        debug("  RUN: {}".format(command))
    return run_lines
544
545
def get_triple_from_march(march):
    """Map a -march value to a triple by its prefix, defaulting to "x86".

    The fallback is announced on stderr.
    """
    # Every known prefix maps to a triple of the same name.
    known_prefixes = ("amdgcn", "r600", "mips", "sparc", "hexagon", "ve")
    for name in known_prefixes:
        if march.startswith(name):
            return name
    print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
    return "x86"
560
561
def apply_filters(line, filters):
    """Decide whether *line* survives *filters*; the first match decides.

    A matching include filter keeps the line and a matching filter-out drops
    it. With no match, the line is kept unless an include filter was present.
    """
    saw_include_filter = False
    for candidate in filters:
        saw_include_filter = saw_include_filter or not candidate.is_filter_out
        if candidate.search(line):
            return not candidate.is_filter_out
    # If we only used filter-out, keep the line, otherwise discard it since no
    # filter matched.
    return not saw_include_filter
572
573
def do_filter(body, filters):
    """Return *body* reduced to the lines that pass *filters*.

    Without filters the body is returned untouched; otherwise the surviving
    lines are re-joined with newlines.
    """
    if not filters:
        return body
    kept = [line for line in body.splitlines() if apply_filters(line, filters)]
    return "\n".join(kept)
582
583
def scrub_body(body):
    """Normalize whitespace in *body* and return the result.

    Whitespace runs that do not start a line collapse to one space, tabs are
    expanded with a tab size of 2, and trailing whitespace is removed.
    """
    # Scrub runs of whitespace, but leave the leading whitespace in place.
    collapsed = SCRUB_WHITESPACE_RE.sub(r" ", body)
    # Expand the tabs used for indentation.
    expanded = collapsed.expandtabs(2)
    # Strip trailing whitespace.
    return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", expanded)
593
594
def do_scrub(body, scrubber, scrubber_args, extra):
    """Call *scrubber* on *body*, setting extra_scrub on a copy of its args.

    When *scrubber_args* is non-empty it is deep-copied, the first copied
    argument gets extra_scrub = *extra*, and the copies are passed on, so the
    caller's arguments are never modified.
    """
    if not scrubber_args:
        return scrubber(body, *scrubber_args)
    private_args = copy.deepcopy(scrubber_args)
    private_args[0].extra_scrub = extra
    return scrubber(body, *private_args)
601
602
603# Build up a dictionary of all the function bodies.
class function_body(object):
    """Scrubbed output recorded for one function.

    scrub is the text returned by str(); extrascrub is the second scrubbed
    form, used by is_same_except_arg_names. The remaining attributes hold the
    signature pieces and the function-name separator given to the constructor.
    """

    def __init__(
        self,
        string,
        extra,
        funcdef_attrs_and_ret,
        args_and_sig,
        attrs,
        func_name_separator,
    ):
        self.scrub, self.extrascrub = string, extra
        self.funcdef_attrs_and_ret = funcdef_attrs_and_ret
        self.args_and_sig = args_and_sig
        self.attrs = attrs
        self.func_name_separator = func_name_separator

    def is_same_except_arg_names(
        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
    ):
        """Return whether the given function data matches this one up to argument names.

        The return/attribute text and attrs must be equal, and the signatures
        must be equal once argument names are dropped. For backend checks the
        extra-scrubbed bodies are then compared verbatim; otherwise they are
        compared after removing the collected argument names and IR comments.
        """
        if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret or self.attrs != attrs:
            return False

        # Filled in by drop_arg_names while the signatures are rewritten.
        arg_names = set()

        def drop_arg_names(match):
            arg_names.add(match.group(variable_group_in_ir_value_match))
            attr = match.group(attribute_group_in_ir_value_match) or ""
            return match.group(1) + attr + match.group(match.lastindex)

        def repl_arg_names(match):
            name = match.group(variable_group_in_ir_value_match)
            if name is not None and name in arg_names:
                return match.group(1) + match.group(match.lastindex)
            return match.group(1) + match.group(2) + match.group(match.lastindex)

        own_signature = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
        other_signature = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
        if own_signature != other_signature:
            return False
        if is_backend:
            # Check without replacements, the replacements are not applied to the
            # body for backend checks.
            return self.extrascrub == extrascrub

        def comparable(text):
            without_arg_names = IR_VALUE_RE.sub(repl_arg_names, text)
            return SCRUB_IR_COMMENT_RE.sub(r"", without_arg_names)

        return comparable(self.extrascrub) == comparable(extrascrub)

    def __str__(self):
        return self.scrub
663
664
665class FunctionTestBuilder:
666    def __init__(self, run_list, flags, scrubber_args, path):
667        self._verbose = flags.verbose
668        self._record_args = flags.function_signature
669        self._check_attributes = flags.check_attributes
670        # Strip double-quotes if input was read by UTC_ARGS
671        self._filters = (
672            list(
673                map(
674                    lambda f: Filter(
675                        re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out
676                    ),
677                    flags.filters,
678                )
679            )
680            if flags.filters
681            else []
682        )
683        self._scrubber_args = scrubber_args
684        self._path = path
685        # Strip double-quotes if input was read by UTC_ARGS
686        self._replace_value_regex = list(
687            map(lambda x: x.strip('"'), flags.replace_value_regex)
688        )
689        self._func_dict = {}
690        self._func_order = {}
691        self._global_var_dict = {}
692        self._processed_prefixes = set()
693        for tuple in run_list:
694            for prefix in tuple[0]:
695                self._func_dict.update({prefix: dict()})
696                self._func_order.update({prefix: []})
697                self._global_var_dict.update({prefix: dict()})
698
699    def finish_and_get_func_dict(self):
700        for prefix in self.get_failed_prefixes():
701            warn(
702                "Prefix %s had conflicting output from different RUN lines for all functions in test %s"
703                % (
704                    prefix,
705                    self._path,
706                )
707            )
708        return self._func_dict
709
    def func_order(self):
        """Return the per-prefix lists of function names, in the order process_run_line() recorded them."""
        return self._func_order
712
    def global_var_dict(self):
        """Return the per-prefix dictionary that process_run_line() passes to build_global_values_dictionary()."""
        return self._global_var_dict
715
716    def is_filtered(self):
717        return bool(self._filters)
718
    def process_run_line(
        self, function_re, scrubber, raw_tool_output, prefixes, is_backend
    ):
        """Record the functions found in one RUN line's output under *prefixes*.

        Every match of *function_re* in *raw_tool_output* is filtered and
        scrubbed and then merged into self._func_dict for each prefix: a new
        function_body is stored the first time, an entry that only differs in
        argument names is updated, and a conflicting entry is replaced by
        None. Global values are collected first through
        build_global_values_dictionary().

        Args:
            function_re: compiled pattern with at least "func" and "body"
                groups; other named groups are used when present.
            scrubber: callable handed to do_scrub() together with the stored
                scrubber arguments.
            raw_tool_output: the text to scan.
            prefixes: the check prefixes this RUN line contributes to.
            is_backend: selects the function-name separator handling and is
                forwarded to is_same_except_arg_names().
        """
        build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
        for m in function_re.finditer(raw_tool_output):
            # finditer() only yields match objects, so this guard never fires.
            if not m:
                continue
            func = m.group("func")
            body = m.group("body")
            # func_name_separator is the string that is placed right after function name at the
            # beginning of assembly function definition. In most assemblies, that is just a
            # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
            # False, just assume that separator is an empty string.
            if is_backend:
                # Use ':' as default separator.
                func_name_separator = (
                    m.group("func_name_separator")
                    if "func_name_separator" in m.groupdict()
                    else ":"
                )
            else:
                func_name_separator = ""
            attrs = m.group("attrs") if self._check_attributes else ""
            funcdef_attrs_and_ret = (
                m.group("funcdef_attrs_and_ret") if self._record_args else ""
            )
            # Determine if we print arguments, the opening brace, or nothing after the
            # function name
            if self._record_args and "args_and_sig" in m.groupdict():
                args_and_sig = scrub_body(m.group("args_and_sig").strip())
            elif "args_and_sig" in m.groupdict():
                args_and_sig = "("
            else:
                args_and_sig = ""
            filtered_body = do_filter(body, self._filters)
            # Scrub twice: once with extra_scrub off (the body that is stored)
            # and once with it on (used to compare bodies across RUN lines).
            scrubbed_body = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=False
            )
            scrubbed_extra = do_scrub(
                filtered_body, scrubber, self._scrubber_args, extra=True
            )
            if "analysis" in m.groupdict():
                analysis = m.group("analysis")
                if analysis.lower() != "cost model analysis":
                    warn("Unsupported analysis mode: %r!" % (analysis,))
            if func.startswith("stress"):
                # We only use the last line of the function body for stress tests.
                scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:])
            if self._verbose:
                print("Processing function: " + func, file=sys.stderr)
                for l in scrubbed_body.splitlines():
                    print("  " + l, file=sys.stderr)
            # NOTE(review): func and scrubbed_body are rewritten in place inside
            # this loop, so replacements made for one prefix carry over to the
            # next prefix -- confirm this is intended.
            for prefix in prefixes:
                # Replace function names matching the regex.
                for regex in self._replace_value_regex:
                    # Pattern that matches capture groups in the regex in leftmost order.
                    group_regex = re.compile(r"\(.*?\)")
                    # Replace function name with regex.
                    match = re.match(regex, func)
                    if match:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        func = re.sub(func_repl, "{{" + func_repl + "}}", func)

                    # Replace all calls to regex matching functions.
                    matches = re.finditer(regex, scrubbed_body)
                    for match in matches:
                        func_repl = regex
                        # Replace any capture groups with their matched strings.
                        for g in match.groups():
                            func_repl = group_regex.sub(
                                re.escape(g), func_repl, count=1
                            )
                        # Substitute function call names that match the regex with the same
                        # capture groups set.
                        scrubbed_body = re.sub(
                            func_repl, "{{" + func_repl + "}}", scrubbed_body
                        )

                if func in self._func_dict[prefix]:
                    # Only act when the stored entry is still valid (not None)
                    # and differs from what this RUN line produced.
                    if self._func_dict[prefix][func] is not None and (
                        str(self._func_dict[prefix][func]) != scrubbed_body
                        or self._func_dict[prefix][func].args_and_sig != args_and_sig
                        or self._func_dict[prefix][func].attrs != attrs
                        or self._func_dict[prefix][func].funcdef_attrs_and_ret
                        != funcdef_attrs_and_ret
                    ):
                        if self._func_dict[prefix][func].is_same_except_arg_names(
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            is_backend,
                        ):
                            # Only argument names differ: keep the entry but
                            # switch it to the extra-scrubbed body.
                            self._func_dict[prefix][func].scrub = scrubbed_extra
                            self._func_dict[prefix][func].args_and_sig = args_and_sig
                        else:
                            # This means a previous RUN line produced a body for this function
                            # that is different from the one produced by this current RUN line,
                            # so the body can't be common across RUN lines. We use None to
                            # indicate that.
                            self._func_dict[prefix][func] = None
                else:
                    if prefix not in self._processed_prefixes:
                        self._func_dict[prefix][func] = function_body(
                            scrubbed_body,
                            scrubbed_extra,
                            funcdef_attrs_and_ret,
                            args_and_sig,
                            attrs,
                            func_name_separator,
                        )
                        self._func_order[prefix].append(func)
                    else:
                        # An earlier RUN line used these check prefixes but didn't produce
                        # a body for this function. This happens in Clang tests that use
                        # preprocessor directives to exclude individual functions from some
                        # RUN lines.
                        self._func_dict[prefix][func] = None
841                        self._func_dict[prefix][func] = None
842
843    def processed_prefixes(self, prefixes):
844        """
845        Mark a set of prefixes as having had at least one applicable RUN line fully
846        processed. This is used to filter out function bodies that don't have
847        outputs for all RUN lines.
848        """
849        self._processed_prefixes.update(prefixes)
850
851    def get_failed_prefixes(self):
852        # This returns the list of those prefixes that failed to match any function,
853        # because there were conflicting bodies produced by different RUN lines, in
854        # all instances of the prefix.
855        for prefix in self._func_dict:
856            if self._func_dict[prefix] and (
857                not [
858                    fct
859                    for fct in self._func_dict[prefix]
860                    if self._func_dict[prefix][fct] is not None
861                ]
862            ):
863                yield prefix
864
865
866##### Generator of LLVM IR CHECK lines
867
# Matches an IR comment: optional leading whitespace, a ';' and the rest of the
# line. Substituting it with "" strips the comment from a line.
SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")

# TODO: We should also derive check lines for global, debug, loop declarations, etc.
871
872
class NamelessValue:
    """One kind of "unnamed" value and the rules for turning an occurrence of
    it into a FileCheck variable definition or use.

    check_prefix is the stem of the generated FileCheck variable name.
    check_key is stored for callers, which combine it with the value name to
    remember which values were already seen. ir_prefix and ir_regexp are
    regexp fragments for the text in front of the value and for the value
    itself. global_ir_rhs_regexp is None unless the kind describes a global
    definition.
    """

    def __init__(
        self,
        check_prefix,
        check_key,
        ir_prefix,
        ir_regexp,
        global_ir_rhs_regexp,
        *,
        is_before_functions=False,
        is_number=False,
        replace_number_with_counter=False,
        match_literally=False,
    ):
        # Positional description of the value kind.
        self.check_prefix = check_prefix
        self.check_key = check_key
        self.ir_prefix = ir_prefix
        self.ir_regexp = ir_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        # Optional behavior switches.
        self.is_before_functions = is_before_functions
        self.is_number = is_number
        # Some variable numbers (e.g. MCINST1234) change with unrelated
        # modifications to LLVM; those get renumbered with a counter instead.
        self.replace_number_with_counter = replace_number_with_counter
        self.match_literally = match_literally
        # Original number -> counter value, filled in by get_value_name.
        self.variable_mapping = {}

    def is_local_def_ir_value_match(self, match):
        """Tell whether this kind is "local", i.e. its IR prefix is '%'."""
        return self.ir_prefix == "%"

    def is_global_scope_ir_value_match(self, match):
        """Tell whether this kind is "global", i.e. has a right-hand-side regexp."""
        return not (self.global_ir_rhs_regexp is None)

    def get_ir_prefix_from_ir_value_match(self, match):
        """Return (IR prefix, check prefix) for this kind, e.g. (%, TMP) for
        locals. Since ir_prefix may be a regexp, the IR prefix returned is the
        text it actually matched inside `match`."""
        matched_prefix = re.search(self.ir_prefix, match[0])[0]
        return matched_prefix, self.check_prefix

    def get_ir_regex_from_ir_value_re_match(self, match):
        """Return the regexp used to capture a value of this kind."""
        # For backwards compatibility locals are checked with '.*'.
        return ".*" if self.is_local_def_ir_value_match(match) else self.ir_regexp

    def get_value_name(self, var: str, check_prefix: str):
        """Derive the FileCheck variable name for the IR name `var`."""
        name = var.replace("!", "")
        if self.replace_number_with_counter:
            assert name.isdigit(), name
            # The first time a number is seen it gets the next counter value.
            if name not in self.variable_mapping:
                self.variable_mapping[name] = str(len(self.variable_mapping) + 1)
            name = self.variable_mapping[name]
        if name.isdigit():
            # A nameless value: prepend check_prefix.
            name = check_prefix + name
        elif (
            may_clash_with_default_check_prefix_name(check_prefix, name)
            and _prefix_filecheck_ir_name
        ):
            # A named value that clashes with the check_prefix: prepend
            # _prefix_filecheck_ir_name, if it has been defined.
            name = _prefix_filecheck_ir_name + name
        return name.replace(".", "_").replace("-", "_").upper()

    def get_value_definition(self, var, match):
        """Return the text that defines the FileCheck variable for `var`."""
        variable = self.get_value_name(var, self.check_prefix)
        matched_prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
        if self.is_number:
            # Always capture a number in the default format.
            opener, pattern = "[[#", ""
        else:
            opener, pattern = "[[", self.get_ir_regex_from_ir_value_re_match(match)
        capture = opener + variable + ":"
        if self.is_local_def_ir_value_match(match):
            # For locals the IR prefix is part of the captured text.
            return capture + matched_prefix + pattern + "]]"
        return matched_prefix + capture + pattern + "]]"

    def get_value_use(self, var, match, var_prefix=None):
        """Return the text that uses the FileCheck variable for `var`.

        `var_prefix` overrides the check prefix used to build the variable
        name; it defaults to this kind's own check_prefix.
        """
        if var_prefix is None:
            var_prefix = self.check_prefix
        opener = "[[#" if self.is_number else "[["
        if self.is_local_def_ir_value_match(match):
            leading = ""
        else:
            leading = self.get_ir_prefix_from_ir_value_match(match)[0]
        return leading + opener + self.get_value_name(var, var_prefix) + "]]"
971
972
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
#
# The order of the entries matters: IR_VALUE_RE gets one alternative (with one
# capture group) per entry, in list order, and get_idx_from_ir_value_match()
# maps the group that matched back to an index into this list.
ir_nameless_values = [
    # Positional arguments, in order:
    #   check_prefix, check_key, ir_prefix, ir_regexp, global_ir_rhs_regexp
    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
    NamelessValue(
        r"GLOBNAMED",
        "@",
        r"@",
        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
        r".+",
        is_before_functions=True,
        match_literally=True,
    ),
    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
]
1003
# The kinds that describe a global definition, i.e. those that come with a
# regexp for the right-hand side.
global_nameless_values = [
    nv for nv in ir_nameless_values if nv.global_ir_rhs_regexp is not None
]
# Global variable names should be matched literally, so this keeps only the
# global kinds that are not flagged match_literally.
global_nameless_values_w_unstable_ids = [
    nv for nv in global_nameless_values if not nv.match_literally
]
1015
# Description of the "unnamed" values we match in asm output. Both kinds are
# captured as numbers (is_number) and have their original number replaced by an
# incrementing counter (replace_number_with_counter).
# Positional arguments, in order:
#   check_prefix, check_key, ir_prefix, ir_regexp, global_ir_rhs_regexp
asm_nameless_values = [
    NamelessValue(
        r"MCINST",
        "Inst#",
        "<MCInst #",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
    NamelessValue(
        r"MCREG",
        "Reg:",
        "<MCOperand Reg:",
        r"\d+",
        r".+",
        is_number=True,
        replace_number_with_counter=True,
    ),
]
1036
1037
def createOrRegexp(old, new):
    """Combine two regexp alternatives with '|'.

    If either side is empty the other one is returned unchanged, so no empty
    alternative is ever added.
    """
    if old and new:
        return old + "|" + new
    # At least one side is empty: hand back the other one.
    return new if not old else old
1044
1045
def createPrefixMatch(prefix_str, prefix_re):
    """Return a regexp matching `prefix_str` followed by `prefix_re`.

    The whole thing is a non-capturing group; only the `prefix_re` part is
    wrapped in a capture group.
    """
    return "".join(["(?:", prefix_str, "(", prefix_re, "))"])
1048
1049
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
# Group 1: whitespace in front of the value.
IR_VALUE_REGEXP_PREFIX = r"(\s*)"
# One alternative per entry of ir_nameless_values, in list order, joined by "|".
IR_VALUE_REGEXP_STRING = r""
for nameless_value in ir_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    if nameless_value.global_ir_rhs_regexp is not None:
        # Kinds that describe a global definition are anchored with "^".
        match = "^" + match
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
# Last group: what follows the value, i.e. one of ',', whitespace, '(', ')',
# '}' or the end of the input.
IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
IR_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + IR_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)
1069
# Build the regexp used for global check lines. It uses the same prefix and
# suffix as IR_VALUE_RE, but only has alternatives for the kinds in
# global_nameless_values_w_unstable_ids, and those are not anchored with "^".
GLOBAL_VALUE_REGEXP_STRING = r""
for nameless_value in global_nameless_values_w_unstable_ids:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
GLOBAL_VALUE_RE = re.compile(
    IR_VALUE_REGEXP_PREFIX
    + r"("
    + GLOBAL_VALUE_REGEXP_STRING
    + r")"
    + IR_VALUE_REGEXP_SUFFIX
)
1081
# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
# One alternative per entry of asm_nameless_values, in list order.
ASM_VALUE_REGEXP_STRING = ""
for nameless_value in asm_nameless_values:
    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
# An ASM value must be followed by '>', whitespace or the end of the input.
ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
# Group 1 is the comment leader ('#' or '//') with the whitespace after it,
# group 2 the whole alternation, then one group per asm_nameless_values entry,
# and the last group is the suffix. This is the same group layout as
# IR_VALUE_RE.
ASM_VALUE_RE = re.compile(
    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
)
1091
# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# Constants for the group id of special matches. With the numbering above,
# group 3 belongs to the first entry of ir_nameless_values (local '%' values)
# and group 4 to the second entry ('#' attribute references).
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
1099
1100
def get_idx_from_ir_value_match(match):
    """Return which kind of value `match` found, as an index into the list of
    nameless values the regexp was built from.

    `match` is a match of IR_VALUE_RE (or a regexp with the same group
    layout): local value %..., global @..., debug number !dbg !..., etc.
    Reports an error and returns 0 if no nameless-value group matched.
    """
    group_id = first_nameless_group_in_ir_value_match
    # Stop right before match.lastindex: for these regexps that is the
    # trailing suffix group, not a nameless value.
    while group_id < match.lastindex:
        if match.group(group_id) is not None:
            return group_id - first_nameless_group_in_ir_value_match
        group_id += 1
    error("Unable to identify the kind of IR value from the match!")
    return 0
1109
1110
def get_name_from_ir_value_match(match):
    """Return the text captured by the nameless-value group that matched.

    See get_idx_from_ir_value_match for how that group is found.
    """
    kind_index = get_idx_from_ir_value_match(match)
    group_id = kind_index + first_nameless_group_in_ir_value_match
    return match.group(group_id)
1116
1117
def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
    """Return the entry of `nameless_values` for the kind that `match` found."""
    kind_index = get_idx_from_ir_value_match(match)
    return nameless_values[kind_index]
1120
1121
def may_clash_with_default_check_prefix_name(check_prefix, var):
    """Return a truthy value if `var` clashes with the scripted FileCheck
    check_prefix, i.e. it is `check_prefix` followed by digits (ignoring case).

    An empty `check_prefix` never clashes and is returned as is.
    """
    if not check_prefix:
        return check_prefix
    clash_pattern = r"^" + check_prefix + r"[0-9]+?$"
    return re.match(clash_pattern, var, re.IGNORECASE)
1127
1128
def generalize_check_lines_common(
    lines,
    is_analyze,
    vars_seen,
    global_vars_seen,
    nameless_values,
    nameless_value_regex,
    is_asm,
):
    """Turn value names in `lines` into FileCheck variable defs and uses.

    `lines` is modified in place and also returned. For non-asm input every
    line first has "%." renamed, integer constants of matching globals turned
    into hex numeric captures, and IR comments removed. Unless this is a
    non-asm analyze run, every match of `nameless_value_regex` is then replaced
    by a definition (first occurrence) or a use (later occurrences) of a
    FileCheck variable. Local values are tracked in the set `vars_seen`, all
    other values in the dict `global_vars_seen`; both are updated.
    """

    def transform_line_vars(match):
        # Substitution callback: values we have not seen become defs, values
        # we have seen become uses.
        name = get_name_from_ir_value_match(match)
        kind = get_nameless_value_from_match(match, nameless_values)
        if may_clash_with_default_check_prefix_name(kind.check_prefix, name):
            warn(
                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
                " with scripted FileCheck name." % (name,)
            )
        seen_key = (name, kind.check_key)
        if kind.is_local_def_ir_value_match(match):
            if seen_key in vars_seen:
                replacement = kind.get_value_use(name, match)
            else:
                vars_seen.add(seen_key)
                replacement = kind.get_value_definition(name, match)
        elif seen_key in global_vars_seen:
            # We could have seen a different prefix for the global variable
            # first; use that one rather than the prefix of the current match.
            replacement = kind.get_value_use(name, match, global_vars_seen[seen_key])
        else:
            global_vars_seen[seen_key] = kind.check_prefix
            replacement = kind.get_value_definition(name, match)
        # The substitution replaces the entire regex match, so the leading
        # whitespace and the trailing separator have to be handed back too.
        return match.group(1) + replacement + match.group(match.lastindex)

    def as_hex_capture(int_match):
        # "i<bits> <value>" -> "i<bits> [[#<value in hex>]]"
        return "i" + int_match.group(1) + " [[#" + hex(int(int_match.group(2))) + "]]"

    substitute_values = is_asm or not is_analyze

    for pos, text in enumerate(lines):
        if not is_asm:
            # An IR variable named '%.' matches the FileCheck regex string.
            text = text.replace("%.", "%dot")
            for name_regex in _global_hex_value_regex:
                if re.match("^@" + name_regex + " = ", text):
                    text = re.sub(r"\bi([0-9]+) ([0-9]+)", as_hex_capture, text)
                    break
            # Ignore any comments, since the check lines will too.
            lines[pos] = SCRUB_IR_COMMENT_RE.sub(r"", text)
        if substitute_values:
            # It can happen that two matches are back-to-back and for some
            # reason sub will not replace both of them. Work around this by
            # substituting one match at a time until nothing matches anymore.
            while True:
                lines[pos], replaced = nameless_value_regex.subn(
                    transform_line_vars, lines[pos], count=1
                )
                if not replaced:
                    break
    return lines
1200
1201
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
    """Replace IR value defs and uses in `lines` with FileCheck variables.

    This is generalize_check_lines_common for IR: it uses ir_nameless_values
    and IR_VALUE_RE.
    """
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=is_analyze,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=ir_nameless_values,
        nameless_value_regex=IR_VALUE_RE,
        is_asm=False,
    )
1213
1214
def generalize_global_check_line(line, is_analyze, global_vars_seen):
    """Generalize the single global check line `line` and return the result.

    Only the kinds in global_nameless_values_w_unstable_ids are replaced, using
    GLOBAL_VALUE_RE. Local values are not remembered between calls: a fresh
    set is used for them every time.
    """
    (generalized,) = generalize_check_lines_common(
        lines=[line],
        is_analyze=is_analyze,
        vars_seen=set(),
        global_vars_seen=global_vars_seen,
        nameless_values=global_nameless_values_w_unstable_ids,
        nameless_value_regex=GLOBAL_VALUE_RE,
        is_asm=False,
    )
    return generalized
1226
1227
def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
    """Replace asm values in `lines` with FileCheck variables.

    This is generalize_check_lines_common for asm: it uses asm_nameless_values
    and ASM_VALUE_RE, and never runs in analyze mode.
    """
    return generalize_check_lines_common(
        lines=lines,
        is_analyze=False,
        vars_seen=vars_seen,
        global_vars_seen=global_vars_seen,
        nameless_values=asm_nameless_values,
        nameless_value_regex=ASM_VALUE_RE,
        is_asm=True,
    )
1238
1239
def add_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    check_label_format,
    is_backend,
    is_analyze,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Append the check lines for function `func_name` to `output_lines`.

    For every entry of `prefix_list` the first usable check prefix is used to
    emit the label line, the optional "Function Attrs" and "-SAME" lines, and
    the body lines of the function.

    Parameters:
      output_lines: list the generated lines are appended to.
      comment_marker: text put in front of every generated line.
      prefix_list: entries whose element 0 is the list of check prefixes of
        one RUN line.
      func_dict: maps check prefix -> function name -> function body object,
        or None when no common body exists for that prefix.
      func_name: name of the function to emit checks for.
      check_label_format: %-format string for the label line; it receives
        (check prefix, funcdef attrs and return type, function name, args and
        signature, function name separator).
      is_backend: emit asm-style checks (first body line unsuffixed, "-EMPTY"
        for blank lines) instead of IR-style checks.
      is_analyze: forwarded to generalize_check_lines.
      version: for versions > 1 the function's funcdef_attrs_and_ret is put
        into the label line, otherwise an empty string.
      global_vars_seen_dict: maps check prefix -> global values already seen
        under that prefix; missing prefixes are added and newly seen global
        values are recorded.
      is_filtered: when true, lines are emitted without the "-NEXT" suffix.

    Returns the list of check prefixes that checks were emitted for.
    """
    # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
    prefix_exclusions = set()
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
        # exist for this run line. A subset of the check prefixes might know about the function but only because
        # other run lines created it.
        if any(
            func_name not in func_dict[checkprefix] for checkprefix in checkprefixes
        ):
            prefix_exclusions |= set(checkprefixes)

    # prefix_exclusions is constructed, we can now emit the output
    for p in prefix_list:
        global_vars_seen = {}
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            # This also guarantees an entry for checkprefix from here on.
            if checkprefix in global_vars_seen_dict:
                global_vars_seen.update(global_vars_seen_dict[checkprefix])
            else:
                global_vars_seen_dict[checkprefix] = {}
            if checkprefix in printed_prefixes:
                break

            # Check if the prefix is excluded.
            if checkprefix in prefix_exclusions:
                continue

            # If we do not have output for this prefix we skip it.
            func_info = func_dict[checkprefix][func_name]
            if not func_info:
                continue

            # Add some space between different check prefixes, but not after the last
            # check line (before the test code).
            if is_backend and printed_prefixes:
                output_lines.append(comment_marker)

            # Snapshot of the keys known before this function is processed. A
            # set keeps the "is this key new?" tests below constant-time.
            global_vars_seen_before = set(global_vars_seen)

            vars_seen = set()
            printed_prefixes.append(checkprefix)
            attrs = str(func_info.attrs)
            attrs = "" if attrs == "None" else attrs
            if version > 1:
                funcdef_attrs_and_ret = func_info.funcdef_attrs_and_ret
            else:
                funcdef_attrs_and_ret = ""

            if attrs:
                output_lines.append(
                    "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs)
                )
            args_and_sig = str(func_info.args_and_sig)
            if args_and_sig:
                args_and_sig = generalize_check_lines(
                    [args_and_sig], is_analyze, vars_seen, global_vars_seen
                )[0]
            func_name_separator = func_info.func_name_separator
            if "[[" in args_and_sig:
                # The signature contains FileCheck variables, so it goes on a
                # separate -SAME line instead of the label line.
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        "",
                        func_name_separator,
                    )
                )
                output_lines.append(
                    "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig)
                )
            else:
                output_lines.append(
                    check_label_format
                    % (
                        checkprefix,
                        funcdef_attrs_and_ret,
                        func_name,
                        args_and_sig,
                        func_name_separator,
                    )
                )
            func_body = str(func_info).splitlines()
            if not func_body:
                # We have filtered everything.
                continue

            # For ASM output, just emit the check lines.
            if is_backend:
                body_start = 1
                if is_filtered:
                    # For filtered output we don't add "-NEXT" so don't add extra spaces
                    # before the first line.
                    body_start = 0
                else:
                    output_lines.append(
                        "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                    )
                func_lines = generalize_asm_check_lines(
                    func_body[body_start:], vars_seen, global_vars_seen
                )
                for func_line in func_lines:
                    if func_line.strip() == "":
                        output_lines.append(
                            "%s %s-EMPTY:" % (comment_marker, checkprefix)
                        )
                    else:
                        check_suffix = "-NEXT" if not is_filtered else ""
                        output_lines.append(
                            "%s %s%s:  %s"
                            % (comment_marker, checkprefix, check_suffix, func_line)
                        )
                # Remember new global variables we have not seen before
                for key, seen_prefix in global_vars_seen.items():
                    if key not in global_vars_seen_before:
                        global_vars_seen_dict[checkprefix][key] = seen_prefix
                break

            # For IR output, change all defs to FileCheck variables, so we're immune
            # to variable naming fashions.
            func_body = generalize_check_lines(
                func_body, is_analyze, vars_seen, global_vars_seen
            )

            # This could be selectively enabled with an optional invocation argument.
            # Disabled for now: better to check everything. Be safe rather than sorry.

            # Handle the first line of the function body as a special case because
            # it's often just noise (a useless asm comment or entry label).
            # if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
            #  is_blank_line = True
            # else:
            #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
            #  is_blank_line = False

            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == "":
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)

                # Skip blank lines instead of checking them: the first line
                # after a blank one gets a plain check, without "-NEXT".
                if is_blank_line:
                    output_lines.append(
                        "{} {}:       {}".format(comment_marker, checkprefix, func_line)
                    )
                else:
                    check_suffix = "-NEXT" if not is_filtered else ""
                    output_lines.append(
                        "{} {}{}:  {}".format(
                            comment_marker, checkprefix, check_suffix, func_line
                        )
                    )
                is_blank_line = False

            # Add space between different check prefixes and also before the first
            # line of code in the test function.
            output_lines.append(comment_marker)

            # Remember new global variables we have not seen before
            for key, seen_prefix in global_vars_seen.items():
                if key not in global_vars_seen_before:
                    global_vars_seen_dict[checkprefix][key] = seen_prefix
            break
    return printed_prefixes
1433
1434
def add_ir_checks(
    output_lines,
    comment_marker,
    prefix_list,
    func_dict,
    func_name,
    preserve_names,
    function_sig,
    version,
    global_vars_seen_dict,
    is_filtered,
):
    """Emit IR checks for `func_name` through add_checks.

    The label format is based on the IR string: without `function_sig` only
    the function name is matched, with it the line starts at "define".
    `preserve_names` is handed to add_checks as its is_analyze argument.
    Returns what add_checks returns.
    """
    if not function_sig:
        function_def_regex = "%s"
    elif version > 1:
        function_def_regex = "define %s"
    else:
        function_def_regex = "define {{[^@]+}}%s"
    check_label_format = "{} %s-LABEL: {}@%s%s%s".format(
        comment_marker, function_def_regex
    )
    return add_checks(
        output_lines=output_lines,
        comment_marker=comment_marker,
        prefix_list=prefix_list,
        func_dict=func_dict,
        func_name=func_name,
        check_label_format=check_label_format,
        is_backend=False,
        is_analyze=preserve_names,
        version=version,
        global_vars_seen_dict=global_vars_seen_dict,
        is_filtered=is_filtered,
    )
1470
1471
def add_analyze_checks(
    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
):
    """Emit analysis checks for `func_name` through add_checks.

    The label line quotes the function name, checks are emitted in analyze
    mode with version 1, and a fresh (empty) dictionary of seen global values
    is used for every call. Returns what add_checks returns.
    """
    return add_checks(
        output_lines=output_lines,
        comment_marker=comment_marker,
        prefix_list=prefix_list,
        func_dict=func_dict,
        func_name=func_name,
        check_label_format="{} %s-LABEL: '%s%s%s%s'".format(comment_marker),
        is_backend=False,
        is_analyze=True,
        version=1,
        global_vars_seen_dict={},
        is_filtered=is_filtered,
    )
1490
1491
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
    """Record the global definition lines found in `raw_tool_output`.

    For every global and asm value kind, the lines of the output that define a
    value of that kind are collected and stored in
    glob_val_dict[prefix][check_prefix] for each prefix in `prefixes`. If a
    prefix already holds different lines for that kind, the entry is set to
    None, except for the last prefix, where a warning is printed and the entry
    is overwritten. Prefixes whose dictionary is None are left alone.
    """
    for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values):
        rhs_re_str = nameless_value.global_ir_rhs_regexp
        if rhs_re_str is None:
            continue

        lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
        definition_re = re.compile(
            r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$", flags=(re.M)
        )
        lines = [m.group(0) for m in definition_re.finditer(raw_tool_output)]

        kind = nameless_value.check_prefix
        for prefix in prefixes:
            per_prefix = glob_val_dict[prefix]
            if per_prefix is None:
                continue
            if kind in per_prefix:
                if lines == per_prefix[kind]:
                    # Same lines as before, nothing to record.
                    continue
                if prefix != prefixes[-1]:
                    # Conflicting lines: mark the kind as unusable for this prefix.
                    per_prefix[kind] = None
                    continue
                warn("Found conflicting asm under the same prefix: %r!" % (prefix,))
            per_prefix[kind] = lines
1518
1519
def filter_globals_according_to_preference(
    global_val_lines, global_vars_seen, nameless_value, global_check_setting
):
    """Select the global definition lines that should be checked.

    Parameters:
      global_val_lines: definition lines of the kind `nameless_value`, each of
        the form "<ir_prefix><name> = <rhs>".
      global_vars_seen: iterable of (name, check_key) pairs of the values that
        were already seen.
      nameless_value: the kind of value the lines define.
      global_check_setting: "none" (select nothing), "all" (return
        `global_val_lines` itself) or "smart".

    With "smart", nothing is selected for attribute sets (check_key "#").
    Otherwise the lines kept are those defining a value that was seen, or that
    is referenced, directly or through other definitions, from a seen value.
    The original line order is preserved.
    """
    if global_check_setting == "none":
        return []
    if global_check_setting == "all":
        return global_val_lines
    assert global_check_setting == "smart"

    if nameless_value.check_key == "#":
        # attribute sets are usually better checked by --check-attributes
        return []

    # Compiled once instead of being rebuilt for every line.
    definition_re = re.compile(
        "^"
        + nameless_value.ir_prefix
        + "("
        + nameless_value.ir_regexp
        + ") = ("
        + nameless_value.global_ir_rhs_regexp
        + ")"
    )
    reference_re = re.compile(nameless_value.ir_regexp)

    # Parse every line exactly once: the name it defines and the names its
    # right-hand side refers to.
    # NOTE(review): every line is expected to match definition_re; a line that
    # does not match makes match.group fail, as it did before this change.
    defined_names = []
    contains_refs_to = {}
    for line in global_val_lines:
        match = definition_re.match(line)
        var = match.group(1)
        defined_names.append(var)
        contains_refs_to[var] = reference_re.findall(match.group(2))

    # Transitive closure over the references, starting from the seen values of
    # this kind. An explicit worklist is used instead of recursion so that long
    # reference chains cannot exceed the interpreter's recursion limit.
    transitively_visible = set()
    worklist = [
        var
        for var, check_key in global_vars_seen
        if check_key == nameless_value.check_key
    ]
    while worklist:
        var = worklist.pop()
        if var in transitively_visible:
            continue
        transitively_visible.add(var)
        worklist.extend(contains_refs_to.get(var, ()))

    return [
        line
        for line, var in zip(global_val_lines, defined_names)
        if var in transitively_visible
    ]
1572
1573
# Each filter keeps the text of its capture group and replaces the rest of the
# match with a {{.*}} glob.
METADATA_FILTERS = [
    r"(\w+ version )[\d.]+(?: \([^)]+\))?",
    r'(!DIFile\(filename: ".+", directory: )".+"',
]
METADATA_FILTERS_RE = list(map(re.compile, METADATA_FILTERS))


def filter_unstable_metadata(line):
    """Return `line` with every METADATA_FILTERS match reduced to its capture
    group followed by a {{.*}} glob."""
    filtered = line
    for metadata_re in METADATA_FILTERS_RE:
        filtered = metadata_re.sub(r"\1{{.*}}", filtered)
    return filtered
1586
1587
def add_global_checks(
    glob_val_dict,
    comment_marker,
    prefix_list,
    output_lines,
    global_vars_seen_dict,
    is_analyze,
    is_before_functions,
    global_check_setting,
):
    """Append check lines for global values to `output_lines`.

    For every entry of `global_nameless_values` whose `is_before_functions`
    flag equals `is_before_functions`, each run in `prefix_list` is visited
    (element 0 of a run is its list of check prefixes, or None to skip the
    run).  Within a run, prefixes are tried in order and the first one that
    yields at least one check line is emitted; the remaining prefixes of that
    run are then skipped.  A prefix that was already emitted for the same kind
    of value also ends the run.

    Side effects:
      * `output_lines` receives a separator line followed by the check lines
        for every emitted (prefix, kind) pair, plus one closing separator if
        anything was emitted at all.
      * `global_vars_seen_dict` gets an (initially empty) entry for every
        visited prefix and is extended with the variables that
        generalize_global_check_line newly recorded for an emitted prefix.

    Returns the set of (check prefix, nameless_value.check_prefix) pairs for
    which checks were emitted.
    """
    emitted = set()
    for nv in global_nameless_values:
        if nv.is_before_functions != is_before_functions:
            continue
        for run_entry in prefix_list:
            # Variables known so far, accumulated over the prefixes of this run.
            seen = {}
            run_prefixes = run_entry[0]
            if run_prefixes is None:
                continue
            for checkprefix in run_prefixes:
                if checkprefix in global_vars_seen_dict:
                    seen.update(global_vars_seen_dict[checkprefix])
                else:
                    global_vars_seen_dict[checkprefix] = {}

                kind = nv.check_prefix
                if (checkprefix, kind) in emitted:
                    break
                # Nothing recorded for this prefix / kind of global value.
                if (
                    not glob_val_dict[checkprefix]
                    or kind not in glob_val_dict[checkprefix]
                    or not glob_val_dict[checkprefix][kind]
                ):
                    continue

                # Snapshot of the keys known before generalizing any line, so
                # that newly introduced variables can be identified afterwards.
                seen_before = list(seen)
                candidates = filter_globals_according_to_preference(
                    glob_val_dict[checkprefix][kind],
                    seen_before,
                    nv,
                    global_check_setting,
                )

                check_lines = []
                for line in candidates:
                    # When value regexes were supplied, keep only definitions
                    # of '@' or '!' names matching at least one of them.
                    if _global_value_regex and not any(
                        re.match("^@" + regex + " = ", line)
                        or re.match("^!" + regex + " = ", line)
                        for regex in _global_value_regex
                    ):
                        continue
                    generalized = filter_unstable_metadata(
                        generalize_global_check_line(line, is_analyze, seen)
                    )
                    check_lines.append(
                        "%s %s: %s" % (comment_marker, checkprefix, generalized)
                    )
                if not check_lines:
                    continue

                output_lines.append(comment_marker + SEPARATOR)
                output_lines.extend(check_lines)
                emitted.add((checkprefix, kind))

                # Remember new global variables we have not seen before
                for key, value in seen.items():
                    if key not in seen_before:
                        global_vars_seen_dict[checkprefix][key] = value
                break

    if emitted:
        output_lines.append(comment_marker + SEPARATOR)
    return emitted
1662
1663
def check_prefix(prefix):
    """Warn when `prefix` is not accepted by PREFIX_RE.

    If the rejected prefix contains a comma, the warning additionally suggests
    the '--check-prefixes=' spelling.  Nothing is returned; the only effect is
    the call to warn().
    """
    if PREFIX_RE.match(prefix):
        return
    # Format the fixed part first and append the hint afterwards: the hint
    # embeds the raw prefix, which must never be interpreted as part of the
    # format string (a '%' in the prefix would otherwise break formatting).
    message = (
        "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores."
        % (prefix,)
    )
    if "," in prefix:
        message += " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(message)
1676
1677
def get_check_prefixes(filecheck_cmd):
    """Return the check prefixes named in a FileCheck command line.

    Group 1 of every CHECK_PREFIX_RE match in `filecheck_cmd` is split on
    commas and the pieces are collected in order of appearance.  If no prefix
    is found, the list ["CHECK"] is returned.
    """
    found = []
    for prefix_match in CHECK_PREFIX_RE.finditer(filecheck_cmd):
        found.extend(prefix_match.group(1).split(","))
    return found or ["CHECK"]
1687
1688
def verify_filecheck_prefixes(fc_cmd):
    """Validate the prefixes given in a FileCheck command line.

    The command is split on whitespace.  For a word containing
    'check-prefix=', the text after the first '=' is passed to check_prefix().
    For a word containing 'check-prefixes=', that text is split on commas;
    each element is passed to check_prefix() and a warning is issued for every
    element that occurs more than once in the list.
    """
    for word in fc_cmd.split():
        if "check-prefix=" in word:
            check_prefix(word.split("=", 1)[1])
            continue
        if "check-prefixes=" not in word:
            continue
        names = word.split("=", 1)[1].split(",")
        for name in names:
            check_prefix(name)
            if names.count(name) > 1:
                warn(
                    "Supplied prefix '%s' is not unique in the prefix list."
                    % (name,)
                )
1704
1705
def get_autogennote_suffix(parser, args):
    """Build the UTC_ARGS suffix describing the non-default options in `args`.

    Walks the actions registered on `parser` and renders every option whose
    value in `args` differs from its default, skipping options that only
    describe the local invocation (binary paths, test list, verbosity, ...).

    Returns an empty string when nothing needs to be recorded, otherwise
    " <UTC_ARGS_KEY> <options>" without a trailing space.
    """
    # Parameters such as paths to the binary or the list of tests are never
    # recorded in the note.
    unrecorded_dests = (
        "tests",
        "update_only",
        "tool_binary",
        "opt_binary",
        "llc_binary",
        "clang",
        "opt",
        "llvm_bin",
        "verbose",
        "force_update",
    )
    note = ""
    for action in parser._actions:
        dest = action.dest
        # Options such as --help are not present in args at all.
        if not hasattr(args, dest) or dest in unrecorded_dests:
            continue
        value = getattr(args, dest)

        if dest == "check_globals":
            # The default depends on the version: 'none' before version 3,
            # 'smart' from version 3 on.
            pre_v3 = args.version < 3
            if value == ("none" if pre_v3 else "smart"):
                continue
            note += action.option_strings[0] + " "
            # Before version 3 the value 'all' is written as the bare option.
            if not (pre_v3 and value == "all"):
                note += "%s " % value
            continue

        # For actions storing a constant (usually True/False), skip the ones
        # whose constant differs from the value (--foo/--no-foo pairs).
        if action.const is not None and value != action.const:
            continue
        if parser.get_default(dest) == value:
            continue  # Default values are not recorded.
        if dest == "function_signature" and args.version >= 2:
            continue  # Enabled by default in version 2

        if dest == "filters":
            # The value is a list of Filter objects; emit one option per
            # element, skipping text that is already part of the note.
            for elem in value:
                opt_name = "filter-out" if elem.is_filter_out else "filter"
                new_arg = '--%s "%s" ' % (opt_name, elem.pattern().strip('"'))
                if new_arg not in note:
                    note += new_arg
            continue

        note += action.option_strings[0] + " "
        if action.const is None:  # action takes a parameter
            if action.nargs == "+":
                value = " ".join('"' + v.strip('"') + '"' for v in value)
            note += "%s " % value

    if not note:
        return note
    # Drop the trailing space left by the last appended option.
    return " %s %s" % (UTC_ARGS_KEY, note[:-1])
1765
1766
def check_for_command(line, parser, args, argv, argparse_callback):
    """Apply the options of a UTC_ARGS command found in `line`, if any.

    When UTC_ARGS_CMD matches `line`, the non-empty words of its 'cmd' group
    (split with shlex) are appended to `argv` in place, and the arguments are
    re-parsed from `argv` with every entry of `args.tests` filtered out.
    `argparse_callback`, if not None, is then called with the new arguments.

    Returns the (possibly re-parsed) arguments together with `argv`.
    """
    cmd_m = UTC_ARGS_CMD.match(line)
    if not cmd_m:
        return args, argv

    argv.extend(opt for opt in shlex.split(cmd_m.group("cmd").strip()) if opt)
    args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv))
    if argparse_callback is not None:
        argparse_callback(args)
    return args, argv
1777
1778
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
    """Look up an option for a test, optionally searching its per-line args.

    `get_arg_to_check` extracts the option from an args object.  It is first
    applied to `test_info.args`.  If that yields a false value and `is_global`
    is set, the args attached to each line of `test_info.ro_iterlines()` are
    consulted in order and the first true value wins.

    If the option is found after a line that is neither blank nor starts with
    ';', two warnings mentioning `arg_string` are printed to stderr.

    Returns the value produced by `get_arg_to_check` (false if never found).
    """
    result = get_arg_to_check(test_info.args)
    if result or not is_global:
        return result

    # See if this has been specified via UTC_ARGS.  This is a "global" option
    # that affects the entire generation of test checks.  If it exists anywhere
    # in the test, apply it to everything.
    saw_line = False
    for line_info in test_info.ro_iterlines():
        line = line_info.line
        if not line.startswith(";") and line.strip() != "":
            saw_line = True
        result = get_arg_to_check(line_info.args)
        if not result:
            continue
        if saw_line:
            # We saw the option after already reading some test input lines.
            # Warn about it.
            print(
                "WARNING: Found {} in line following test start: ".format(arg_string)
                + line,
                file=sys.stderr,
            )
            print(
                "WARNING: Consider moving {} to top of file".format(arg_string),
                file=sys.stderr,
            )
        break
    return result
1808
1809
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
    """Copy the input lines of a test to `output_lines`, dropping old checks.

    Iterates `test_info.iterlines(output_lines)` and appends each line, with
    trailing newlines removed, to `output_lines` unless the line is:
      * only the comment marker,
      * only the comment marker followed by SEPARATOR, or
      * a comment line matched by CHECK_RE whose group 1 is in `prefix_set`.
    """
    for input_line_info in test_info.iterlines(output_lines):
        line = input_line_info.line
        stripped = line.strip()
        # Bare comment markers and separator lines are not carried over.
        if stripped == comment_string or stripped == comment_string + SEPARATOR:
            continue
        if line.lstrip().startswith(comment_string):
            m = CHECK_RE.match(line)
            if m and m.group(1) in prefix_set:
                continue
        output_lines.append(line.rstrip("\n"))
1823
1824
def add_checks_at_end(
    output_lines, prefix_list, func_order, comment_string, check_generator
):
    """Emit checks for every (function, prefix) pair, grouped by prefix.

    `prefix_list` holds runs whose element 0 is a list of check prefixes and
    whose element 1 is the tool argument string.  `func_order` maps a prefix to
    the functions to emit for it.  `check_generator(output_lines, run_list,
    func)` writes the checks and yields the prefixes it generated checks for.

    A `comment_string` line is appended before each generator call except
    while no (function, prefix) pair has been recorded yet.

    Returns the set of all prefixes reported by `check_generator`.
    """
    emitted = set()
    generated_prefixes = set()
    for run in prefix_list:
        run_prefixes = run[0]
        tool_args = run[1]
        for prefix_name in run_prefixes:
            for func in func_order[prefix_name]:
                # The func order can contain the same functions multiple
                # times.  A pair that was already emitted is skipped.
                if (func, prefix_name) in emitted:
                    continue
                if emitted:
                    output_lines.append(comment_string)

                # The add_*_checks routines expect a run list whose items are
                # tuples that have a list of prefixes as their first element and
                # tool command args string as their second element.  They output
                # checks for each prefix in the list of prefixes.  By doing so, it
                # implicitly assumes that for each function every run line will
                # generate something for that function.  That is not the case for
                # generated functions as some run lines might not generate them
                # (e.g. -fopenmp vs. no -fopenmp).
                #
                # Therefore, pass just the prefix we're interested in.  This has
                # the effect of generating all of the checks for functions of a
                # single prefix before moving on to the next prefix.  So checks
                # are ordered by prefix instead of by function as in "normal"
                # mode.
                single_prefix_run = [([prefix_name], tool_args)]
                for generated_prefix in check_generator(
                    output_lines, single_prefix_run, func
                ):
                    emitted.add((func, generated_prefix))
                    generated_prefixes.add(generated_prefix)
    return generated_prefixes
1862