xref: /llvm-project/llvm/utils/update_cc_test_checks.py (revision 5315f3f8cb8f562ec39f57f2fce79c8e017595f9)
1#!/usr/bin/env python3
2"""A utility to update LLVM IR CHECK lines in C/C++ FileCheck test files.
3
4Example RUN lines in .c/.cc test files:
5
6// RUN: %clang -emit-llvm -S %s -o - -O2 | FileCheck %s
7// RUN: %clangxx -emit-llvm -S %s -o - -O2 | FileCheck -check-prefix=CHECK-A %s
8
9Usage:
10
11% utils/update_cc_test_checks.py --llvm-bin=release/bin test/a.cc
12% utils/update_cc_test_checks.py --clang=release/bin/clang /tmp/c/a.cc
13"""
14
15from __future__ import print_function
16
17import argparse
18import collections
19import json
20import os
21import re
22import shlex
23import shutil
24import subprocess
25import sys
26import tempfile
27
28from UpdateTestChecks import common
29
# Maps each lit substitution that names clang at the start of a RUN line to the
# arguments that replace it. The clang executable itself is supplied separately
# when the command is run. config() may overwrite the "%clang_cc1" entry to add
# the builtin include directory.
SUBST = {
    "%clang": [],
    "%clang_cc1": ["-cc1"],
    "%clangxx": ["--driver-mode=g++"],
}
35
36
def get_line2func_list(args, clang_args, globals_name_prefix):
    """Map 0-based source line numbers to the functions defined on them.

    Runs clang with ``-ast-dump=json`` on the given command line and walks the
    resulting AST, recording every function-like declaration that has a body.

    Args:
        args: Parsed options; only ``args.clang`` (the clang executable) is
            read here.
        clang_args: List of arguments for the clang invocation, without the
            executable itself.
        globals_name_prefix: Prefix to strip from the mangled names of
            non-static functions, or a falsy value to strip nothing.

    Returns:
        A ``collections.defaultdict(list)`` keyed by 0-based line number. Each
        value is a list of ``(spelling, mangled_name, search_string)`` tuples.

    Exits the process with status 2 if clang returns a non-zero exit code or
    if the top-level JSON node is not a ``TranslationUnitDecl``.
    """
    ret = collections.defaultdict(list)
    # Use clang's JSON AST dump to get the mangled name
    json_dump_args = [args.clang] + clang_args + ["-fsyntax-only", "-o", "-"]
    if "-cc1" not in json_dump_args:
        # For tests that invoke %clang instead of %clang_cc1 we have to use
        # -Xclang -ast-dump=json instead:
        json_dump_args.append("-Xclang")
    json_dump_args.append("-ast-dump=json")
    common.debug("Running", " ".join(json_dump_args))

    popen = subprocess.Popen(
        json_dump_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    stdout, stderr = popen.communicate()
    if popen.returncode != 0:
        sys.stderr.write("Failed to run " + " ".join(json_dump_args) + "\n")
        sys.stderr.write(stderr)
        sys.stderr.write(stdout)
        sys.exit(2)

    # Parse the clang JSON and add all children of type FunctionDecl.
    # TODO: Should we add checks for global variables being emitted?
    def parse_clang_ast_json(node, loc, search):
        # `loc` and `search` are overrides handed down from an enclosing class
        # template specialization; both are None everywhere else.
        node_kind = node["kind"]
        # Recurse for the following nodes that can contain nested function decls:
        if node_kind in (
            "NamespaceDecl",
            "LinkageSpecDecl",
            "TranslationUnitDecl",
            "CXXRecordDecl",
            "ClassTemplateSpecializationDecl",
        ):
            # Specializations must use the loc from the specialization, not the
            # template, and search for the class's spelling as the specialization
            # does not mention the method names in the source.
            if node_kind == "ClassTemplateSpecializationDecl":
                inner_loc = node["loc"]
                inner_search = node["name"]
            else:
                inner_loc = None
                inner_search = None
            if "inner" in node:
                for inner in node["inner"]:
                    parse_clang_ast_json(inner, inner_loc, inner_search)
        # Otherwise we ignore everything except functions:
        if node_kind not in (
            "FunctionDecl",
            "CXXMethodDecl",
            "CXXConstructorDecl",
            "CXXDestructorDecl",
            "CXXConversionDecl",
        ):
            return
        if loc is None:
            loc = node["loc"]
        if node.get("isImplicit") is True and node.get("storageClass") == "extern":
            common.debug("Skipping builtin function:", node["name"], "@", loc)
            return
        common.debug("Found function:", node["kind"], node["name"], "@", loc)
        line = loc.get("line")
        # If there is no line it is probably a builtin function -> skip
        if line is None:
            common.debug(
                "Skipping function without line number:", node["name"], "@", loc
            )
            return

        # If there is no 'inner' object, it is a function declaration and we can
        # skip it. However, function declarations may also contain an 'inner' list,
        # but in that case it will only contain ParmVarDecls. If we find an entry
        # that is not a ParmVarDecl, we know that this is a function definition.
        has_body = False
        if "inner" in node:
            for i in node["inner"]:
                if i.get("kind", "ParmVarDecl") != "ParmVarDecl":
                    has_body = True
                    break
        if not has_body:
            common.debug("Skipping function without body:", node["name"], "@", loc)
            return
        spell = node["name"]
        if search is None:
            search = spell
        mangled = node.get("mangledName", spell)
        # Clang's AST dump includes the globals prefix, but when Clang emits
        # LLVM IR this is not included and instead added as part of the asm
        # output. Strip it from the mangled name of globals when needed
        # (see DataLayout::getGlobalPrefix()).
        if globals_name_prefix:
            storage = node.get("storageClass", None)
            # startswith() instead of indexing so that an empty mangled name
            # cannot raise IndexError.
            if storage != "static" and mangled.startswith(globals_name_prefix):
                mangled = mangled[len(globals_name_prefix):]
        # Keys are 0-based so they can be compared with 0-based line indices.
        ret[int(line) - 1].append((spell, mangled, search))

    ast = json.loads(stdout)
    if ast["kind"] != "TranslationUnitDecl":
        common.error("Clang AST dump JSON format changed?")
        sys.exit(2)
    parse_clang_ast_json(ast, None, None)

    for line, funcs in sorted(ret.items()):
        for func in funcs:
            common.debug(
                "line {}: found function {}".format(line + 1, func), file=sys.stderr
            )
    if not ret:
        common.warn("Did not find any functions using", " ".join(json_dump_args))
    return ret
149
150
def str_to_commandline(value):
    """Split a shell-style argument string into a list of arguments.

    A falsy *value* (``None`` or the empty string) yields an empty list.
    """
    return shlex.split(value) if value else []
155
156
def infer_dependent_args(args):
    """Fill in ``args.clang`` and ``args.opt`` when they were not given.

    A missing tool defaults to ``<llvm_bin>/<tool>`` if ``args.llvm_bin`` is
    set, and to the bare tool name otherwise. *args* is modified in place.
    """
    for tool in ("clang", "opt"):
        if getattr(args, tool):
            continue
        bin_dir = args.llvm_bin
        setattr(args, tool, os.path.join(bin_dir, tool) if bin_dir else tool)
168
169
def find_executable(executable):
    """Look up *executable* with ``shutil.which`` and return its result.

    On win32 a ``.exe`` suffix is appended first unless the name already ends
    with that extension.
    """
    suffix = os.path.splitext(executable)[1]
    needs_exe_suffix = sys.platform == "win32" and suffix != ".exe"
    candidate = executable + ".exe" if needs_exe_suffix else executable
    return shutil.which(candidate)
176
177
def config():
    """Build the argument parser, parse the command line and resolve tools.

    Besides parsing, this exits with status 1 when the clang executable cannot
    be found, replaces the ``%clang_cc1`` entry of ``SUBST`` with one that
    also passes clang's builtin include directory (when clang reports it), and
    sets ``args.opt`` to ``None`` when no opt executable is found.

    Returns:
        A ``(args, parser)`` tuple.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter, description=__doc__
    )
    add = parser.add_argument
    add("--llvm-bin", help="llvm $prefix/bin path")
    add("--clang", help='"clang" executable, defaults to $llvm_bin/clang')
    add(
        "--clang-args",
        type=str_to_commandline,
        default=[],
        help="Space-separated extra args to clang, e.g. --clang-args=-v",
    )
    add("--opt", help='"opt" executable, defaults to $llvm_bin/opt')
    add(
        "--functions",
        nargs="+",
        help="A list of function name regexes. "
        "If specified, update CHECK lines for functions matching at least one regex",
    )
    add(
        "--x86_extra_scrub",
        action="store_true",
        help="Use more regex for x86 matching to reduce diffs between various subtargets",
    )
    add(
        "--function-signature",
        action="store_true",
        help="Keep function signature information around for the check line",
    )
    add(
        "--check-attributes",
        action="store_true",
        help='Check "Function Attributes" for functions',
    )
    add(
        "--check-globals",
        nargs="?",
        const="all",
        default="default",
        choices=["none", "smart", "all"],
        help="Check global entries (global variables, metadata, attribute sets, ...) for functions",
    )
    add("tests", nargs="+")

    args = common.parse_commandline_args(parser)
    infer_dependent_args(args)

    if not find_executable(args.clang):
        print("Please specify --llvm-bin or --clang", file=sys.stderr)
        sys.exit(1)

    # Determine the builtin includes directory so that we can update tests that
    # depend on the builtin headers. See get_clang_builtin_include_dir() and
    # use_clang() in llvm/utils/lit/lit/llvm/config.py.
    include_query = [args.clang, "-print-file-name=include"]
    try:
        raw_include_dir = subprocess.check_output(include_query)
    except subprocess.CalledProcessError:
        common.warn(
            "Could not determine clang builtins directory, some tests "
            "might not update correctly."
        )
    else:
        builtin_include_dir = raw_include_dir.decode().strip()
        SUBST["%clang_cc1"] = [
            "-cc1",
            "-internal-isystem",
            builtin_include_dir,
            "-nostdsysteminc",
        ]

    # Many uses of this tool will not need an opt binary, because it's only
    # needed for updating a test that runs clang | opt | FileCheck. So we
    # defer this error message until we find that opt is actually needed.
    if not find_executable(args.opt):
        args.opt = None

    return args, parser
258
259
def get_function_body(
    builder, args, filename, clang_args, extra_commands, prefixes, raw_tool_output
):
    """Run the extra pipeline commands, then hand the final output to *builder*.

    For each entry of *extra_commands*, the current output is written to a
    temporary file whose name is passed to ``common.invoke_tool``; the tool's
    result becomes the new output. A command named ``opt`` is replaced by
    ``args.opt``.

    Exits with status 1 when ``opt`` is required but ``args.opt`` is ``None``
    (*filename* is named in the message), or when *clang_args* does not
    contain ``-emit-llvm``.
    """
    # TODO Clean up duplication of asm/common build_function_body_dictionary
    tool_output = raw_tool_output
    for command in extra_commands:
        argv = shlex.split(command)
        with tempfile.NamedTemporaryFile() as scratch:
            scratch.write(tool_output.encode())
            scratch.flush()
            if argv[0] == "opt":
                if args.opt is None:
                    print(
                        filename,
                        "needs to run opt. Please specify --llvm-bin or --opt",
                        file=sys.stderr,
                    )
                    sys.exit(1)
                argv[0] = args.opt
            tool_output = common.invoke_tool(argv[0], argv[1:], scratch.name)

    if "-emit-llvm" not in clang_args:
        print(
            "The clang command line should include -emit-llvm as asm tests "
            "are discouraged in Clang testsuite.",
            file=sys.stderr,
        )
        sys.exit(1)

    builder.process_run_line(
        common.OPT_FUNCTION_RE, common.scrub_body, tool_output, prefixes
    )
    builder.processed_prefixes(prefixes)
291
292
def exec_run_line(exe):
    """Run the command *exe* (an argument list) with its output captured.

    On a non-zero exit status the command line and the captured stderr and
    stdout are written to stderr and the process exits with status 3.
    """
    proc = subprocess.Popen(
        exe,
        universal_newlines=True,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    out_text, err_text = proc.communicate()
    if proc.returncode == 0:
        return
    sys.stderr.write("Failed to run " + " ".join(exe) + "\n")
    sys.stderr.write(err_text)
    sys.stderr.write(out_text)
    sys.exit(3)
303
304
def main():
    """Regenerate the CHECK lines of every test file given on the command line.

    For each test, the RUN lines are split into clang invocations piped into
    FileCheck (whose output is used to build checks) and other commands (which
    are only executed). The test file is then rewritten in place with the
    newly generated check lines.

    Returns:
        0, which is used as the process exit status.
    """
    initial_args, parser = config()
    script_name = os.path.basename(__file__)

    for ti in common.itertests(
        initial_args.tests,
        parser,
        "utils/" + script_name,
        comment_prefix="//",
        argparse_callback=infer_dependent_args,
    ):
        # Build a list of filechecked and non-filechecked RUN lines.
        # Each entry is a tuple (check_prefixes, args, extra_commands,
        # triple_in_cmd); check_prefixes is None for lines that are only
        # executed.
        run_list = []
        line2func_list = collections.defaultdict(list)

        # lit-like substitutions applied to the arguments of each RUN line.
        # For "%t" only the generated temporary file name is kept; the file
        # object itself is not retained.
        subs = {
            "%s": ti.path,
            "%t": tempfile.NamedTemporaryFile().name,
            "%S": os.path.dirname(ti.path),
        }

        for l in ti.run_lines:
            # Split the pipeline: the first command is inspected below, the
            # last one is expected to be FileCheck, anything in between is
            # kept as extra commands.
            commands = [cmd.strip() for cmd in l.split("|")]

            triple_in_cmd = None
            m = common.TRIPLE_ARG_RE.search(commands[0])
            if m:
                triple_in_cmd = m.groups()[0]

            # Parse executable args.
            exec_args = shlex.split(commands[0])
            # Execute non-clang runline.
            if exec_args[0] not in SUBST:
                # Do lit-like substitutions.
                for s in subs:
                    exec_args = [
                        i.replace(s, subs[s]) if s in i else i for i in exec_args
                    ]
                run_list.append((None, exec_args, None, None))
                continue
            # This is a clang runline, apply %clang substitution rule, do lit-like substitutions,
            # and append args.clang_args
            clang_args = exec_args
            # Replace the leading %clang* token with its SUBST arguments.
            clang_args[0:1] = SUBST[clang_args[0]]
            for s in subs:
                clang_args = [
                    i.replace(s, subs[s]) if s in i else i for i in clang_args
                ]
            clang_args += ti.args.clang_args

            # Extract -check-prefix in FileCheck args
            filecheck_cmd = commands[-1]
            common.verify_filecheck_prefixes(filecheck_cmd)
            if not filecheck_cmd.startswith("FileCheck "):
                # Execute non-filechecked clang runline.
                exe = [ti.args.clang] + clang_args
                run_list.append((None, exe, None, None))
                continue

            check_prefixes = common.get_check_prefixes(filecheck_cmd)
            run_list.append((check_prefixes, clang_args, commands[1:-1], triple_in_cmd))

        # Execute clang, generate LLVM IR, and extract functions.

        # Store only filechecked runlines.
        filecheck_run_list = [i for i in run_list if i[0]]
        ginfo = common.make_ir_generalizer(version=ti.args.version)
        builder = common.FunctionTestBuilder(
            run_list=filecheck_run_list,
            flags=ti.args,
            scrubber_args=[],
            path=ti.path,
            ginfo=ginfo,
        )

        for prefixes, args, extra_commands, triple_in_cmd in run_list:
            # Execute non-filechecked runline.
            if not prefixes:
                print(
                    "NOTE: Executing non-FileChecked RUN line: " + " ".join(args),
                    file=sys.stderr,
                )
                exec_run_line(args)
                continue

            clang_args = args
            common.debug("Extracted clang cmd: clang {}".format(clang_args))
            common.debug("Extracted FileCheck prefixes: {}".format(prefixes))

            # Invoke external tool and extract function bodies.
            raw_tool_output = common.invoke_tool(ti.args.clang, clang_args, ti.path)
            get_function_body(
                builder,
                ti.args,
                ti.path,
                clang_args,
                extra_commands,
                prefixes,
                raw_tool_output,
            )

            # Invoke clang -Xclang -ast-dump=json to get mapping from start lines to
            # mangled names. Forward all clang args for now.
            for k, v in get_line2func_list(
                ti.args, clang_args, common.get_globals_name_prefix(raw_tool_output)
            ).items():
                line2func_list[k].extend(v)

        func_dict = builder.finish_and_get_func_dict()
        global_vars_seen_dict = {}
        # Every check prefix used by any filechecked RUN line.
        prefix_set = set([prefix for p in filecheck_run_list for prefix in p[0]])
        output_lines = []
        has_checked_pre_function_globals = False

        include_generated_funcs = common.find_arg_in_test(
            ti,
            lambda args: ti.args.include_generated_funcs,
            "--include-generated-funcs",
            True,
        )
        generated_prefixes = []
        if include_generated_funcs:
            # Generate the appropriate checks for each function.  We need to emit
            # these in the order according to the generated output so that CHECK-LABEL
            # works properly.  func_order provides that.

            # It turns out that when clang generates functions (for example, with
            # -fopenmp), it can sometimes cause functions to be re-ordered in the
            # output, even functions that exist in the source file.  Therefore we
            # can't insert check lines before each source function and instead have to
            # put them at the end.  So the first thing to do is dump out the source
            # lines.
            common.dump_input_lines(output_lines, ti, prefix_set, "//")

            # Now generate all the checks.
            def check_generator(my_output_lines, prefixes, func):
                return common.add_ir_checks(
                    my_output_lines,
                    "//",
                    prefixes,
                    func_dict,
                    func,
                    False,
                    ti.args.function_signature,
                    ginfo,
                    global_vars_seen_dict,
                    is_filtered=builder.is_filtered(),
                )

            if ti.args.check_globals != 'none':
                generated_prefixes.extend(
                    common.add_global_checks(
                        builder.global_var_dict(),
                        "//",
                        run_list,
                        output_lines,
                        ginfo,
                        global_vars_seen_dict,
                        False,
                        True,
                        ti.args.check_globals,
                    )
                )
            generated_prefixes.extend(
                common.add_checks_at_end(
                    output_lines,
                    filecheck_run_list,
                    builder.func_order(),
                    "//",
                    lambda my_output_lines, prefixes, func: check_generator(
                        my_output_lines, prefixes, func
                    ),
                )
            )
        else:
            # Normal mode.  Put checks before each source function.
            for line_info in ti.iterlines(output_lines):
                idx = line_info.line_number
                line = line_info.line
                args = line_info.args
                include_line = True
                m = common.CHECK_RE.match(line)
                if m and m.group(1) in prefix_set:
                    continue  # Don't append the existing CHECK lines
                # Skip special separator comments added by common.add_global_checks.
                if line.strip() == "//" + common.SEPARATOR:
                    continue
                if idx in line2func_list:
                    # Mangled names already emitted for this source line.
                    added = set()
                    for spell, mangled, search in line2func_list[idx]:
                        # One line may contain multiple function declarations.
                        # Skip if the mangled name has been added before.
                        # The line number may come from an included file, we simply require
                        # the search string (normally the function's spelling name, but is
                        # the class's spelling name for class specializations) to appear on
                        # the line to exclude functions from other files.
                        if mangled in added or search not in line:
                            continue
                        if args.functions is None or any(
                            re.search(regex, spell) for regex in args.functions
                        ):
                            last_line = output_lines[-1].strip()
                            while last_line == "//":
                                # Remove the comment line since we will generate a new comment
                                # line as part of common.add_ir_checks()
                                output_lines.pop()
                                last_line = output_lines[-1].strip()
                            # Emit the pre-function global checks only once,
                            # before the first function that gets checks.
                            if (
                                ti.args.check_globals != 'none'
                                and not has_checked_pre_function_globals
                            ):
                                generated_prefixes.extend(
                                    common.add_global_checks(
                                        builder.global_var_dict(),
                                        "//",
                                        run_list,
                                        output_lines,
                                        ginfo,
                                        global_vars_seen_dict,
                                        False,
                                        True,
                                        ti.args.check_globals,
                                    )
                                )
                                has_checked_pre_function_globals = True
                            # Separate the checks of several functions that
                            # share one source line with an empty comment line.
                            if added:
                                output_lines.append("//")
                            added.add(mangled)
                            generated_prefixes.extend(
                                common.add_ir_checks(
                                    output_lines,
                                    "//",
                                    filecheck_run_list,
                                    func_dict,
                                    mangled,
                                    False,
                                    args.function_signature,
                                    ginfo,
                                    global_vars_seen_dict,
                                    is_filtered=builder.is_filtered(),
                                )
                            )
                            # Drop a source line that is just "//" once checks
                            # have been emitted for it.
                            if line.rstrip("\n") == "//":
                                include_line = False

                if include_line:
                    output_lines.append(line.rstrip("\n"))

        # Second add_global_checks call, made after the function checks; it
        # differs from the earlier calls only in its second boolean argument.
        if ti.args.check_globals != 'none':
            generated_prefixes.extend(
                common.add_global_checks(
                    builder.global_var_dict(),
                    "//",
                    run_list,
                    output_lines,
                    ginfo,
                    global_vars_seen_dict,
                    False,
                    False,
                    ti.args.check_globals,
                )
            )
        if ti.args.gen_unused_prefix_body:
            output_lines.extend(
                ti.get_checks_for_unused_prefixes(run_list, generated_prefixes)
            )
        common.debug("Writing %d lines to %s..." % (len(output_lines), ti.path))
        # Overwrite the test file in place, UTF-8 encoded with "\n" line endings.
        with open(ti.path, "wb") as f:
            f.writelines(["{}\n".format(l).encode("utf-8") for l in output_lines])

    return 0
576
577
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
580