xref: /llvm-project/clang/test/AST/gen_ast_dump_json_test.py (revision dd3c26a045c081620375a878159f536758baba6e)
1#!/usr/bin/env python3
2
3from __future__ import print_function
4from collections import OrderedDict
5from shutil import copyfile
6import argparse
7import json
8import os
9import re
10import subprocess
11import sys
12import tempfile
13
14
def normalize(dict_var):
    """Replace run-specific strings in a decoded JSON AST with FileCheck wildcards.

    The mapping is modified in place.  Nested ``OrderedDict`` values, and
    ``OrderedDict`` elements of list values, are processed recursively.  Each
    string value is rewritten by the first rule that applies:

    * it starts with ``0x`` followed by hex digits and is not exactly
      ``"0x0"``: replaced by ``"0x{{.*}}"``;
    * it names an existing file: replaced by ``"{{.*}}"``;
    * otherwise every space-separated token whose leading part (what remains
      after peeling off at most two trailing ``:``-separated fields) names an
      existing file gets that leading part replaced by ``"{{.*}}"`` while the
      trailing fields are kept.

    Args:
        dict_var: The mapping to normalize in place.
    """
    for key, value in dict_var.items():
        if isinstance(value, OrderedDict):
            normalize(value)
        elif isinstance(value, list):
            for element in value:
                if isinstance(element, OrderedDict):
                    normalize(element)
        elif isinstance(value, str):
            if value != "0x0" and re.match(r"0x[0-9A-Fa-f]+", value):
                dict_var[key] = "0x{{.*}}"
            elif os.path.isfile(value):
                dict_var[key] = "{{.*}}"
            else:
                out_tokens = []
                for token in value.split(" "):
                    parts = token.rsplit(":", 2)
                    if os.path.isfile(parts[0]):
                        # A token may be "path", "path:line" or
                        # "path:line:col"; keep however many trailing fields
                        # are present instead of assuming exactly two.
                        out_tokens.append(":".join(["{{.*}}"] + parts[1:]))
                    else:
                        out_tokens.append(token)

                dict_var[key] = " ".join(out_tokens)
41
42
def filter_json(dict_var, filters, out):
    """Collect the nodes of a decoded JSON AST that match one of *filters*.

    The members of *dict_var* are visited in order.  As soon as a member is a
    string contained in *filters*, *dict_var* itself is appended to *out* and
    its remaining members are not examined.  Until that happens, nested
    ``OrderedDict`` members, and ``OrderedDict`` elements of list members,
    are searched recursively.

    Args:
        dict_var: The node (mapping) to examine.
        filters: Container of strings to match against string members.
        out: List that receives every matching node; modified in place.
    """
    for member in dict_var.values():
        if type(member) is str:
            if member not in filters:
                continue
            out.append(dict_var)
            # A node is reported once; later members are skipped.
            return

        if isinstance(member, OrderedDict):
            candidates = [member]
        elif isinstance(member, list):
            candidates = member
        else:
            continue

        for candidate in candidates:
            if isinstance(candidate, OrderedDict):
                filter_json(candidate, filters, out)
55
56
def default_clang_path():
    """Return the path of a ``clang`` file located next to this script.

    Returns:
        The joined path ``<directory of this file>/clang`` when it is an
        existing file, otherwise ``None``.
    """
    script_dir = os.path.dirname(__file__)
    candidate = os.path.join(script_dir, "clang")
    return candidate if os.path.isfile(candidate) else None
62
63
def main():
    """Parse the command line and process every requested source file.

    Exits through ``sys.exit`` with a message when no clang binary is given
    or found, or when the given binary does not exist.  Exits with status 1
    after all sources were attempted if the clang command failed for any of
    them.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--clang",
        help="The clang binary (could be a relative or absolute path)",
        action="store",
        default=default_clang_path(),
    )
    parser.add_argument(
        "--source",
        help="the source file(s). Without --update, the command used to generate the JSON "
        "will be of the format <clang> -cc1 -ast-dump=json <opts> <source>",
        action="store",
        nargs=argparse.ONE_OR_MORE,
        required=True,
    )
    parser.add_argument(
        "--filters",
        help="comma separated list of AST filters. Ex: --filters=TypedefDecl,BuiltinType",
        action="store",
        default="",
    )
    update_or_generate_group = parser.add_mutually_exclusive_group()
    update_or_generate_group.add_argument(
        "--update", help="Update the file in-place", action="store_true"
    )
    update_or_generate_group.add_argument(
        "--opts", help="other options", action="store", default="", type=str
    )
    parser.add_argument(
        "--update-manual",
        help="When using --update, also update files that do not have the "
        "autogenerated disclaimer",
        action="store_true",
    )
    args = parser.parse_args()

    if not args.source:
        sys.exit("Specify the source file to give to clang.")

    # The default for --clang is None when no "clang" file sits next to this
    # script; os.path.abspath(None) would raise a TypeError, so report the
    # problem explicitly instead.
    if not args.clang:
        sys.exit("clang binary not specified; pass it with --clang.")

    clang_binary = os.path.abspath(args.clang)
    if not os.path.isfile(clang_binary):
        sys.exit("clang binary specified not present.")

    any_failed = False
    for src in args.source:
        status = process_file(
            src,
            clang_binary,
            cmdline_filters=args.filters,
            cmdline_opts=args.opts,
            do_update=args.update,
            force_update=args.update_manual,
        )
        # process_file reports a failed clang invocation with -1; keep going
        # with the remaining sources but remember the failure.
        if status == -1:
            any_failed = True

    if any_failed:
        sys.exit(1)
117
118
def _infer_dump_options(first_line):
    """Extract the clang options from the ``RUN: %clang_cc1`` line of a test.

    Exits through ``sys.exit`` when the line has no ``RUN: %clang_cc1``, no
    ``-ast-dump=json`` option or no ``%s`` placeholder.  The returned list
    has the ``%s`` placeholder removed.
    """
    if "RUN: %clang_cc1 " not in first_line:
        sys.exit(
            "When using --update the first line of the input file must contain RUN: %clang_cc1"
        )
    clang_start = first_line.find("%clang_cc1") + len("%clang_cc1")
    file_check_idx = first_line.rfind("| FileCheck")
    # str.rfind returns -1 (which is truthy) when the text is absent, so the
    # result must be compared against -1 rather than tested for truth.
    if file_check_idx != -1:
        dump_cmd = first_line[clang_start:file_check_idx]
    else:
        dump_cmd = first_line[clang_start:].rstrip("\r\n")
    print("Inferred run arguments as '", dump_cmd, "'", sep="")
    options = dump_cmd.split()
    if "-ast-dump=json" not in options:
        sys.exit("ERROR: RUN: line does not contain -ast-dump=json")
    if "%s" not in options:
        sys.exit("ERROR: RUN: line does not contain %s")
    options.remove("%s")
    return options


def _decode_json_documents(json_str):
    """Decode the consecutive JSON objects found at the start of *json_str*.

    Whitespace between documents is skipped.  Decoding stops quietly at the
    end of the string or at the first position that does not hold a JSON
    object; whatever was decoded up to that point is returned.
    """
    decoder = json.JSONDecoder(object_pairs_hook=OrderedDict)
    documents = []
    pos = 0
    end = len(json_str)
    while True:
        while pos < end and json_str[pos].isspace():
            pos += 1
        if pos >= end:
            break
        try:
            # raw_decode reports the end offset relative to the slice.
            document, consumed = decoder.raw_decode(json_str[pos:])
        except ValueError:
            break
        if not isinstance(document, OrderedDict):
            break
        documents.append(document)
        pos += consumed
    return documents


def _format_check_block(out_ast):
    """Render one AST as a block of FileCheck ``CHECK`` comment lines.

    The first two lines of the indented JSON dump (the opening brace and the
    first member) are not emitted.  The first emitted line uses ``CHECK:``
    and all following lines use ``CHECK-NEXT:``.
    """
    dumped = json.dumps(out_ast, indent=1, ensure_ascii=False)
    pieces = ["\n\n", "// CHECK-NOT: {{^}}Dumping\n"]
    for index, dumped_line in enumerate(dumped.splitlines()[2:]):
        directive = "// CHECK" if index == 0 else "// CHECK-NEXT"
        pieces.append("%s: %s\n" % (directive, dumped_line.rstrip()))
    return "".join(pieces)


def process_file(
    source_file, clang_binary, cmdline_filters, cmdline_opts, do_update, force_update
):
    """Run clang's JSON AST dump on *source_file* and append CHECK lines for it.

    With *do_update* the clang options are taken from the ``RUN:`` line that
    must be the first line of the file, the filters may be inferred from a
    previously generated ``// using --filters=`` line, and the file is
    rewritten in place.  Otherwise the options come from *cmdline_opts* plus
    ``-ast-dump=json`` and the result is written next to the source with
    ``-json`` inserted before the file extension.

    Args:
        source_file: Path of the test source to process.
        clang_binary: Path of the clang executable to run.
        cmdline_filters: Comma separated node filters, or an empty string.
        cmdline_opts: Extra clang options as one string (used without update).
        do_update: Update *source_file* in place using its RUN line.
        force_update: With *do_update*, also update files that lack the
            autogenerated note.

    Returns:
        0 on success, -1 when the clang command failed, and ``None`` when an
        update was skipped because the file is not autogenerated.
    """
    note_firstline = (
        "// NOTE: CHECK lines have been autogenerated by " "gen_ast_dump_json_test.py"
    )
    filters_line_prefix = "// using --filters="
    note = note_firstline

    if do_update:
        # When updating the first line of the test must be a RUN: line
        with open(source_file, "r") as srcf:
            first_line = srcf.readline()
            found_autogenerated_line = False
            filters_line = None
            for line in srcf:
                if found_autogenerated_line:
                    # Only the line right after the note may carry filters.
                    if line.startswith(filters_line_prefix):
                        filters_line = line[len(filters_line_prefix) :].rstrip()
                    break
                if line.startswith(note_firstline):
                    found_autogenerated_line = True
        if not found_autogenerated_line and not force_update:
            print(
                "Not updating",
                source_file,
                "since it is not autogenerated.",
                file=sys.stderr,
            )
            return
        if not cmdline_filters and filters_line:
            cmdline_filters = filters_line
            print("Inferred filters as '" + cmdline_filters + "'")

        options = _infer_dump_options(first_line)
    else:
        options = cmdline_opts.split()
        options.append("-ast-dump=json")

    # Look at the options only, so a clang path that happens to contain the
    # text cannot be mistaken for the option.
    using_ast_dump_filter = any("ast-dump-filter" in arg for arg in options)
    cmd = [clang_binary, "-cc1"] + options + [source_file]
    print("Will run", cmd)
    filters = set()
    if cmdline_filters:
        note += "\n" + filters_line_prefix + cmdline_filters
        filters = set(cmdline_filters.split(","))
    print("Will use the following filters:", filters)

    try:
        json_str = subprocess.check_output(cmd).decode()
    except Exception as ex:
        print("The clang command failed with %s" % ex)
        return -1

    out_asts = []
    if using_ast_dump_filter:
        # If we're using a filter, then we might have multiple JSON objects
        # in the output, so decode them one after another.
        for document in _decode_json_documents(json_str):
            normalize(document)
            out_asts.append(document)
    else:
        j = json.loads(json_str, object_pairs_hook=OrderedDict)
        normalize(j)

        if not filters:
            out_asts.append(j)
        else:
            filter_json(j, filters, out_asts)

    tmp = tempfile.NamedTemporaryFile("w", delete=False)
    try:
        with tmp:
            with open(source_file, "r") as srcf:
                for line in srcf:
                    # copy up to the note:
                    if line.rstrip() == note_firstline:
                        break
                    tmp.write(line)
            tmp.write(note + "\n")
            for out_ast in out_asts:
                tmp.write(_format_check_block(out_ast))
        # The temporary file is closed here, so it can be copied safely.
        if do_update:
            print("Updating json appended source file to %s." % source_file)
            copyfile(tmp.name, source_file)
        else:
            # splitext only looks at the last path component, so a dot in a
            # directory name or a missing extension is handled correctly.
            root, ext = os.path.splitext(source_file)
            dest_path = "%s-json%s" % (root, ext)
            print("Writing json appended source file to %s." % dest_path)
            copyfile(tmp.name, dest_path)
    finally:
        # Never leave the temporary file behind, even when writing fails.
        os.remove(tmp.name)
    return 0
252
253
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
256