xref: /netbsd-src/external/mpl/bind/dist/doc/misc/parsegrammar.py (revision 8aaca124c0ad52af9550477f296b63debc7b4c98)
1############################################################################
2# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3#
4# SPDX-License-Identifier: MPL-2.0
5#
6# This Source Code Form is subject to the terms of the Mozilla Public
7# License, v. 2.0. If a copy of the MPL was not distributed with this
8# file, you can obtain one at https://mozilla.org/MPL/2.0/.
9#
10# See the COPYRIGHT file distributed with this work for additional
11# information regarding copyright ownership.
12############################################################################
13
14"""
15Read ISC config grammar description produced by "cfg_test --grammar",
16transform it into JSON, and print it to stdout.
17
18Beware: This parser is pretty dumb and heavily depends on cfg_test output
19format. See parse_mapbody() for more details.
20
21Maps are recursively parsed into sub-dicts, all other elements (lists etc.)
22are left intact and returned as one string.
23
An output example from the named.conf grammar, showing three variants, follows.
Keys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
an end node, key "_mapbody" denotes a nested map.
27
28{
29    "acl": {
30        "_flags": [
31            "may occur multiple times"
32        ],
33        "_grammar": "<string> { <address_match_element>; ... }"
34    },
35    "http": {
36        "_flags": [
37            "may occur multiple times"
38        ],
39        "_id": "<string>",
40        "_mapbody": {
41            "endpoints": {
42                "_grammar": "{ <quoted_string>; ... }"
43            },
44            "streams-per-connection": {
45                "_grammar": "<integer>"
46            }
47        }
48    },
49    "options": {
50        "_mapbody": {
51            "rate-limit": {
52                "_mapbody": {
53                    "all-per-second": {
54                        "_grammar": "<integer>"
55                    }
56                }
57            }
58        }
59    }
60}
61"""
62import fileinput
63import json
64import re
65
# Flag strings recognised in trailing comments. parse_flags() looks for each
# of them as a plain substring of the comment text.
FLAGS = [
    "may occur multiple times",
    "obsolete",
    "deprecated",
    "experimental",
    "test only",
]

# A map key may consist only of ASCII letters, digits and hyphens;
# parse_mapbody() requires the whole first word of a statement to match.
KEY_REGEX = re.compile("[a-zA-Z0-9-]+")
75
76
def split_comments(line):
    """Split line into its non-comment and comment parts.

    Supports only #, //, and /* comments which end at the end of line.
    The comment starts at the leftmost delimiter found; any delimiter-like
    text after that point is simply part of the comment.

    It does NOT handle:
    - quoted strings
    - /* comments which do not end at line boundary
    - multiple /* comments on a single line

    Args:
        line: one input line, possibly including its trailing newline.

    Returns:
        Tuple (noncomment, comment). Neither part is stripped; comment is
        an empty string when the line contains no comment delimiter.

    Raises:
        AssertionError: the line contains a double quote.
        NotImplementedError: the comment is opened by /* and either does
            not end at the end of line or is followed by another /*.
    """
    assert '"' not in line, 'lines with " are not supported'

    # Position of the first occurrence of every delimiter present in line.
    positions = {}
    for delimiter in ("#", "//", "/*"):
        idx = line.find(delimiter)
        if idx != -1:
            positions[delimiter] = idx
    data_end_idx = min(positions.values(), default=len(line))

    # Sanity checks apply only when /* really opens the comment. A /* that
    # appears inside an earlier # or // comment is ordinary comment text.
    if positions.get("/*") == data_end_idx:
        if not line.rstrip().endswith("*/"):
            raise NotImplementedError(
                "unsupported /* comment, does not end at the end of line", line
            )
        if "/*" in line[data_end_idx + 1 :]:
            raise NotImplementedError(
                "unsupported line with multiple /* comments", line
            )

    noncomment = line[:data_end_idx]
    comment = line[data_end_idx:]
    return noncomment, comment
106
107
def parse_line(filein):
    """Yield (statement, comment) pairs for input lines that carry data.

    Every line from filein is split with split_comments() and both parts
    are stripped of surrounding whitespace. Lines whose non-comment part
    is empty (blank or comment-only lines) are skipped.
    """
    for raw in filein:
        data, remark = split_comments(raw)
        data = data.strip()
        if data:
            yield data, remark.strip()
117
118
def parse_flags(comments):
    """Return the known flags mentioned in comments, in FLAGS order.

    Matching is a plain substring test, so the text must contain the exact
    strings used by cfg_test.
    """
    return [known for known in FLAGS if known in comments]
126
127
def parse_mapbody(filein):
    """Parse body of a "map" in ISC config format.

    Input lines can be only:
    - whitespace & comments only -> ignore
    - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
    - <keyword> <anything> { -> parse sub-map and store (optional) <anything>
                                as "_id", producing nested dict under "_mapbody"
    Also store known strings found at the end of line in "_flags".

    Args:
        filein: iterable of input lines; it is shared with the recursive
            calls, so a nested map consumes the lines of its own body.

    Returns:
    - tuple (map dict, map flags) when }; line is reached
    - map dict when we run out of lines without the closing };

    Raises:
        NotImplementedError: the first word of a statement is not a valid key.
        ValueError: a nested map ran out of lines before its closing };
        AssertionError: a statement ends with neither ; nor {, or the input
            ended and no statement was parsed at this level.
    """
    thismap = {}
    for line, comment in parse_line(filein):
        flags = parse_flags(comment)
        if line == "};":  # end of a nested map
            return thismap, flags

        # first word - a map key name
        # beware: some statements do not have parameters, e.g. "null;"
        key = line.split()[0].rstrip(";")
        # map key sanity check
        if not KEY_REGEX.fullmatch(key):
            raise NotImplementedError("suspicious keyword detected", line)

        # omit keyword from the grammar
        grammar = line[len(key) :].strip()
        # also skip final ; or {
        grammar = grammar[:-1].strip()

        entry = {}
        thismap[key] = entry
        if line.endswith("{"):
            # nested map, recurse, but keep "extra identifiers" if any
            nested = parse_mapbody(filein)
            # A bare dict means the input ended before the closing };
            # Test the type explicitly: unpacking a dict which happens to
            # hold exactly two keys would succeed and yield its key names.
            if not isinstance(nested, tuple):
                raise ValueError("unfinished nested map, missing }; detected")
            # flags of a nested map are those found on its closing }; line
            subkeys, flags = nested
            if flags:
                entry["_flags"] = flags
            if grammar:
                # for lines which look like "view <name> {" store "<name>"
                entry["_id"] = grammar
            entry["_mapbody"] = subkeys
        else:
            assert line.endswith(";")
            if flags:
                entry["_flags"] = flags
            entry["_grammar"] = grammar

    # Ran out of lines: can happen only on the end of the top-level map-body!
    # Intentionally return the bare dict (no flags) so that the caller
    # parsing a nested map can tell that the closing }; was missing.
    assert len(thismap)
    return thismap
184
185
def main():
    """Parse grammar from stdin or from files named on the command line.

    The resulting structure is printed to stdout as JSON indented by
    four spaces.
    """
    with fileinput.input() as source:
        parsed = parse_mapbody(source)
    rendered = json.dumps(parsed, indent=4)
    print(rendered)
191
192
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
195