xref: /netbsd-src/external/mpl/bind/dist/doc/misc/parsegrammar.py (revision 8aaca124c0ad52af9550477f296b63debc7b4c98)
1*8aaca124Schristos############################################################################
2*8aaca124Schristos# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3*8aaca124Schristos#
4*8aaca124Schristos# SPDX-License-Identifier: MPL-2.0
5*8aaca124Schristos#
6*8aaca124Schristos# This Source Code Form is subject to the terms of the Mozilla Public
7*8aaca124Schristos# License, v. 2.0. If a copy of the MPL was not distributed with this
8*8aaca124Schristos# file, you can obtain one at https://mozilla.org/MPL/2.0/.
9*8aaca124Schristos#
10*8aaca124Schristos# See the COPYRIGHT file distributed with this work for additional
11*8aaca124Schristos# information regarding copyright ownership.
12*8aaca124Schristos############################################################################
13*8aaca124Schristos
14*8aaca124Schristos"""
15*8aaca124SchristosRead ISC config grammar description produced by "cfg_test --grammar",
16*8aaca124Schristostransform it into JSON, and print it to stdout.
17*8aaca124Schristos
18*8aaca124SchristosBeware: This parser is pretty dumb and heavily depends on cfg_test output
19*8aaca124Schristosformat. See parse_mapbody() for more details.
20*8aaca124Schristos
21*8aaca124SchristosMaps are recursively parsed into sub-dicts, all other elements (lists etc.)
22*8aaca124Schristosare left intact and returned as one string.
23*8aaca124Schristos
An output example from the named.conf grammar, showing three variants, follows.
25*8aaca124SchristosKeys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
26*8aaca124Schristosend node, key "_mapbody" denotes a nested map.
27*8aaca124Schristos
28*8aaca124Schristos{
29*8aaca124Schristos    "acl": {
30*8aaca124Schristos        "_flags": [
31*8aaca124Schristos            "may occur multiple times"
32*8aaca124Schristos        ],
33*8aaca124Schristos        "_grammar": "<string> { <address_match_element>; ... }"
34*8aaca124Schristos    },
35*8aaca124Schristos    "http": {
36*8aaca124Schristos        "_flags": [
37*8aaca124Schristos            "may occur multiple times"
38*8aaca124Schristos        ],
39*8aaca124Schristos        "_id": "<string>",
40*8aaca124Schristos        "_mapbody": {
41*8aaca124Schristos            "endpoints": {
42*8aaca124Schristos                "_grammar": "{ <quoted_string>; ... }"
43*8aaca124Schristos            },
44*8aaca124Schristos            "streams-per-connection": {
45*8aaca124Schristos                "_grammar": "<integer>"
46*8aaca124Schristos            }
47*8aaca124Schristos        }
48*8aaca124Schristos    },
49*8aaca124Schristos    "options": {
50*8aaca124Schristos        "_mapbody": {
51*8aaca124Schristos            "rate-limit": {
52*8aaca124Schristos                "_mapbody": {
53*8aaca124Schristos                    "all-per-second": {
54*8aaca124Schristos                        "_grammar": "<integer>"
55*8aaca124Schristos                    }
56*8aaca124Schristos                }
57*8aaca124Schristos            }
58*8aaca124Schristos        }
59*8aaca124Schristos    }
60*8aaca124Schristos}
61*8aaca124Schristos"""
62*8aaca124Schristosimport fileinput
63*8aaca124Schristosimport json
64*8aaca124Schristosimport re
65*8aaca124Schristos
# Comment texts recognized as flags. parse_flags() reports each entry that
# occurs as a substring of a line's comment, in the order listed here.
FLAGS = [
    "may occur multiple times",
    "obsolete",
    "deprecated",
    "experimental",
    "test only",
]

# Shape a map key must have as a whole (parse_mapbody() checks it with
# fullmatch): one or more ASCII letters, digits, or dashes.
KEY_REGEX = re.compile("[a-zA-Z0-9-]+")
75*8aaca124Schristos
76*8aaca124Schristos
def split_comments(line):
    """Split a line into its data part and its trailing comment.

    The comment starts at the earliest occurrence of ``#``, ``//`` or ``/*``
    and runs to the end of the line. Nothing is stripped: concatenating the
    two returned strings gives back the original line.

    Limitations (by design, this is not a real tokenizer):
    - quoted strings are not supported at all
    - a /* comment must end at the end of the line
    - only one /* comment per line is supported

    The /* sanity checks apply only when /* is what opens the comment.
    A /* that appears inside a # or // comment is plain comment text.

    Args:
        line: one line of input text, with or without trailing newline.

    Returns:
        Tuple ``(noncomment, comment)``; ``comment`` is "" if there is none.

    Raises:
        AssertionError: the line contains a double quote.
        NotImplementedError: the line uses an unsupported /* comment form.
    """
    if '"' in line:
        # Raised explicitly (not via ``assert``) so the check is still
        # enforced when Python runs with -O.
        raise AssertionError('lines with " are not supported')

    # Locate the earliest comment opener and remember which one it is.
    data_end_idx = len(line)
    opener = None
    for delimiter in ("#", "//", "/*"):
        idx = line.find(delimiter)
        if idx != -1 and idx < data_end_idx:
            data_end_idx = idx
            opener = delimiter

    if opener == "/*":
        # sanity checks
        if not line.rstrip().endswith("*/"):
            raise NotImplementedError(
                "unsupported /* comment, does not end at the end of line", line
            )
        if "/*" in line[data_end_idx + len(opener) :]:
            raise NotImplementedError(
                "unsupported line with multiple /* comments", line
            )

    noncomment = line[:data_end_idx]
    comment = line[data_end_idx:]
    return noncomment, comment
106*8aaca124Schristos
107*8aaca124Schristos
def parse_line(filein):
    """Iterate over input lines, yielding (data, comment) for non-empty ones.

    Each line is split by split_comments() and both halves are stripped of
    surrounding whitespace. Lines whose data part is empty (blank lines and
    comment-only lines) are skipped.
    """
    for raw in filein:
        data, remark = (part.strip() for part in split_comments(raw))
        if data:
            yield data, remark
117*8aaca124Schristos
118*8aaca124Schristos
def parse_flags(comments):
    """Return the known flags present in a comment string.

    Matching is a plain substring test against every entry of FLAGS, so the
    comment must contain the exact wording used by cfg_test. The result keeps
    the order of FLAGS, not the order of appearance in the comment.
    """
    return [known for known in FLAGS if known in comments]
126*8aaca124Schristos
127*8aaca124Schristos
def parse_mapbody(filein):
    """Parse body of a "map" in ISC config format.

    Input lines can be only:
    - whitespace & comments only -> ignore
    - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
    - <keyword> <anything> { -> parse sub-map and store (optional) <anything> as "_id",
                                producing nested dict under "_mapbody"
    Also store known strings found at the end of line in "_flags". For a
    nested map the flags are taken from its closing }; line.

    Returns:
    - tuple (map dict, map flags) when }; line is reached
    - map dict when we run out of lines without the closing };

    Raises:
    - NotImplementedError when the first word of a line is not a valid key
    - ValueError when a nested map is not terminated by };
    - AssertionError when a statement ends with neither ; nor {, or when
      the input contains no statements at all
    """
    thismap, flags, closed = _parse_mapbody_lines(filein)
    if closed:
        return thismap, flags
    # Ran out of lines: legitimate only at the end of the top-level map body,
    # and the top level must contain at least one statement.
    if not thismap:
        raise AssertionError("no statements found in input")
    return thismap


def _parse_mapbody_lines(filein):
    """Parse statements up to the closing }; or the end of input.

    Returns (map dict, flags, closed). ``closed`` is True if a }; line ended
    the map and False if the input ran out. It is an explicit marker, so an
    unterminated map cannot be mistaken for a (dict, flags) pair.
    """
    thismap = {}
    for line, comment in parse_line(filein):
        flags = parse_flags(comment)
        if line == "};":  # end of a nested map
            return thismap, flags, True

        # first word - a map key name
        # beware: some statements do not have parameters, e.g. "null;"
        key = line.split()[0].rstrip(";")
        # map key sanity check
        if not KEY_REGEX.fullmatch(key):
            raise NotImplementedError("suspicious keyword detected", line)

        # omit keyword from the grammar, and also skip the final ; or {
        grammar = line[len(key) :].strip()[:-1].strip()

        # register the key before recursing, a repeated key replaces the
        # earlier entry
        entry = thismap[key] = {}
        if line.endswith("{"):
            # nested map, recurse, but keep "extra identifiers" if any;
            # flags of a map come from its closing line, not the opening one
            subkeys, flags, closed = _parse_mapbody_lines(filein)
            if not closed:
                raise ValueError("unfinished nested map, missing }; detected")
            if flags:
                entry["_flags"] = flags
            if grammar:
                # for lines which look like "view <name> {" store "<name>"
                entry["_id"] = grammar
            entry["_mapbody"] = subkeys
        else:
            if not line.endswith(";"):
                # explicit raise so the check is not stripped by python -O
                raise AssertionError("statement does not end with ; or {", line)
            if flags:
                entry["_flags"] = flags
            entry["_grammar"] = grammar

    return thismap, [], False
184*8aaca124Schristos
185*8aaca124Schristos
def main():
    """Parse grammar from stdin (or files named in argv) and print it as JSON."""
    with fileinput.input() as source:
        parsed = parse_mapbody(source)
    rendered = json.dumps(parsed, indent=4)
    print(rendered)
191*8aaca124Schristos
192*8aaca124Schristos
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
195