############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################

"""
Read ISC config grammar description produced by "cfg_test --grammar",
transform it into JSON, and print it to stdout.

Beware: This parser is pretty dumb and heavily depends on cfg_test output
format. See parse_mapbody() for more details.

Maps are recursively parsed into sub-dicts, all other elements (lists etc.)
are left intact and returned as one string.

Output example from named.conf grammar showing three variants follow.
Keys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
end node, key "_mapbody" denotes a nested map.

{
    "acl": {
        "_flags": [
            "may occur multiple times"
        ],
        "_grammar": "<string> { <address_match_element>; ... }"
    },
    "http": {
        "_flags": [
            "may occur multiple times"
        ],
        "_id": "<string>",
        "_mapbody": {
            "endpoints": {
                "_grammar": "{ <quoted_string>; ... }"
            },
            "streams-per-connection": {
                "_grammar": "<integer>"
            }
        }
    },
    "options": {
        "_mapbody": {
            "rate-limit": {
                "_mapbody": {
                    "all-per-second": {
                        "_grammar": "<integer>"
                    }
                }
            }
        }
    }
}
"""
import fileinput
import json
import re

# Exact flag strings emitted by cfg_test in trailing comments.
FLAGS = [
    "may occur multiple times",
    "obsolete",
    "deprecated",
    "experimental",
    "test only",
]

# Characters allowed in a map key (statement keyword).
KEY_REGEX = re.compile("[a-zA-Z0-9-]+")


def split_comments(line):
    """Split line into its non-comment and comment parts.

    The split happens at the first #, //, or /* found in the line. Nothing
    is stripped here; both parts are returned verbatim so that
    noncomment + comment == line.

    Supports only #, //, and /* comments which end at the end of line.
    It does NOT handle:
    - quoted strings
    - /* comments which do not end at line boundary
    - multiple /* comments on a single line

    Returns:
    - tuple (noncomment, comment); comment is "" when there is none

    Raises:
    - AssertionError when the line contains a double quote
    - NotImplementedError for unsupported /* comment layouts
    """
    assert '"' not in line, 'lines with " are not supported'

    starts = [line.find(delimiter) for delimiter in ("#", "//", "/*")]
    data_end_idx = min((idx for idx in starts if idx != -1), default=len(line))

    # Sanity-check /* comments only when /* really opens the comment.
    # A /* located inside an earlier # or // comment is plain comment text
    # and must not trigger the checks below.
    if line.startswith("/*", data_end_idx):
        if not line.rstrip().endswith("*/"):
            raise NotImplementedError(
                "unsupported /* comment, does not end at the end of line", line
            )
        if "/*" in line[data_end_idx + 1 :]:
            raise NotImplementedError(
                "unsupported line with multiple /* comments", line
            )

    noncomment = line[:data_end_idx]
    comment = line[data_end_idx:]
    return noncomment, comment


def parse_line(filein):
    """Yield (non-comment, comment) pairs for each meaningful input line.

    Both parts are stripped of surrounding whitespace. Lines whose
    non-comment part is empty (blank or comment-only lines) are skipped.
    """
    for line in filein:
        line, comment = split_comments(line)
        line = line.strip()
        comment = comment.strip()
        if not line:
            continue
        yield line, comment


def parse_flags(comments):
    """Extract known flags from comments.

    Must match exact strings used by cfg_test. Flags are returned in the
    order in which they are listed in FLAGS.
    """
    return [flag for flag in FLAGS if flag in comments]


def parse_mapbody(filein):
    """Parse body of a "map" in ISC config format.

    Input lines can be only:
    - whitespace & comments only -> ignore
    - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
    - <keyword> <anything> { -> parse sub-map and store (optional) <anything>
                                as "_id", producing nested dict under
                                "_mapbody"
    Also store known strings found at the end of line in "_flags".

    Returns:
    - tuple (map dict, list of flags found on the }; line) when }; line
      is reached
    - map dict when we run out of lines without the closing };

    Raises:
    - ValueError when a nested map is not terminated by };
    - NotImplementedError when a keyword contains unexpected characters
    - AssertionError when a statement ends with neither ; nor {, or when
      the input runs out before any statement was parsed
    """
    thismap = {}
    for line, comment in parse_line(filein):
        flags = parse_flags(comment)
        if line == "};":  # end of a nested map
            return thismap, flags

        # first word - a map key name
        # beware: some statements do not have parameters, e.g. "null;"
        key = line.split()[0].rstrip(";")
        # map key sanity check
        if not KEY_REGEX.fullmatch(key):
            raise NotImplementedError("suspicious keyword detected", line)

        # omit keyword from the grammar
        grammar = line[len(key) :].strip()
        # also skip final ; or {
        grammar = grammar[:-1].strip()

        thismap[key] = {}
        if line.endswith("{"):
            # nested map, recurse, but keep "extra identifiers" if any
            result = parse_mapbody(filein)
            # A properly closed map comes back as a tuple; a bare dict means
            # the input ended first. Check the type explicitly: unpacking
            # the dict would silently succeed whenever it has two keys.
            if not isinstance(result, tuple):
                raise ValueError("unfinished nested map, missing }; detected")
            subkeys, flags = result
            if flags:
                thismap[key]["_flags"] = flags
            if grammar:
                # for lines which look like "view <name> {" store "<name>"
                thismap[key]["_id"] = grammar
            thismap[key]["_mapbody"] = subkeys
        else:
            assert line.endswith(";")
            if flags:
                thismap[key]["_flags"] = flags
            thismap[key]["_grammar"] = grammar

    # Ran out of lines: can happen only on the end of the top-level map-body!
    # Return the bare dict (no flags) so that a caller expecting a closed
    # nested map can tell that the closing }; was missing.
    assert len(thismap)
    return thismap


def main():
    """Read stdin or filename provided on command line"""
    with fileinput.input() as filein:
        grammar = parse_mapbody(filein)
    print(json.dumps(grammar, indent=4))


if __name__ == "__main__":
    main()