############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################

"""
Read ISC config grammar description produced by "cfg_test --grammar",
transform it into JSON, and print it to stdout.

Beware: This parser is pretty dumb and heavily depends on cfg_test output
format. See parse_mapbody() for more details.

Maps are recursively parsed into sub-dicts, all other elements (lists etc.)
are left intact and returned as one string.

Output example from named.conf grammar showing three variants follow.
Keys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
end node, key "_mapbody" denotes a nested map.

{
    "acl": {
        "_flags": [
            "may occur multiple times"
        ],
        "_grammar": "<string> { <address_match_element>; ... }"
    },
    "http": {
        "_flags": [
            "may occur multiple times"
        ],
        "_id": "<string>",
        "_mapbody": {
            "endpoints": {
                "_grammar": "{ <quoted_string>; ... }"
            },
            "streams-per-connection": {
                "_grammar": "<integer>"
            }
        }
    },
    "options": {
        "_mapbody": {
            "rate-limit": {
                "_mapbody": {
                    "all-per-second": {
                        "_grammar": "<integer>"
                    }
                }
            }
        }
    }
}
"""
import fileinput
import json
import re

FLAGS = [
    "may occur multiple times",
    "obsolete",
    "deprecated",
    "experimental",
    "test only",
]

KEY_REGEX = re.compile("[a-zA-Z0-9-]+")


def split_comments(line):
    """Split line on comment boundary.

    Supports only #, //, and /* comments which end at the end of line.
    It does NOT handle:
    - quoted strings
    - /* comments which do not end at line boundary
    - multiple /* comments on a single line

    The /* sanity checks are applied only when /* is the delimiter that
    actually starts the comment; a /* which appears inside a # or //
    comment is plain comment text and is left alone.

    Returns:
    - tuple (non-comment text, comment text); neither part is stripped and
      concatenating them yields the original line

    Raises:
    - AssertionError if the line contains a double quote
    - NotImplementedError for unsupported /* comment layouts
    """
    assert '"' not in line, 'lines with " are not supported'
    data_end_idx = len(line)
    first_delimiter = None
    for delimiter in ("#", "//", "/*"):
        idx = line.find(delimiter)
        if idx != -1 and idx < data_end_idx:
            data_end_idx = idx
            first_delimiter = delimiter

    if first_delimiter == "/*":
        # sanity checks
        if not line.rstrip().endswith("*/"):
            raise NotImplementedError(
                "unsupported /* comment, does not end at the end of line", line
            )
        if "/*" in line[data_end_idx + 1 :]:
            raise NotImplementedError(
                "unsupported line with multiple /* comments", line
            )

    noncomment = line[:data_end_idx]
    comment = line[data_end_idx:]
    return noncomment, comment


def parse_line(filein):
    """Generate (non-comment, comment) pairs for lines consumed from input.

    Both parts are stripped of surrounding whitespace. Lines which contain
    nothing but whitespace and/or a comment are skipped.
    """
    for line in filein:
        line, comment = split_comments(line)
        line = line.strip()
        comment = comment.strip()
        if not line:
            continue
        yield line, comment


def parse_flags(comments):
    """Extract known flags from comments. Must match exact strings used by cfg_test.

    Returns a list of the FLAGS entries found in the comment text (plain
    substring match), in FLAGS order.
    """
    return [flag for flag in FLAGS if flag in comments]


def _parse_map(filein, nested):
    """Parse one map body from the shared line iterator.

    nested tells whether we are inside a "{ ... };" block, i.e. whether
    running out of input before the closing }; is an error.
    """
    thismap = {}
    for line, comment in parse_line(filein):
        flags = parse_flags(comment)
        if line == "};":  # end of a nested map
            return thismap, flags

        # first word - a map key name
        # beware: some statements do not have parameters, e.g. "null;"
        key = line.split()[0].rstrip(";")
        # map key sanity check
        if not KEY_REGEX.fullmatch(key):
            raise NotImplementedError("suspicious keyword detected", line)

        # omit keyword from the grammar
        grammar = line[len(key) :].strip()
        # also skip final ; or {
        grammar = grammar[:-1].strip()

        thismap[key] = {}
        if line.endswith("{"):
            # nested map, recurse, but keep "extra identifiers" if any;
            # flags of a map come from the comment on its closing }; line
            subkeys, flags = _parse_map(filein, nested=True)
            if flags:
                thismap[key]["_flags"] = flags
            if grammar:
                # for lines which look like "view <name> {" store "<name>"
                thismap[key]["_id"] = grammar
            thismap[key]["_mapbody"] = subkeys
        else:
            assert line.endswith(";")
            if flags:
                thismap[key]["_flags"] = flags
            thismap[key]["_grammar"] = grammar

    # Ran out of lines: legal only on the end of the top-level map-body.
    # Report it explicitly for nested maps; relying on tuple-unpacking of
    # the returned dict would silently succeed for a dict with two keys.
    if nested:
        raise ValueError("unfinished nested map, missing }; detected")
    assert len(thismap)
    return thismap


def parse_mapbody(filein):
    """Parse body of a "map" in ISC config format.

    Input lines can be only:
    - whitespace & comments only -> ignore
    - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
    - <keyword> <anything> { -> parse sub-map and store (optional) <anything> as "_id",
                                producing nested dict under "_mapbody"
    Also store known strings found at the end of line in "_flags".

    filein may be any iterable of lines; it is consumed through a single
    shared iterator so that nested maps continue where the parent stopped.

    Returns:
    - tuple (map dict, map comment) when }; line is reached
    - map dict when we run out of lines without the closing };

    Raises:
    - ValueError when input ends inside a nested map (missing };)
    - NotImplementedError when a line starts with an unexpected keyword
    - AssertionError when a statement ends with neither ; nor {, or when
      the input contains no statements at all
    """
    return _parse_map(iter(filein), nested=False)


def main():
    """Read stdin or filename provided on command line"""
    with fileinput.input() as filein:
        grammar = parse_mapbody(filein)
    print(json.dumps(grammar, indent=4))


if __name__ == "__main__":
    main()