xref: /netbsd-src/external/bsd/nsd/dist/zlexer.lex (revision 7e30e94394d0994ab9534f68a8f91665045c91ce)
1 %{
2 /*
3  * zlexer.lex - lexical analyzer for (DNS) zone files
4  *
5  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
6  *
7  * See LICENSE for the license.
8  *
9  */
10 /* because flex keeps having sign-unsigned compare problems that are unfixed*/
11 #if defined(__clang__)||(defined(__GNUC__)&&((__GNUC__ >4)||(defined(__GNUC_MINOR__)&&(__GNUC__ ==4)&&(__GNUC_MINOR__ >=2))))
12 #pragma GCC diagnostic ignored "-Wsign-compare"
13 #endif
14 
15 #include "config.h"
16 
17 #include <ctype.h>
18 #include <errno.h>
19 #include <string.h>
20 #include <strings.h>
21 
22 #include "zonec.h"
23 #include "dname.h"
24 #include "zparser.h"
25 
26 #if 0
27 #define LEXOUT(s)  printf s /* used ONLY when debugging */
28 #else
29 #define LEXOUT(s)
30 #endif
31 
32 enum lexer_state {
	/*
	 * Start of a record.  The first token moves the lexer to
	 * PARSING_OWNER; leading whitespace outside parentheses is
	 * reported as PREV and moves it to PARSING_TTL_CLASS_TYPE.
	 */
33 	EXPECT_OWNER,
	/* An owner token has been seen; whitespace ends the owner. */
34 	PARSING_OWNER,
	/*
	 * Each word is tried as an RR type, then as a class, then as a
	 * TTL.  A recognised type moves the lexer to PARSING_RDATA.
	 */
35 	PARSING_TTL_CLASS_TYPE,
	/* Words are passed on with the token chosen by the matching rule. */
36 	PARSING_RDATA
37 };
38 
39 static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);
40 
/* Flex buffers of the including files, one entry per active $INCLUDE. */
41 static YY_BUFFER_STATE include_stack[MAXINCLUDES];
/* File name, line number and origin of the including files. */
42 static zparser_type zparser_stack[MAXINCLUDES];
/* Number of entries in use on both stacks. */
43 static int include_stack_ptr = 0;
44 
45 /*
46  * Saves the file specific variables on the include stack.
47  */
48 static void
49 push_parser_state(FILE *input)
50 {
51 	zparser_stack[include_stack_ptr].filename = parser->filename;
52 	zparser_stack[include_stack_ptr].line = parser->line;
53 	zparser_stack[include_stack_ptr].origin = parser->origin;
54 	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
55 	yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
56 	++include_stack_ptr;
57 }
58 
59 /*
60  * Restores the file specific variables from the include stack.
61  */
62 static void
63 pop_parser_state(void)
64 {
65 	--include_stack_ptr;
66 	parser->filename = zparser_stack[include_stack_ptr].filename;
67 	parser->line = zparser_stack[include_stack_ptr].line;
68 	parser->origin = zparser_stack[include_stack_ptr].origin;
69 	yy_delete_buffer(YY_CURRENT_BUFFER);
70 	yy_switch_to_buffer(include_stack[include_stack_ptr]);
71 }
72 
73 static YY_BUFFER_STATE oldstate;
74 /* Start string scan */
75 void
76 parser_push_stringbuf(char* str)
77 {
78 	oldstate = YY_CURRENT_BUFFER;
79 	yy_switch_to_buffer(yy_scan_string(str));
80 }
81 
82 void
83 parser_pop_stringbuf(void)
84 {
85 	yy_delete_buffer(YY_CURRENT_BUFFER);
86 	yy_switch_to_buffer(oldstate);
87 	oldstate = NULL;
88 }
89 
#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
/*
 * Fallback used only when flex does not provide yy_set_bol itself.
 * Creates a current buffer reading from yyin if there is none, then
 * stores '\n' (AT_BOL true) or ' ' (AT_BOL false) in the first byte
 * of that buffer's character array.
 *
 * The body is wrapped in do { } while (0) so that "yy_set_bol(x);"
 * expands to exactly one statement and stays correct when it is the
 * unbraced body of an if that has an else.
 */
#define yy_set_bol(at_bol) \
	do { \
		if ( ! yy_current_buffer ) \
			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
	} while (0)
#endif
98 
99 %}
100 %option noinput
101 %option nounput
102 %{
103 #ifndef YY_NO_UNPUT
104 #define YY_NO_UNPUT 1
105 #endif
106 #ifndef YY_NO_INPUT
107 #define YY_NO_INPUT 1
108 #endif
109 %}
110 
111 SPACE   [ \t]
112 LETTER  [a-zA-Z]
113 NEWLINE [\n\r]
/*
 * ZONESTR is one unit at the start of an unquoted word: a character
 * other than whitespace, newline, parentheses, ';', '.', '"' and '$',
 * or a backslash followed by any character (including a newline).
 * CHARSTR is one unit inside a word; it is the same except that '"'
 * and '$' are allowed.
 */
114 ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n
115 CHARSTR [^ \t\n\r();.]|\\.|\\\n
116 QUOTE   \"
117 DOLLAR  \$
118 COMMENT ;
119 DOT     \.
/*
 * BIT is one unit inside a bitlabel: anything but ']' and newline, or
 * a backslash escape.  ANY is one unit inside a quoted string:
 * anything but '"', newline and backslash, or a backslash escape.
 */
120 BIT	[^\]\n]|\\.
121 ANY     [^\"\n\\]|\\.
122 
/* Exclusive start conditions used by the rules below. */
123 %x	incl bitlabel quotedstring
124 
124 
125 %%
	/*
	 * Scanner state that persists between calls: whether an opening
	 * parenthesis has been seen without its closing one, and which
	 * part of a record is currently being read.
	 */
126 	static int paren_open = 0;
127 	static enum lexer_state lexer_state = EXPECT_OWNER;
	/* A comment runs from ';' to the end of the line; no token. */
128 {SPACE}*{COMMENT}.*	/* ignore */
	/* $TTL and $ORIGIN: what follows on the line is lexed as rdata. */
129 ^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
130 ^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }
131 
132 	/*
133 	 * Handle $INCLUDE directives.  See
134 	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
135 	 */
136 ^{DOLLAR}INCLUDE        {
137 	BEGIN(incl);
138 }
	/* $INCLUDE directly followed by a newline or the end of input. */
139 <incl>\n 		|
140 <incl><<EOF>>		{
	/*
	 * The error flag is saved here and restored below, so the
	 * zc_error() call leaves parser->error_occurred unchanged.
	 * NOTE(review): presumably a bad $INCLUDE is not meant to mark
	 * the whole parse as failed by itself - confirm with the parser.
	 */
141 	int error_occurred = parser->error_occurred;
142 	BEGIN(INITIAL);
143 	zc_error("missing file name in $INCLUDE directive");
144 	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
145 	++parser->line;
146 	parser->error_occurred = error_occurred;
147 }
<incl>.+ 		{
	/*
	 * Rest of a $INCLUDE line: a file name, optionally followed by
	 * an origin and by a comment.  On success the named file is
	 * opened and becomes the scanner input, with the current file
	 * pushed on the include stack.  parser->error_occurred is
	 * restored at the end, so errors reported here leave it
	 * unchanged.
	 */
	char *tmp;
	domain_type *origin = parser->origin;
	int error_occurred = parser->error_occurred;

	BEGIN(INITIAL);
	if (include_stack_ptr >= MAXINCLUDES ) {
		zc_error("includes nested too deeply, skipped (>%d)",
			 MAXINCLUDES);
	} else {
		FILE *input;

		/*
		 * Remove trailing comment.  The comment starts at the
		 * first ';'; cutting at the last one would leave the
		 * front part of a comment that itself contains ';' to
		 * be taken for the file name or the origin.
		 */
		tmp = strchr(yytext, ';');
		if (tmp) {
			*tmp = '\0';
		}
		strip_string(yytext);

		/* Parse origin for include file.  */
		tmp = strrchr(yytext, ' ');
		if (!tmp) {
			tmp = strrchr(yytext, '\t');
		}
		if (tmp) {
			const dname_type *dname;

			/* split the original yytext */
			*tmp = '\0';
			strip_string(yytext);

			dname = dname_parse(parser->region, tmp + 1);
			if (!dname) {
				zc_error("incorrect include origin '%s'",
					 tmp + 1);
			} else if (*(tmp + strlen(tmp + 1)) != '.') {
				/* Last character of the origin is not a dot. */
				zc_error("$INCLUDE directive requires absolute domain name");
			} else {
				origin = domain_table_insert(
					parser->db->domains, dname);
			}
		}

		if (strlen(yytext) == 0) {
			zc_error("missing file name in $INCLUDE directive");
		} else if (!(input = fopen(yytext, "r"))) {
			zc_error("cannot open include file '%s': %s",
				 yytext, strerror(errno));
		} else {
			/* Initialize parser for include file.  */
			char *filename = region_strdup(parser->region, yytext);
			push_parser_state(input); /* Destroys yytext.  */
			parser->filename = filename;
			parser->line = 1;
			parser->origin = origin;
			lexer_state = EXPECT_OWNER;
		}
	}

	parser->error_occurred = error_occurred;
}
209 <INITIAL><<EOF>>	{
	/*
	 * End of the current input.  With an empty include stack the
	 * scan ends; otherwise the finished include file is closed and
	 * scanning resumes in the file that included it.
	 */
210 	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
211 	if (include_stack_ptr == 0) {
212 		yyterminate();
213 	} else {
214 		fclose(yyin);
215 		pop_parser_state();
216 	}
217 }
	/* Any other $WORD at the start of a line: warn, emit no token. */
218 ^{DOLLAR}{LETTER}+	{ zc_warning("Unknown directive: %s", yytext); }
219 {DOT}	{
220 	LEXOUT((". "));
221 	return parse_token('.', yytext, &lexer_state);
222 }
223 @	{
224 	LEXOUT(("@ "));
225 	return parse_token('@', yytext, &lexer_state);
226 }
227 \\#	{
228 	LEXOUT(("\\# "));
229 	return parse_token(URR, yytext, &lexer_state);
230 }
231 {NEWLINE}	{
	/*
	 * Outside parentheses a newline ends the record (NL) and the
	 * next token is expected to be an owner.  Inside parentheses
	 * it is reported as ordinary whitespace (SP).
	 */
232 	++parser->line;
233 	if (!paren_open) {
234 		lexer_state = EXPECT_OWNER;
235 		LEXOUT(("NL\n"));
236 		return NL;
237 	} else {
238 		LEXOUT(("SP "));
239 		return SP;
240 	}
241 }
242 \(	{
	/* Parentheses are reported as SP; nesting ends the scan. */
243 	if (paren_open) {
244 		zc_error("nested parentheses");
245 		yyterminate();
246 	}
247 	LEXOUT(("( "));
248 	paren_open = 1;
249 	return SP;
250 }
251 \)	{
252 	if (!paren_open) {
253 		zc_error("closing parentheses without opening parentheses");
254 		yyterminate();
255 	}
256 	LEXOUT((") "));
257 	paren_open = 0;
258 	return SP;
259 }
260 {SPACE}+	{
	/*
	 * Whitespace before any owner token, outside parentheses, is
	 * reported as PREV.  NOTE(review): presumably the parser then
	 * reuses the previous owner - confirm in the grammar.
	 */
261 	if (!paren_open && lexer_state == EXPECT_OWNER) {
262 		lexer_state = PARSING_TTL_CLASS_TYPE;
263 		LEXOUT(("PREV "));
264 		return PREV;
265 	}
	/* Whitespace after the owner: TTL, class or type comes next. */
266 	if (lexer_state == PARSING_OWNER) {
267 		lexer_state = PARSING_TTL_CLASS_TYPE;
268 	}
269 	LEXOUT(("SP "));
270 	return SP;
271 }
272 
273 	/* Bitlabels.  Strip leading and ending brackets.  */
	/*
	 * The opening "\[" is matched in the INITIAL state without
	 * yymore(), so it is not part of the text that is accumulated
	 * in the bitlabel state.
	 */
274 \\\[			{ BEGIN(bitlabel); }
275 <bitlabel><<EOF>>	{
276 	zc_error("EOF inside bitlabel");
277 	BEGIN(INITIAL);
278 	yyrestart(yyin); /* this is so that lex does not give an internal err */
279 	yyterminate();
280 }
	/* yymore() makes the next match append to yytext. */
281 <bitlabel>{BIT}*	{ yymore(); }
282 <bitlabel>\n		{ ++parser->line; yymore(); }
283 <bitlabel>\]		{
284 	BEGIN(INITIAL);
	/* Overwrite the closing ']' with the string terminator. */
285 	yytext[yyleng - 1] = '\0';
286 	return parse_token(BITLAB, yytext, &lexer_state);
287 }
288 
289 	/* Quoted strings.  Strip leading and ending quotes.  */
	/*
	 * Same scheme as for bitlabels: the opening quote is consumed
	 * here, the contents are accumulated with yymore(), and the
	 * closing quote is overwritten before the token is returned.
	 */
290 {QUOTE}			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
291 <quotedstring><<EOF>> 	{
292 	zc_error("EOF inside quoted string");
293 	BEGIN(INITIAL);
294 	yyrestart(yyin); /* this is so that lex does not give an internal err */
295 	yyterminate();
296 }
297 <quotedstring>{ANY}*	{ LEXOUT(("STR ")); yymore(); }
298 <quotedstring>\n 	{ ++parser->line; yymore(); }
299 <quotedstring>{QUOTE} {
300 	LEXOUT(("\" "));
301 	BEGIN(INITIAL);
302 	yytext[yyleng - 1] = '\0';
303 	return parse_token(STR, yytext, &lexer_state);
304 }
305 
306 {ZONESTR}({CHARSTR})* {
307 	/* Any allowed word.  */
308 	return parse_token(STR, yytext, &lexer_state);
309 }
	/* Fallback for a character that no rule above accepts. */
310 . {
311 	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
312 		 (int) yytext[0], (int) yytext[0]);
313 }
314 %%
315 
316 /*
317  * Analyze "word" to see if it matches an RR type, possibly by using
318  * the "TYPExxx" notation.  If it matches, the corresponding token is
319  * returned and the TYPE parameter is set to the RR type value.
320  */
321 static int
322 rrtype_to_token(const char *word, uint16_t *type)
323 {
324 	uint16_t t = rrtype_from_string(word);
325 	if (t != 0) {
326 		rrtype_descriptor_type *entry = rrtype_descriptor_by_type(t);
327 		*type = t;
328 		return entry->token;
329 	}
330 
331 	return 0;
332 }
333 
334 
/*
 * Remove \DDD constructs from the input. See RFC 1035, section 5.1.
 *
 * TEXT is rewritten in place:
 *   - "\DDD" with DDD in 0..255 becomes the single byte DDD;
 *   - "\DDD" with DDD above 255 triggers a warning, the backslash is
 *     dropped and the three digits are kept;
 *   - "\X" for any other character X becomes X;
 *   - a backslash at the very end triggers a warning and is dropped.
 *
 * The result is NUL-terminated.  The return value is the number of
 * bytes before that terminator; the result may itself contain NUL
 * bytes produced by "\000".
 */
static size_t
zoctet(char *text)
{
	/*
	 * s reads the original string, p writes the rebuilt one.  Every
	 * step consumes at least as many bytes as it produces, so p never
	 * overtakes s.  Both cursors only move forward; in particular p
	 * is never stepped back, which for an input consisting of a lone
	 * backslash would form a pointer before the start of TEXT.
	 */
	const char *s = text;
	char *p = text;

	while (*s != '\0') {
		assert(p <= s);
		if (s[0] != '\\') {
			/* Ordinary character.  */
			*p++ = *s++;
		} else if (isdigit((unsigned char)s[1])
			   && isdigit((unsigned char)s[2])
			   && isdigit((unsigned char)s[3])) {
			/* \DDD escape.  */
			int val = (hexdigit_to_int(s[1]) * 100 +
				   hexdigit_to_int(s[2]) * 10 +
				   hexdigit_to_int(s[3]));
			if (0 <= val && val <= 255) {
				*p++ = (char)val;
				s += 4;
			} else {
				zc_warning("text escape \\DDD overflow");
				/*
				 * Keep the first digit here; the other two
				 * are copied as ordinary characters by the
				 * following iterations.
				 */
				*p++ = s[1];
				s += 2;
			}
		} else if (s[1] != '\0') {
			/* \X where X is any character, keep X.  */
			*p++ = s[1];
			s += 2;
		} else {
			/* Trailing backslash, ignore it.  */
			zc_warning("trailing backslash ignored");
			s += 1;
		}
	}
	*p = '\0';
	return (size_t)(p - text);
}
377 
378 static int
379 parse_token(int token, char *yytext, enum lexer_state *lexer_state)
380 {
381 	size_t len;
382 	char *str;
383 
384 	if (*lexer_state == EXPECT_OWNER) {
385 		*lexer_state = PARSING_OWNER;
386 	} else if (*lexer_state == PARSING_TTL_CLASS_TYPE) {
387 		const char *t;
388 		int token;
389 		uint16_t rrclass;
390 
391 		/* type */
392 		token = rrtype_to_token(yytext, &yylval.type);
393 		if (token != 0) {
394 			*lexer_state = PARSING_RDATA;
395 			LEXOUT(("%d[%s] ", token, yytext));
396 			return token;
397 		}
398 
399 		/* class */
400 		rrclass = rrclass_from_string(yytext);
401 		if (rrclass != 0) {
402 			yylval.klass = rrclass;
403 			LEXOUT(("CLASS "));
404 			return T_RRCLASS;
405 		}
406 
407 		/* ttl */
408 		yylval.ttl = strtottl(yytext, &t);
409 		if (*t == '\0') {
410 			LEXOUT(("TTL "));
411 			return T_TTL;
412 		}
413 	}
414 
415 	str = region_strdup(parser->rr_region, yytext);
416 	len = zoctet(str);
417 
418 	yylval.data.str = str;
419 	yylval.data.len = len;
420 
421 	LEXOUT(("%d[%s] ", token, yytext));
422 	return token;
423 }
424