xref: /openbsd-src/usr.sbin/nsd/zlexer.lex (revision de04d85599998336805cbff9bf6a5b3e06bf78d2)
1 %{
2 /*
3  * zlexer.lex - lexical analyzer for (DNS) zone files
4  *
5  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
6  *
7  * See LICENSE for the license.
8  *
9  */
10 /* because flex keeps having sign-unsigned compare problems that are unfixed*/
11 #if defined(__clang__)||(defined(__GNUC__)&&((__GNUC__ >4)||(defined(__GNUC_MINOR__)&&(__GNUC__ ==4)&&(__GNUC_MINOR__ >=2))))
12 #pragma GCC diagnostic ignored "-Wsign-compare"
13 #endif
14 /* ignore fallthrough warnings in the generated parse code case statements */
15 #if defined(__clang__)||(defined(__GNUC__)&&(__GNUC__ >=7))
16 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
17 #endif
18 
19 #include "config.h"
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <string.h>
24 #include <strings.h>
25 
26 #include "zonec.h"
27 #include "dname.h"
28 #include "zparser.h"
29 
/*
 * LEXOUT(s) traces the tokens the lexer hands out.  The argument is a
 * complete, parenthesized printf argument list, e.g. LEXOUT(("SP ")).
 * With "#if 0" the macro expands to nothing; change it to "#if 1" to get
 * the trace on stdout.
 */
#if 0
#define LEXOUT(s)  printf s /* used ONLY when debugging */
#else
#define LEXOUT(s)
#endif
35 
/*
 * Position of the lexer within the current resource record line.  The
 * state decides how parse_token() classifies a word.
 */
enum lexer_state {
	/* At the start of a record; set after a NL token and by parser_flush(). */
	EXPECT_OWNER,
	/* A token was seen while EXPECT_OWNER was in effect. */
	PARSING_OWNER,
	/* Words are tried as RR type, class and TTL, in that order. */
	PARSING_TTL_CLASS_TYPE,
	/* After an RR type, $TTL or $ORIGIN; words are passed on unclassified. */
	PARSING_RDATA
};
42 
/* Classifies a matched token according to the lexer state; defined after the rules. */
static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);

/*
 * $INCLUDE nesting.  Entry i of both arrays describes the file that was
 * being read when the include at depth i was opened: its flex buffer and
 * its file name, line number and origin.  include_stack_ptr is the number
 * of entries in use.
 */
static YY_BUFFER_STATE include_stack[MAXINCLUDES];
static zparser_type zparser_stack[MAXINCLUDES];
static int include_stack_ptr = 0;
48 
/*
 * Record the file name, line number, origin and flex buffer of the file
 * currently being read in the next free include-stack slot, then make a
 * fresh flex buffer reading from INPUT the active buffer.
 *
 * No bounds check is done here; the caller has to verify that
 * include_stack_ptr < MAXINCLUDES before calling.
 */
static void
push_parser_state(FILE *input)
{
	zparser_type *saved = &zparser_stack[include_stack_ptr];
	YY_BUFFER_STATE fresh;

	saved->filename = parser->filename;
	saved->line = parser->line;
	saved->origin = parser->origin;
	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;

	fresh = yy_create_buffer(input, YY_BUF_SIZE);
	yy_switch_to_buffer(fresh);
	include_stack_ptr++;
}
62 
/*
 * Undo the most recent push_parser_state(): recycle the name of the file
 * that was just finished, put back the saved file name, line number and
 * origin, delete the current flex buffer and switch back to the saved one.
 *
 * No underflow check is done here; include_stack_ptr must be > 0.
 */
static void
pop_parser_state(void)
{
	zparser_type *saved;

	if (parser->filename != NULL) {
		size_t size = strlen(parser->filename) + 1;

		region_recycle(parser->region, (void *)parser->filename, size);
	}

	include_stack_ptr -= 1;
	saved = &zparser_stack[include_stack_ptr];

	parser->filename = saved->filename;
	parser->line = saved->line;
	parser->origin = saved->origin;

	yy_delete_buffer(YY_CURRENT_BUFFER);
	yy_switch_to_buffer(include_stack[include_stack_ptr]);
}
80 
/* Buffer that was active before parser_push_stringbuf(); NULL otherwise. */
static YY_BUFFER_STATE oldstate;

/*
 * Start scanning the NUL-terminated string STR instead of the current
 * input.  The current buffer is remembered in `oldstate` (one level only)
 * and is reinstated by parser_pop_stringbuf().
 */
void
parser_push_stringbuf(char *str)
{
	YY_BUFFER_STATE string_buffer;

	oldstate = YY_CURRENT_BUFFER;
	string_buffer = yy_scan_string(str);
	yy_switch_to_buffer(string_buffer);
}
89 
/*
 * End a string scan: delete the current (string) buffer, switch back to
 * the buffer remembered in `oldstate` and clear `oldstate`.
 */
void
parser_pop_stringbuf(void)
{
	YY_BUFFER_STATE previous = oldstate;

	yy_delete_buffer(YY_CURRENT_BUFFER);
	oldstate = NULL;
	yy_switch_to_buffer(previous);
}
97 
/* Non-zero while inside a "( ... )" group. */
static int paren_open = 0;
/* Where the lexer is within the current record. */
static enum lexer_state lexer_state = EXPECT_OWNER;

/*
 * Reset the lexer: flush the current flex buffer, expect an owner name
 * next and forget any open parenthesis.
 */
void
parser_flush(void)
{
	YY_FLUSH_BUFFER;
	lexer_state = EXPECT_OWNER;
	paren_open = 0;
}
107 
/*
 * Returns NL and 0 alternately on successive calls, beginning with NL.
 * The state is kept in a function-local static and is never reset.
 */
int at_eof(void)
{
	static int once = 1;

	once = !once;
	if (once) {
		return 0;
	}
	return NL;
}
113 
#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
/*
 * Mark the scanner as being at the beginning of a line (at_bol non-zero)
 * or not (at_bol zero) by storing '\n' or ' ' in the first byte of the
 * current buffer.  A buffer reading from yyin is created first if there is
 * no current buffer.
 *
 * Wrapped in do { } while (0) so that "yy_set_bol(x);" is a single
 * statement and can be used as the body of an if/else.
 */
#define yy_set_bol(at_bol) \
	do { \
		if ( ! yy_current_buffer ) \
			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
	} while (0)
#endif
122 
123 %}
124 %option noinput
125 %option nounput
126 %{
127 #ifndef YY_NO_UNPUT
128 #define YY_NO_UNPUT 1
129 #endif
130 #ifndef YY_NO_INPUT
131 #define YY_NO_INPUT 1
132 #endif
133 %}
134 
	/*
	 * Named patterns used by the rules below.
	 *
	 * ZONESTR is what a word may start with: a backslash escape
	 * (backslash plus any character, or backslash plus newline) or any
	 * single character other than blank, tab, newline, carriage return,
	 * parentheses, ';', '.', '"' and '$'.  CHARSTR is what a word may
	 * continue with; it is the same set with '$' allowed.
	 *
	 * BIT is one element of a bitlabel body and ANY one element of a
	 * quoted string body; both accept a backslash followed by any
	 * character other than newline.
	 *
	 * The last line declares three exclusive start conditions: incl,
	 * bitlabel and quotedstring.
	 */
SPACE   [ \t]
LETTER  [a-zA-Z]
NEWLINE [\n\r]
ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n
CHARSTR [^ \t\n\r();.\"]|\\.|\\\n
QUOTE   \"
DOLLAR  \$
COMMENT ;
DOT     \.
BIT	[^\]\n]|\\.
ANY     [^\"\n\\]|\\.

%x	incl bitlabel quotedstring
148 
149 %%
	/* A comment runs from ';' to the end of the line and yields no token. */
{SPACE}*{COMMENT}.*	/* ignore */
	/*
	 * $TTL and $ORIGIN are recognized at the start of a line only.  The
	 * words that follow are scanned in the PARSING_RDATA state.
	 */
^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }

	/*
	 * Handle $INCLUDE directives.  See
	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
	 * The rest of the line is scanned in the exclusive "incl" start
	 * condition.
	 */
^{DOLLAR}INCLUDE        {
	BEGIN(incl);
	/* ignore case statement fallthrough on incl<EOF> flex rule */
}
	/*
	 * $INCLUDE immediately followed by a newline or by end of input.
	 * The '|' makes the newline rule share the action of the EOF rule.
	 */
<incl>\n		|
<incl><<EOF>>		{
	/* Saved here and restored below, so the zc_error() call leaves
	 * parser->error_occurred as it was. */
	int error_occurred = parser->error_occurred;
	BEGIN(INITIAL);
	zc_error("missing file name in $INCLUDE directive");
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	++parser->line;
	parser->error_occurred = error_occurred;
}
171 <incl>.+ 		{
172 	char *tmp;
173 	domain_type *origin = parser->origin;
174 	int error_occurred = parser->error_occurred;
175 
176 	BEGIN(INITIAL);
177 	if (include_stack_ptr >= MAXINCLUDES ) {
178 		zc_error("includes nested too deeply, skipped (>%d)",
179 			 MAXINCLUDES);
180 	} else {
181 		FILE *input;
182 
183 		/* Remove trailing comment.  */
184 		tmp = strrchr(yytext, ';');
185 		if (tmp) {
186 			*tmp = '\0';
187 		}
188 		strip_string(yytext);
189 
190 		/* Parse origin for include file.  */
191 		tmp = strrchr(yytext, ' ');
192 		if (!tmp) {
193 			tmp = strrchr(yytext, '\t');
194 		}
195 		if (tmp) {
196 			const dname_type *dname;
197 
198 			/* split the original yytext */
199 			*tmp = '\0';
200 			strip_string(yytext);
201 
202 			dname = dname_parse(parser->region, tmp + 1);
203 			if (!dname) {
204 				zc_error("incorrect include origin '%s'",
205 					 tmp + 1);
206 			} else if (*(tmp + strlen(tmp + 1)) != '.') {
207 				zc_error("$INCLUDE directive requires absolute domain name");
208 			} else {
209 				origin = domain_table_insert(
210 					parser->db->domains, dname);
211 			}
212 		}
213 
214 		if (strlen(yytext) == 0) {
215 			zc_error("missing file name in $INCLUDE directive");
216 		} else if (!(input = fopen(yytext, "r"))) {
217 			zc_error("cannot open include file '%s': %s",
218 				 yytext, strerror(errno));
219 		} else {
220 			/* Initialize parser for include file.  */
221 			char *filename = region_strdup(parser->region, yytext);
222 			push_parser_state(input); /* Destroys yytext.  */
223 			parser->filename = filename;
224 			parser->line = 1;
225 			parser->origin = origin;
226 			lexer_state = EXPECT_OWNER;
227 		}
228 	}
229 
230 	parser->error_occurred = error_occurred;
231 }
<INITIAL><<EOF>>	{
	/* at_eof() yields NL on every other call and 0 in between. */
	int eo = at_eof();
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	if (include_stack_ptr == 0) {
		/* End of the outermost input: hand out the NL if there is
		 * one, otherwise stop scanning. */
		if(eo == NL)
			return eo;
		yyterminate();
	} else {
		/* End of an included file: close it and go back to the
		 * including file.  Without an NL to return, scanning simply
		 * continues there. */
		fclose(yyin);
		pop_parser_state();
		if(eo == NL)
			return eo;
	}
}
	/*
	 * Any other "$" followed by letters at the start of a line only
	 * produces a warning; no token is returned for it.
	 */
^{DOLLAR}{LETTER}+	{ zc_warning("Unknown directive: %s", yytext); }
	/*
	 * '.', '@' and "\#" go through parse_token() like any other word,
	 * each with its own token code.
	 */
{DOT}	{
	LEXOUT((". "));
	return parse_token('.', yytext, &lexer_state);
}
@	{
	LEXOUT(("@ "));
	return parse_token('@', yytext, &lexer_state);
}
\\#	{
	LEXOUT(("\\# "));
	return parse_token(URR, yytext, &lexer_state);
}
{NEWLINE}	{
	/*
	 * Outside parentheses a newline ends the record: NL is returned
	 * and the next token is expected to be an owner.  Inside
	 * parentheses it counts as ordinary white space.
	 */
	++parser->line;
	if (!paren_open) {
		lexer_state = EXPECT_OWNER;
		LEXOUT(("NL\n"));
		return NL;
	} else {
		LEXOUT(("SP "));
		return SP;
	}
}
\(	{
	/* Parentheses do not nest; a second '(' stops the scan. */
	if (paren_open) {
		zc_error("nested parentheses");
		yyterminate();
	}
	LEXOUT(("( "));
	paren_open = 1;
	return SP;
}
\)	{
	/* A ')' without a matching '(' stops the scan. */
	if (!paren_open) {
		zc_error("closing parentheses without opening parentheses");
		yyterminate();
	}
	LEXOUT((") "));
	paren_open = 0;
	return SP;
}
{SPACE}+	{
	/*
	 * White space where an owner was expected (and not inside
	 * parentheses) is returned as PREV instead of SP -- presumably
	 * so the parser reuses the previous owner; confirm in the grammar.
	 */
	if (!paren_open && lexer_state == EXPECT_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
		LEXOUT(("PREV "));
		return PREV;
	}
	/* White space after the owner: TTL, class and type come next. */
	if (lexer_state == PARSING_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
	}
	LEXOUT(("SP "));
	return SP;
}
300 
	/*
	 * Bitlabels.  Strip leading and ending brackets.
	 * The opening "\[" only switches to the bitlabel start condition, so
	 * it never becomes part of yytext.  The body is collected with
	 * yymore() until the closing ']' is seen.
	 */
\\\[			{ BEGIN(bitlabel); }
<bitlabel><<EOF>>	{
	zc_error("EOF inside bitlabel");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<bitlabel>{BIT}*	{ yymore(); }
<bitlabel>\n		{ ++parser->line; yymore(); }
<bitlabel>\]		{
	BEGIN(INITIAL);
	/* Overwrite the closing ']' with the string terminator. */
	yytext[yyleng - 1] = '\0';
	return parse_token(BITLAB, yytext, &lexer_state);
}
316 
	/*
	 * Quoted strings.  Strip leading and ending quotes.
	 * The opening quote only switches to the quotedstring start
	 * condition, so it never becomes part of yytext.  The body, which
	 * may span several lines, is collected with yymore() until the
	 * closing quote is seen.
	 */
{QUOTE}			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
<quotedstring><<EOF>> 	{
	zc_error("EOF inside quoted string");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<quotedstring>{ANY}*	{ LEXOUT(("QSTR ")); yymore(); }
<quotedstring>\n 	{ ++parser->line; yymore(); }
<quotedstring>{QUOTE} {
	LEXOUT(("\" "));
	BEGIN(INITIAL);
	/* Overwrite the closing quote with the string terminator. */
	yytext[yyleng - 1] = '\0';
	return parse_token(QSTR, yytext, &lexer_state);
}
333 
{ZONESTR}({CHARSTR})* {
	/* Any allowed word.  */
	/* parse_token() decides whether it is returned as STR or, in the
	 * PARSING_TTL_CLASS_TYPE state, as a type, class or TTL token. */
	return parse_token(STR, yytext, &lexer_state);
}
. {
	/*
	 * Catch-all for a character no other rule accepts: report it and
	 * carry on without returning a token.  The byte is converted
	 * through unsigned char so that values >= 0x80 are printed as
	 * 128..255 rather than as negative numbers where char is signed.
	 */
	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
		 (int) (unsigned char) yytext[0],
		 (int) (unsigned char) yytext[0]);
}
342 %%
343 
/*
 * Look "word" up as an RR type name via rrtype_from_string() (which is
 * expected to understand the "TYPExxx" notation as well).  On a match the
 * RR type value is stored in *TYPE and the parser token of that type's
 * descriptor is returned.  Otherwise 0 is returned and *TYPE is left
 * untouched.
 */
static int
rrtype_to_token(const char *word, uint16_t *type)
{
	rrtype_descriptor_type *descriptor;
	uint16_t code = rrtype_from_string(word);

	if (code == 0) {
		return 0;
	}

	descriptor = rrtype_descriptor_by_type(code);
	*type = code;
	return descriptor->token;
}
361 
362 
/*
 * Resolve backslash escapes in TEXT in place.  See RFC 1035, section 5.1.
 *
 *   \DDD  three decimal digits with value 0..255 become that single byte;
 *         a larger value gives a warning, the backslash is dropped and
 *         the digits are kept as ordinary characters
 *   \X    any other character X is kept, the backslash is dropped
 *   \     a backslash at the very end is dropped with a warning
 *
 * Returns the length of the rewritten string, which is never longer than
 * the original.
 */
static size_t
zoctet(char *text)
{
	char *src = text;	/* next character to read */
	char *dst = text;	/* next position to write, never ahead of src */

	while (*src != '\0') {
		assert(dst <= src);
		if (*src != '\\') {
			/* Ordinary character.  */
			*dst = *src;
		} else if (isdigit((unsigned char)src[1])
			   && isdigit((unsigned char)src[2])
			   && isdigit((unsigned char)src[3])) {
			/* \DDD escape.  */
			int val = (hexdigit_to_int(src[1]) * 100 +
				   hexdigit_to_int(src[2]) * 10 +
				   hexdigit_to_int(src[3]));
			if (0 <= val && val <= 255) {
				src += 3;
				*dst = val;
			} else {
				zc_warning("text escape \\DDD overflow");
				++src;
				*dst = *src;
			}
		} else if (src[1] != '\0') {
			/* \X where X is any character, keep X.  */
			++src;
			*dst = *src;
		} else {
			/* Trailing backslash, ignore it: cancel the advance
			 * of dst below. */
			zc_warning("trailing backslash ignored");
			--dst;
		}
		++src;
		++dst;
	}
	*dst = '\0';
	return dst - text;
}
405 
/*
 * Decide what to return for the word YYTEXT that a rule matched as TOKEN.
 *
 * - EXPECT_OWNER: the state becomes PARSING_OWNER.
 * - PARSING_TTL_CLASS_TYPE: the word is tried as an RR type, then as a
 *   class, then as a TTL.  The first that fits is stored in yylval and its
 *   token is returned; an RR type also moves the state to PARSING_RDATA.
 *
 * In every other case the word is copied into parser->rr_region, its
 * escapes are resolved with zoctet(), the copy and its length are stored
 * in yylval.data and TOKEN is returned.
 */
static int
parse_token(int token, char *yytext, enum lexer_state *lexer_state)
{
	char *copy;

	switch (*lexer_state) {
	case EXPECT_OWNER:
		*lexer_state = PARSING_OWNER;
		break;
	case PARSING_TTL_CLASS_TYPE: {
		const char *end;
		uint16_t rrclass;
		int type_token;

		/* type */
		type_token = rrtype_to_token(yytext, &yylval.type);
		if (type_token != 0) {
			*lexer_state = PARSING_RDATA;
			LEXOUT(("%d[%s] ", type_token, yytext));
			return type_token;
		}

		/* class */
		rrclass = rrclass_from_string(yytext);
		if (rrclass != 0) {
			yylval.klass = rrclass;
			LEXOUT(("CLASS "));
			return T_RRCLASS;
		}

		/* ttl: accepted only when the whole word was consumed */
		yylval.ttl = strtottl(yytext, &end);
		if (*end == '\0') {
			LEXOUT(("TTL "));
			return T_TTL;
		}
		break;
	}
	default:
		break;
	}

	copy = region_strdup(parser->rr_region, yytext);
	yylval.data.len = zoctet(copy);
	yylval.data.str = copy;

	LEXOUT(("%d[%s] ", token, yytext));
	return token;
}
452