xref: /netbsd-src/external/bsd/ntp/dist/ntpd/ntp_scanner.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*	$NetBSD: ntp_scanner.c,v 1.3 2010/12/04 23:08:35 christos Exp $	*/
2 
3 
4 /* ntp_scanner.c
5  *
6  * The source code for a simple lexical analyzer.
7  *
8  * Written By:	Sachin Kamboj
9  *		University of Delaware
10  *		Newark, DE 19711
11  * Copyright (c) 2006
12  */
13 
14 #ifdef HAVE_CONFIG_H
15 # include <config.h>
16 #endif
17 
18 #include <stdio.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <string.h>
23 
24 #include "ntp_config.h"
25 #include "ntpsim.h"
26 #include "ntp_scanner.h"
27 #include "ntp_parser.h"
28 #include "ntp_debug.h"
29 
30 /* ntp_keyword.h declares finite state machine and token text */
31 #include "ntp_keyword.h"
32 
33 
34 
35 /* SCANNER GLOBAL VARIABLES
36  * ------------------------
37  */
38 
/*
 * Capacity of the lexeme buffer: 1024 characters plus one byte for the
 * terminating NUL that yylex() appends.
 */
#define MAX_LEXEME (1024 + 1)	/* The maximum size of a lexeme */
char yytext[MAX_LEXEME];	/* Buffer for storing the input text/lexeme */
/*
 * Defined elsewhere.  Nonzero selects the FILE-based input path in
 * get_next_char()/push_back_char(); zero selects the remote_config
 * character buffer.
 */
extern int input_from_file;
42 
43 
44 
45 
46 /* CONSTANTS
47  * ---------
48  */
49 
50 
51 /* SCANNER GLOBAL VARIABLES
52  * ------------------------
53  */
/*
 * Characters that yylex() hands back as single-character tokens while
 * it is in FOLLBY_TOKEN mode (see is_special()).  quote_if_needed()
 * also uses this set to decide whether a string must be quoted.
 */
const char special_chars[] = "{}(),;|=";
55 
56 
57 /* FUNCTIONS
58  * ---------
59  */
60 
/* Fetch the next input character from the file or the remote buffer */
int get_next_char(void);
/* Keyword state-machine lookup; forward-declared, defined further down */
static int is_keyword(char *lexeme, follby *pfollowedby);
63 
64 
65 
66 /*
67  * keyword() - Return the keyword associated with token T_ identifier
68  */
69 const char *
70 keyword(
71 	int token
72 	)
73 {
74 	size_t i;
75 	const char *text;
76 
77 	i = token - LOWEST_KEYWORD_ID;
78 
79 	if (i < COUNTOF(keyword_text))
80 		text = keyword_text[i];
81 	else
82 		text = NULL;
83 
84 	return (text != NULL)
85 		   ? text
86 		   : "(keyword not found)";
87 }
88 
89 
90 /* FILE INTERFACE
91  * --------------
92  * We define a couple of wrapper functions around the standard C fgetc
93  * and ungetc functions in order to include positional bookkeeping
94  */
95 
96 struct FILE_INFO *
97 F_OPEN(
98 	const char *path,
99 	const char *mode
100 	)
101 {
102 	struct FILE_INFO *my_info;
103 
104 	my_info = emalloc(sizeof *my_info);
105 
106 	my_info->line_no = 1;
107 	my_info->col_no = 0;
108 	my_info->prev_line_col_no = 0;
109 	my_info->prev_token_col_no = 0;
110 	my_info->fname = path;
111 
112 	my_info->fd = fopen(path, mode);
113 	if (NULL == my_info->fd) {
114 		free(my_info);
115 		return NULL;
116 	}
117 	return my_info;
118 }
119 
120 int
121 FGETC(
122 	struct FILE_INFO *stream
123 	)
124 {
125 	int ch = fgetc(stream->fd);
126 
127 	++stream->col_no;
128 	if (ch == '\n') {
129 		stream->prev_line_col_no = stream->col_no;
130 		++stream->line_no;
131 		stream->col_no = 1;
132 	}
133 	return ch;
134 }
135 
136 /* BUGS: 1. Function will fail on more than one line of pushback
137  *       2. No error checking is done to see if ungetc fails
138  * SK: I don't think its worth fixing these bugs for our purposes ;-)
139  */
140 int
141 UNGETC(
142 	int ch,
143 	struct FILE_INFO *stream
144 	)
145 {
146 	if (ch == '\n') {
147 		stream->col_no = stream->prev_line_col_no;
148 		stream->prev_line_col_no = -1;
149 		--stream->line_no;
150 	}
151 	--stream->col_no;
152 	return ungetc(ch, stream->fd);
153 }
154 
155 int
156 FCLOSE(
157 	struct FILE_INFO *stream
158 	)
159 {
160 	int ret_val = fclose(stream->fd);
161 
162 	if (!ret_val)
163 		free(stream);
164 	return ret_val;
165 }
166 
167 /* STREAM INTERFACE
168  * ----------------
169  * Provide a wrapper for the stream functions so that the
170  * stream can either read from a file or from a character
171  * array.
172  * NOTE: This is not very efficient for reading from character
173  * arrays, but needed to allow remote configuration where the
174  * configuration command is provided through ntpq.
175  *
 * The behavior of these two functions is determined by the
 * input_from_file flag.
178  */
179 
180 int
181 get_next_char(
182 	void
183 	)
184 {
185 	char ch;
186 
187 	if (input_from_file)
188 		return FGETC(ip_file);
189 	else {
190 		if (remote_config.buffer[remote_config.pos] == '\0')
191 			return EOF;
192 		else {
193 			ip_file->col_no++;
194 			ch = remote_config.buffer[remote_config.pos++];
195 			if (ch == '\n') {
196 				ip_file->prev_line_col_no = ip_file->col_no;
197 				++ip_file->line_no;
198 				ip_file->col_no = 1;
199 			}
200 			return ch;
201 		}
202 	}
203 }
204 
205 void
206 push_back_char(
207 	int ch
208 	)
209 {
210 	if (input_from_file)
211 		UNGETC(ch, ip_file);
212 	else {
213 		if (ch == '\n') {
214 			ip_file->col_no = ip_file->prev_line_col_no;
215 			ip_file->prev_line_col_no = -1;
216 			--ip_file->line_no;
217 		}
218 		--ip_file->col_no;
219 
220 		remote_config.pos--;
221 	}
222 }
223 
224 
225 
226 /* STATE MACHINES
227  * --------------
228  */
229 
230 /* Keywords */
231 static int
232 is_keyword(
233 	char *lexeme,
234 	follby *pfollowedby
235 	)
236 {
237 	follby fb;
238 	int curr_s;		/* current state index */
239 	int token;
240 	int i;
241 
242 	curr_s = SCANNER_INIT_S;
243 	token = 0;
244 
245 	for (i = 0; lexeme[i]; i++) {
246 		while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
247 			curr_s = SS_OTHER_N(sst[curr_s]);
248 
249 		if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
250 			if ('\0' == lexeme[i + 1]
251 			    && FOLLBY_NON_ACCEPTING
252 			       != SS_FB(sst[curr_s])) {
253 				fb = SS_FB(sst[curr_s]);
254 				*pfollowedby = fb;
255 				token = curr_s;
256 				break;
257 			}
258 			curr_s = SS_MATCH_N(sst[curr_s]);
259 		} else
260 			break;
261 	}
262 
263 	return token;
264 }
265 
266 
/*
 * is_integer() - test whether a lexeme is a decimal integer
 *
 * Accepts an optional leading '-' followed by one or more decimal
 * digits and nothing else.  An empty string or a lone "-" is not an
 * integer.
 *
 * Returns 1 if the lexeme is an integer, 0 otherwise.
 */
static int
is_integer(
	char *lexeme
	)
{
	size_t i = 0;

	/* Allow a leading minus sign */
	if ('-' == lexeme[i])
		i++;

	/* There must be at least one digit after the optional sign */
	if ('\0' == lexeme[i])
		return 0;

	/* Check that all the remaining characters are digits */
	for (; '\0' != lexeme[i]; i++) {
		if (!isdigit((unsigned char)lexeme[i]))
			return 0;
	}
	return 1;
}
286 
287 
/*
 * is_double() - test whether a lexeme is a floating point number
 *
 * Accepted form:
 *	[+-] digits* '.' digits* [ (e|E) [+-] digits+ ]
 * A decimal point is required, at least one digit must appear on one
 * side of it, and an exponent marker must be followed by at least one
 * digit.
 *
 * Returns 1 if the lexeme has that form, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	int mantissa_digits = 0;	/* digits on both sides of '.' */
	int exponent_digits = 0;
	int i = 0;

	/* Check for an optional '+' or '-' */
	if ('+' == lexeme[i] || '-' == lexeme[i])
		i++;

	/* Read the integer part (isdigit() is false for the NUL) */
	for (; isdigit((unsigned char)lexeme[i]); i++)
		mantissa_digits++;

	/* Check for the required decimal point */
	if ('.' != lexeme[i])
		return 0;
	i++;

	/* Check for any digits after the decimal point */
	for (; isdigit((unsigned char)lexeme[i]); i++)
		mantissa_digits++;

	/* The integer and fraction parts must not both be empty */
	if (0 == mantissa_digits)
		return 0;

	/* Check if we are done */
	if ('\0' == lexeme[i])
		return 1;

	/* There is still more input, it has to be an exponent */
	if ('e' != tolower((unsigned char)lexeme[i]))
		return 0;
	i++;

	/* Read an optional sign */
	if ('+' == lexeme[i] || '-' == lexeme[i])
		i++;

	/* Now read the exponent digits; "1.5e" is not a number */
	for (; isdigit((unsigned char)lexeme[i]); i++)
		exponent_digits++;
	if (0 == exponent_digits)
		return 0;

	/* Nothing may follow the exponent */
	return '\0' == lexeme[i];
}
348 
349 
350 /* is_special() - Test whether a character is a token */
351 static inline int
352 is_special(
353 	int ch
354 	)
355 {
356 	return strchr(special_chars, ch) != NULL;
357 }
358 
359 
360 static int
361 is_EOC(
362 	int ch
363 	)
364 {
365 	if ((old_config_style && (ch == '\n')) ||
366 	    (!old_config_style && (ch == ';')))
367 		return 1;
368 	return 0;
369 }
370 
371 
372 char *
373 quote_if_needed(char *str)
374 {
375 	char *ret;
376 	size_t len;
377 	size_t octets;
378 
379 	len = strlen(str);
380 	octets = len + 2 + 1;
381 	ret = emalloc(octets);
382 	if ('"' != str[0]
383 	    && (strcspn(str, special_chars) < len
384 		|| strchr(str, ' ') != NULL)) {
385 		snprintf(ret, octets, "\"%s\"", str);
386 	} else
387 		strncpy(ret, str, octets);
388 
389 	return ret;
390 }
391 
392 
393 static int
394 create_string_token(
395 	char *lexeme
396 	)
397 {
398 	char *pch;
399 
400 	/*
401 	 * ignore end of line whitespace
402 	 */
403 	pch = lexeme;
404 	while (*pch && isspace((unsigned char)*pch))
405 		pch++;
406 
407 	if (!*pch) {
408 		yylval.Integer = T_EOC;
409 		return yylval.Integer;
410 	}
411 
412 	yylval.String = estrdup(lexeme);
413 	return T_String;
414 }
415 
416 
417 /*
418  * yylex() - function that does the actual scanning.
419  * Bison expects this function to be called yylex and for it to take no
420  * input and return an int.
421  * Conceptually yylex "returns" yylval as well as the actual return
422  * value representing the token or type.
423  */
424 int
425 yylex(
426 	void
427 	)
428 {
429 	size_t i;
430 	int instring = 0;
431 	int yylval_was_set = 0;
432 	int token;		/* The return value/the recognized token */
433 	int ch;
434 	static follby followedby = FOLLBY_TOKEN;
435 
436 	do {
437 		/* Ignore whitespace at the beginning */
438 		while (EOF != (ch = get_next_char()) &&
439 		       isspace(ch) &&
440 		       !is_EOC(ch))
441 			; /* Null Statement */
442 
443 		if (EOF == ch) {
444 
445 			if (!input_from_file || !curr_include_level)
446 				return 0;
447 
448 			FCLOSE(fp[curr_include_level]);
449 			ip_file = fp[--curr_include_level];
450 			token = T_EOC;
451 			goto normal_return;
452 
453 		} else if (is_EOC(ch)) {
454 
455 			/* end FOLLBY_STRINGS_TO_EOC effect */
456 			followedby = FOLLBY_TOKEN;
457 			token = T_EOC;
458 			goto normal_return;
459 
460 		} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
461 			/* special chars are their own token values */
462 			token = ch;
463 			/*
464 			 * '=' implies a single string following as in:
465 			 * setvar Owner = "The Boss" default
466 			 * This could alternatively be handled by
467 			 * removing '=' from special_chars and adding
468 			 * it to the keyword table.
469 			 */
470 			if ('=' == ch)
471 				followedby = FOLLBY_STRING;
472 			yytext[0] = (char)ch;
473 			yytext[1] = '\0';
474 			goto normal_return;
475 		} else
476 			push_back_char(ch);
477 
478 		/* save the position of start of the token */
479 		ip_file->prev_token_line_no = ip_file->line_no;
480 		ip_file->prev_token_col_no = ip_file->col_no;
481 
482 		/* Read in the lexeme */
483 		i = 0;
484 		while (EOF != (ch = get_next_char())) {
485 
486 			yytext[i] = (char)ch;
487 
488 			/* Break on whitespace or a special character */
489 			if (isspace(ch) || is_EOC(ch)
490 			    || '"' == ch
491 			    || (FOLLBY_TOKEN == followedby
492 				&& is_special(ch)))
493 				break;
494 
495 			/* Read the rest of the line on reading a start
496 			   of comment character */
497 			if ('#' == ch) {
498 				while (EOF != (ch = get_next_char())
499 				       && '\n' != ch)
500 					; /* Null Statement */
501 				break;
502 			}
503 
504 			i++;
505 			if (i >= COUNTOF(yytext))
506 				goto lex_too_long;
507 		}
508 		/* Pick up all of the string inside between " marks, to
509 		 * end of line.  If we make it to EOL without a
510 		 * terminating " assume it for them.
511 		 *
512 		 * XXX - HMS: I'm not sure we want to assume the closing "
513 		 */
514 		if ('"' == ch) {
515 			instring = 1;
516 			while (EOF != (ch = get_next_char()) &&
517 			       ch != '"' && ch != '\n') {
518 				yytext[i++] = (char)ch;
519 				if (i >= COUNTOF(yytext))
520 					goto lex_too_long;
521 			}
522 			/*
523 			 * yytext[i] will be pushed back as not part of
524 			 * this lexeme, but any closing quote should
525 			 * not be pushed back, so we read another char.
526 			 */
527 			if ('"' == ch)
528 				ch = get_next_char();
529 		}
530 		/* Pushback the last character read that is not a part
531 		 * of this lexeme.
532 		 * If the last character read was an EOF, pushback a
533 		 * newline character. This is to prevent a parse error
534 		 * when there is no newline at the end of a file.
535 		 */
536 		if (EOF == ch)
537 			push_back_char('\n');
538 		else
539 			push_back_char(ch);
540 		yytext[i] = '\0';
541 	} while (i == 0);
542 
543 	/* Now return the desired token */
544 
545 	/* First make sure that the parser is *not* expecting a string
546 	 * as the next token (based on the previous token that was
547 	 * returned) and that we haven't read a string.
548 	 */
549 
550 	if (followedby == FOLLBY_TOKEN && !instring) {
551 		token = is_keyword(yytext, &followedby);
552 		if (token)
553 			goto normal_return;
554 		else if (is_integer(yytext)) {
555 			yylval_was_set = 1;
556 			errno = 0;
557 			if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
558 			    && ((errno == EINVAL) || (errno == ERANGE))) {
559 				msyslog(LOG_ERR,
560 					"Integer cannot be represented: %s",
561 					yytext);
562 				exit(1);
563 			} else {
564 				token = T_Integer;
565 				goto normal_return;
566 			}
567 		}
568 		else if (is_double(yytext)) {
569 			yylval_was_set = 1;
570 			errno = 0;
571 			if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
572 				msyslog(LOG_ERR,
573 					"Double too large to represent: %s",
574 					yytext);
575 				exit(1);
576 			} else {
577 				token = T_Double;
578 				goto normal_return;
579 			}
580 		} else {
581 			/* Default: Everything is a string */
582 			yylval_was_set = 1;
583 			token = create_string_token(yytext);
584 			goto normal_return;
585 		}
586 	}
587 
588 	/*
589 	 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
590 	 * of a string.  Hence, we need to return T_String.
591 	 *
592 	 * _Except_ we might have a -4 or -6 flag on a an association
593 	 * configuration line (server, peer, pool, etc.).
594 	 *
595 	 * This is a terrible hack, but the grammar is ambiguous so we
596 	 * don't have a choice.  [SK]
597 	 *
598 	 * The ambiguity is in the keyword scanner, not ntp_parser.y.
599 	 * We do not require server addresses be quoted in ntp.conf,
600 	 * complicating the scanner's job.  To avoid trying (and
601 	 * failing) to match an IP address or DNS name to a keyword,
602 	 * the association keywords use FOLLBY_STRING in the keyword
603 	 * table, which tells the scanner to force the next token to be
604 	 * a T_String, so it does not try to match a keyword but rather
605 	 * expects a string when -4/-6 modifiers to server, peer, etc.
606 	 * are encountered.
607 	 * restrict -4 and restrict -6 parsing works correctly without
608 	 * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
609 	 */
610 	if ('-' == yytext[0]) {
611 		if ('4' == yytext[1]) {
612 			token = T_Ipv4_flag;
613 			goto normal_return;
614 		} else if ('6' == yytext[1]) {
615 			token = T_Ipv6_flag;
616 			goto normal_return;
617 		}
618 	}
619 
620 	instring = 0;
621 	if (FOLLBY_STRING == followedby)
622 		followedby = FOLLBY_TOKEN;
623 
624 	yylval_was_set = 1;
625 	token = create_string_token(yytext);
626 
627 normal_return:
628 	if (T_EOC == token)
629 		DPRINTF(4,("\t<end of command>\n"));
630 	else
631 		DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
632 			    token_name(token)));
633 
634 	if (!yylval_was_set)
635 		yylval.Integer = token;
636 
637 	return token;
638 
639 lex_too_long:
640 	yytext[min(sizeof(yytext) - 1, 50)] = 0;
641 	msyslog(LOG_ERR,
642 		"configuration item on line %d longer than limit of %zu, began with '%s'",
643 		ip_file->line_no, sizeof(yytext) - 1, yytext);
644 
645 	/*
646 	 * If we hit the length limit reading the startup configuration
647 	 * file, abort.
648 	 */
649 	if (input_from_file)
650 		exit(sizeof(yytext) - 1);
651 
652 	/*
653 	 * If it's runtime configuration via ntpq :config treat it as
654 	 * if the configuration text ended before the too-long lexeme,
655 	 * hostname, or string.
656 	 */
657 	yylval.Integer = 0;
658 	return 0;
659 }
660