xref: /netbsd-src/external/bsd/ntp/dist/ntpd/ntp_scanner.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: ntp_scanner.c,v 1.6 2013/12/30 17:41:57 christos Exp $	*/
2 
3 
4 /* ntp_scanner.c
5  *
6  * The source code for a simple lexical analyzer.
7  *
8  * Written By:	Sachin Kamboj
9  *		University of Delaware
10  *		Newark, DE 19711
11  * Copyright (c) 2006
12  */
13 
14 #ifdef HAVE_CONFIG_H
15 # include <config.h>
16 #endif
17 
18 #include <stdio.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <string.h>
23 
24 #include "ntpd.h"
25 #include "ntp_config.h"
26 #include "ntpsim.h"
27 #include "ntp_scanner.h"
28 #include "ntp_parser.h"
29 
30 /* ntp_keyword.h declares finite state machine and token text */
31 #include "ntp_keyword.h"
32 
33 
34 
35 /* SCANNER GLOBAL VARIABLES
36  * ------------------------
37  */
38 
/*
 * Size of the yytext[] lexeme buffer.  yylex() bails out to its
 * lex_too_long handler as soon as the write index reaches
 * COUNTOF(yytext), so the last element is always available for the
 * terminating NUL.
 */
#define MAX_LEXEME (1024 + 1)	/* The maximum size of a lexeme */
char yytext[MAX_LEXEME];	/* Buffer for storing the input text/lexeme */
/* FGETC() adds each byte it returns and UNGETC() subtracts it again,
 * both only while input_from_file is nonzero. */
u_int32 conf_file_sum;		/* Simple sum of characters read */
/* Nonzero: get_next_char()/push_back_char() use ip_file via
 * FGETC()/UNGETC().  Zero: they walk remote_config.buffer instead. */
extern int input_from_file;
43 
44 
45 
46 
47 /* CONSTANTS
48  * ---------
49  */
50 
51 
/* SPECIAL CHARACTERS
 * ------------------
 * Characters that is_special() recognizes.  yylex() returns each of
 * them as its own single-character token while it is in FOLLBY_TOKEN
 * mode, and quote_if_needed() wraps a string in double quotes when it
 * contains any of them.
 */
const char special_chars[] = "{}(),;|=";
56 
57 
58 /* FUNCTIONS
59  * ---------
60  */
61 
/* Fetch the next input character (file or remote buffer), or EOF. */
int get_next_char(void);
/* Keyword state machine lookup: returns the matching token (nonzero)
 * and stores its follow-by mode through pfollowedby, or returns 0. */
static int is_keyword(char *lexeme, follby *pfollowedby);
64 
65 
66 
67 /*
68  * keyword() - Return the keyword associated with token T_ identifier.
69  *	       See also token_name() for the string-ized T_ identifier.
70  *	       Example: keyword(T_Server) returns "server"
71  *			token_name(T_Server) returns "T_Server"
72  */
73 const char *
74 keyword(
75 	int token
76 	)
77 {
78 	size_t i;
79 	const char *text;
80 
81 	i = token - LOWEST_KEYWORD_ID;
82 
83 	if (i < COUNTOF(keyword_text))
84 		text = keyword_text[i];
85 	else
86 		text = NULL;
87 
88 	return (text != NULL)
89 		   ? text
90 		   : "(keyword not found)";
91 }
92 
93 
94 /* FILE INTERFACE
95  * --------------
96  * We define a couple of wrapper functions around the standard C fgetc
97  * and ungetc functions in order to include positional bookkeeping
98  */
99 
100 struct FILE_INFO *
101 F_OPEN(
102 	const char *path,
103 	const char *mode
104 	)
105 {
106 	struct FILE_INFO *my_info;
107 
108 	my_info = emalloc(sizeof *my_info);
109 
110 	my_info->line_no = 1;
111 	my_info->col_no = 0;
112 	my_info->prev_line_col_no = 0;
113 	my_info->prev_token_col_no = 0;
114 	my_info->fname = path;
115 
116 	my_info->fd = fopen(path, mode);
117 	if (NULL == my_info->fd) {
118 		free(my_info);
119 		return NULL;
120 	}
121 	return my_info;
122 }
123 
124 int
125 FGETC(
126 	struct FILE_INFO *stream
127 	)
128 {
129 	int ch;
130 
131 	do
132 		ch = fgetc(stream->fd);
133 	while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX));
134 
135 	if (EOF != ch) {
136 		if (input_from_file)
137 			conf_file_sum += (u_char)ch;
138 		++stream->col_no;
139 		if (ch == '\n') {
140 			stream->prev_line_col_no = stream->col_no;
141 			++stream->line_no;
142 			stream->col_no = 1;
143 		}
144 	}
145 
146 	return ch;
147 }
148 
149 /* BUGS: 1. Function will fail on more than one line of pushback
150  *       2. No error checking is done to see if ungetc fails
151  * SK: I don't think its worth fixing these bugs for our purposes ;-)
152  */
153 int
154 UNGETC(
155 	int ch,
156 	struct FILE_INFO *stream
157 	)
158 {
159 	if (input_from_file)
160 		conf_file_sum -= (u_char)ch;
161 	if (ch == '\n') {
162 		stream->col_no = stream->prev_line_col_no;
163 		stream->prev_line_col_no = -1;
164 		--stream->line_no;
165 	}
166 	--stream->col_no;
167 	return ungetc(ch, stream->fd);
168 }
169 
170 int
171 FCLOSE(
172 	struct FILE_INFO *stream
173 	)
174 {
175 	int ret_val = fclose(stream->fd);
176 
177 	if (!ret_val)
178 		free(stream);
179 	return ret_val;
180 }
181 
182 /* STREAM INTERFACE
183  * ----------------
184  * Provide a wrapper for the stream functions so that the
185  * stream can either read from a file or from a character
186  * array.
187  * NOTE: This is not very efficient for reading from character
188  * arrays, but needed to allow remote configuration where the
189  * configuration command is provided through ntpq.
190  *
 * The behavior of these two functions is determined by the
192  * input_from_file flag.
193  */
194 
195 int
196 get_next_char(
197 	void
198 	)
199 {
200 	char ch;
201 
202 	if (input_from_file)
203 		return FGETC(ip_file);
204 	else {
205 		if (remote_config.buffer[remote_config.pos] == '\0')
206 			return EOF;
207 		else {
208 			ip_file->col_no++;
209 			ch = remote_config.buffer[remote_config.pos++];
210 			if (ch == '\n') {
211 				ip_file->prev_line_col_no = ip_file->col_no;
212 				++ip_file->line_no;
213 				ip_file->col_no = 1;
214 			}
215 			return ch;
216 		}
217 	}
218 }
219 
220 void
221 push_back_char(
222 	int ch
223 	)
224 {
225 	if (input_from_file)
226 		UNGETC(ch, ip_file);
227 	else {
228 		if (ch == '\n') {
229 			ip_file->col_no = ip_file->prev_line_col_no;
230 			ip_file->prev_line_col_no = -1;
231 			--ip_file->line_no;
232 		}
233 		--ip_file->col_no;
234 
235 		remote_config.pos--;
236 	}
237 }
238 
239 
240 
241 /* STATE MACHINES
242  * --------------
243  */
244 
245 /* Keywords */
246 static int
247 is_keyword(
248 	char *lexeme,
249 	follby *pfollowedby
250 	)
251 {
252 	follby fb;
253 	int curr_s;		/* current state index */
254 	int token;
255 	int i;
256 
257 	curr_s = SCANNER_INIT_S;
258 	token = 0;
259 
260 	for (i = 0; lexeme[i]; i++) {
261 		while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
262 			curr_s = SS_OTHER_N(sst[curr_s]);
263 
264 		if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
265 			if ('\0' == lexeme[i + 1]
266 			    && FOLLBY_NON_ACCEPTING
267 			       != SS_FB(sst[curr_s])) {
268 				fb = SS_FB(sst[curr_s]);
269 				*pfollowedby = fb;
270 				token = curr_s;
271 				break;
272 			}
273 			curr_s = SS_MATCH_N(sst[curr_s]);
274 		} else
275 			break;
276 	}
277 
278 	return token;
279 }
280 
281 
/*
 * is_integer() - Test whether a lexeme is a decimal integer that can
 *		  be stored in a signed int.
 *
 * Accepted form: an optional leading '-' followed by one or more
 * decimal digits and nothing else.  A leading '+' is not accepted.
 *
 * Returns 1 if the lexeme has that form and its value lies within
 * INT_MIN..INT_MAX, 0 otherwise.  In particular a lone "-" and the
 * empty string are rejected, and so are values of either sign that do
 * not fit in an int; such lexemes are left for the later classifiers
 * instead of being converted to a truncated value.
 *
 * The range test uses strtol(), whose behavior on overflow is defined
 * (ERANGE), rather than sscanf("%u"), whose behavior is not.  errno is
 * modified as a side effect.
 */
static int
is_integer(
	char *lexeme
	)
{
	const char *	digits;
	const char *	pch;
	long		val;

	/* Allow a leading minus sign */
	digits = lexeme;
	if ('-' == *digits)
		digits++;

	/* At least one digit is required */
	if ('\0' == *digits)
		return 0;

	/* Check that all the remaining characters are digits */
	for (pch = digits; *pch != '\0'; pch++) {
		if (!isdigit((unsigned char)*pch))
			return 0;
	}

	/* Reject anything that does not fit in a signed int */
	errno = 0;
	val = strtol(lexeme, NULL, 10);
	if (ERANGE == errno)
		return 0;

	return (val >= INT_MIN && val <= INT_MAX);
}
317 
318 
/*
 * is_u_int() - Test whether a lexeme is an unsigned integer that can
 *		be stored in an unsigned int.  Assumes is_integer() has
 *		already returned FALSE for the lexeme.
 *
 * Accepted forms: one or more decimal digits, or "0x"/"0X" followed by
 * one or more hexadecimal digits.
 *
 * Returns 1 if the lexeme has one of those forms and its value does
 * not exceed UINT_MAX, 0 otherwise.  A bare "0x" and the empty string
 * are rejected because they contain no digits to convert, and
 * oversized values are rejected so that the caller's sscanf()
 * conversion is never asked to store a value it cannot represent.
 * errno is modified as a side effect.
 */
static int
is_u_int(
	char *lexeme
	)
{
	const char *	digits;
	const char *	pch;
	unsigned long	val;
	int		base;

	digits = lexeme;
	base = 10;
	if ('0' == digits[0] && 'x' == tolower((unsigned char)digits[1])) {
		digits += 2;
		base = 16;
	}

	/* At least one digit is required */
	if ('\0' == *digits)
		return 0;

	/* Check that all the remaining characters are digits */
	for (pch = digits; *pch != '\0'; pch++) {
		if (16 == base) {
			if (!isxdigit((unsigned char)*pch))
				return 0;
		} else {
			if (!isdigit((unsigned char)*pch))
				return 0;
		}
	}

	/* Reject values too large for an unsigned int */
	errno = 0;
	val = strtoul(digits, NULL, base);
	if (ERANGE == errno || val > UINT_MAX)
		return 0;

	return 1;
}
346 
347 
/*
 * is_double() - Test whether a lexeme is a floating point number.
 *
 * Accepted form:
 *	[+-] digits [ . digits ] [ (e|E) [+-] digits ]
 * where the integer part and the fraction part may each be empty as
 * long as they contain at least one digit between them, and where an
 * exponent marker, if present, must be followed by at least one
 * digit.  Lexemes such as "1e" or "1e+" are therefore not doubles.
 *
 * Returns 1 if the entire lexeme matches, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	const char *	pch;
	size_t		mantissa_digits;
	size_t		exponent_digits;

	pch = lexeme;

	/* Check for an optional '+' or '-' */
	if ('+' == *pch || '-' == *pch)
		pch++;

	/* Read the integer part */
	mantissa_digits = 0;
	while (isdigit((unsigned char)*pch)) {
		mantissa_digits++;
		pch++;
	}

	/* Check for the optional decimal point and fraction digits */
	if ('.' == *pch) {
		pch++;
		while (isdigit((unsigned char)*pch)) {
			mantissa_digits++;
			pch++;
		}
	}

	/*
	 * The integer part and the fraction part together must
	 * contain at least one digit.
	 */
	if (0 == mantissa_digits)
		return 0;

	/* Check if we are done */
	if ('\0' == *pch)
		return 1;

	/* There is still more input, it must be an exponent */
	if ('e' != tolower((unsigned char)*pch))
		return 0;
	pch++;

	/* Read an optional sign */
	if ('+' == *pch || '-' == *pch)
		pch++;

	/* Now read the exponent digits, of which there must be some */
	exponent_digits = 0;
	while (isdigit((unsigned char)*pch)) {
		exponent_digits++;
		pch++;
	}

	/* Nothing may follow the exponent */
	return (exponent_digits > 0 && '\0' == *pch);
}
406 
407 
408 /* is_special() - Test whether a character is a token */
409 static inline int
410 is_special(
411 	int ch
412 	)
413 {
414 	return strchr(special_chars, ch) != NULL;
415 }
416 
417 
418 static int
419 is_EOC(
420 	int ch
421 	)
422 {
423 	if ((old_config_style && (ch == '\n')) ||
424 	    (!old_config_style && (ch == ';')))
425 		return 1;
426 	return 0;
427 }
428 
429 
430 char *
431 quote_if_needed(char *str)
432 {
433 	char *ret;
434 	size_t len;
435 	size_t octets;
436 
437 	len = strlen(str);
438 	octets = len + 2 + 1;
439 	ret = emalloc(octets);
440 	if ('"' != str[0]
441 	    && (strcspn(str, special_chars) < len
442 		|| strchr(str, ' ') != NULL)) {
443 		snprintf(ret, octets, "\"%s\"", str);
444 	} else
445 		strlcpy(ret, str, octets);
446 
447 	return ret;
448 }
449 
450 
451 static int
452 create_string_token(
453 	char *lexeme
454 	)
455 {
456 	char *pch;
457 
458 	/*
459 	 * ignore end of line whitespace
460 	 */
461 	pch = lexeme;
462 	while (*pch && isspace((unsigned char)*pch))
463 		pch++;
464 
465 	if (!*pch) {
466 		yylval.Integer = T_EOC;
467 		return yylval.Integer;
468 	}
469 
470 	yylval.String = estrdup(lexeme);
471 	return T_String;
472 }
473 
474 
475 /*
476  * yylex() - function that does the actual scanning.
477  * Bison expects this function to be called yylex and for it to take no
478  * input and return an int.
479  * Conceptually yylex "returns" yylval as well as the actual return
480  * value representing the token or type.
481  */
482 int
483 yylex(
484 	void
485 	)
486 {
487 	static follby	followedby = FOLLBY_TOKEN;
488 	size_t		i;
489 	int		instring;
490 	int		yylval_was_set;
491 	int		converted;
492 	int		token;		/* The return value */
493 	int		ch;
494 
495 	instring = FALSE;
496 	yylval_was_set = FALSE;
497 
498 	do {
499 		/* Ignore whitespace at the beginning */
500 		while (EOF != (ch = get_next_char()) &&
501 		       isspace(ch) &&
502 		       !is_EOC(ch))
503 			; /* Null Statement */
504 
505 		if (EOF == ch) {
506 
507 			if (!input_from_file || curr_include_level <= 0)
508 				return 0;
509 
510 			FCLOSE(fp[curr_include_level]);
511 			ip_file = fp[--curr_include_level];
512 			token = T_EOC;
513 			goto normal_return;
514 
515 		} else if (is_EOC(ch)) {
516 
517 			/* end FOLLBY_STRINGS_TO_EOC effect */
518 			followedby = FOLLBY_TOKEN;
519 			token = T_EOC;
520 			goto normal_return;
521 
522 		} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
523 			/* special chars are their own token values */
524 			token = ch;
525 			/*
526 			 * '=' outside simulator configuration implies
527 			 * a single string following as in:
528 			 * setvar Owner = "The Boss" default
529 			 */
530 			if ('=' == ch && old_config_style)
531 				followedby = FOLLBY_STRING;
532 			yytext[0] = (char)ch;
533 			yytext[1] = '\0';
534 			goto normal_return;
535 		} else
536 			push_back_char(ch);
537 
538 		/* save the position of start of the token */
539 		ip_file->prev_token_line_no = ip_file->line_no;
540 		ip_file->prev_token_col_no = ip_file->col_no;
541 
542 		/* Read in the lexeme */
543 		i = 0;
544 		while (EOF != (ch = get_next_char())) {
545 
546 			yytext[i] = (char)ch;
547 
548 			/* Break on whitespace or a special character */
549 			if (isspace(ch) || is_EOC(ch)
550 			    || '"' == ch
551 			    || (FOLLBY_TOKEN == followedby
552 				&& is_special(ch)))
553 				break;
554 
555 			/* Read the rest of the line on reading a start
556 			   of comment character */
557 			if ('#' == ch) {
558 				while (EOF != (ch = get_next_char())
559 				       && '\n' != ch)
560 					; /* Null Statement */
561 				break;
562 			}
563 
564 			i++;
565 			if (i >= COUNTOF(yytext))
566 				goto lex_too_long;
567 		}
568 		/* Pick up all of the string inside between " marks, to
569 		 * end of line.  If we make it to EOL without a
570 		 * terminating " assume it for them.
571 		 *
572 		 * XXX - HMS: I'm not sure we want to assume the closing "
573 		 */
574 		if ('"' == ch) {
575 			instring = TRUE;
576 			while (EOF != (ch = get_next_char()) &&
577 			       ch != '"' && ch != '\n') {
578 				yytext[i++] = (char)ch;
579 				if (i >= COUNTOF(yytext))
580 					goto lex_too_long;
581 			}
582 			/*
583 			 * yytext[i] will be pushed back as not part of
584 			 * this lexeme, but any closing quote should
585 			 * not be pushed back, so we read another char.
586 			 */
587 			if ('"' == ch)
588 				ch = get_next_char();
589 		}
590 		/* Pushback the last character read that is not a part
591 		 * of this lexeme.
592 		 * If the last character read was an EOF, pushback a
593 		 * newline character. This is to prevent a parse error
594 		 * when there is no newline at the end of a file.
595 		 */
596 		if (EOF == ch)
597 			push_back_char('\n');
598 		else
599 			push_back_char(ch);
600 		yytext[i] = '\0';
601 	} while (i == 0);
602 
603 	/* Now return the desired token */
604 
605 	/* First make sure that the parser is *not* expecting a string
606 	 * as the next token (based on the previous token that was
607 	 * returned) and that we haven't read a string.
608 	 */
609 
610 	if (followedby == FOLLBY_TOKEN && !instring) {
611 		token = is_keyword(yytext, &followedby);
612 		if (token) {
613 			/*
614 			 * T_Server is exceptional as it forces the
615 			 * following token to be a string in the
616 			 * non-simulator parts of the configuration,
617 			 * but in the simulator configuration section,
618 			 * "server" is followed by "=" which must be
619 			 * recognized as a token not a string.
620 			 */
621 			if (T_Server == token && !old_config_style)
622 				followedby = FOLLBY_TOKEN;
623 			goto normal_return;
624 		} else if (is_integer(yytext)) {
625 			yylval_was_set = TRUE;
626 			errno = 0;
627 			if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
628 			    && ((errno == EINVAL) || (errno == ERANGE))) {
629 				msyslog(LOG_ERR,
630 					"Integer cannot be represented: %s",
631 					yytext);
632 				if (input_from_file) {
633 					exit(1);
634 				} else {
635 					/* force end of parsing */
636 					yylval.Integer = 0;
637 					return 0;
638 				}
639 			}
640 			token = T_Integer;
641 			goto normal_return;
642 		} else if (is_u_int(yytext)) {
643 			yylval_was_set = TRUE;
644 			if ('0' == yytext[0] &&
645 			    'x' == tolower((unsigned char)yytext[1]))
646 				converted = sscanf(&yytext[2], "%x",
647 						   &yylval.U_int);
648 			else
649 				converted = sscanf(yytext, "%u",
650 						   &yylval.U_int);
651 			if (1 != converted) {
652 				msyslog(LOG_ERR,
653 					"U_int cannot be represented: %s",
654 					yytext);
655 				if (input_from_file) {
656 					exit(1);
657 				} else {
658 					/* force end of parsing */
659 					yylval.Integer = 0;
660 					return 0;
661 				}
662 			}
663 			token = T_U_int;
664 			goto normal_return;
665 		} else if (is_double(yytext)) {
666 			yylval_was_set = TRUE;
667 			errno = 0;
668 			if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
669 				msyslog(LOG_ERR,
670 					"Double too large to represent: %s",
671 					yytext);
672 				exit(1);
673 			} else {
674 				token = T_Double;
675 				goto normal_return;
676 			}
677 		} else {
678 			/* Default: Everything is a string */
679 			yylval_was_set = TRUE;
680 			token = create_string_token(yytext);
681 			goto normal_return;
682 		}
683 	}
684 
685 	/*
686 	 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
687 	 * of a string.  Hence, we need to return T_String.
688 	 *
689 	 * _Except_ we might have a -4 or -6 flag on a an association
690 	 * configuration line (server, peer, pool, etc.).
691 	 *
692 	 * This is a terrible hack, but the grammar is ambiguous so we
693 	 * don't have a choice.  [SK]
694 	 *
695 	 * The ambiguity is in the keyword scanner, not ntp_parser.y.
696 	 * We do not require server addresses be quoted in ntp.conf,
697 	 * complicating the scanner's job.  To avoid trying (and
698 	 * failing) to match an IP address or DNS name to a keyword,
699 	 * the association keywords use FOLLBY_STRING in the keyword
700 	 * table, which tells the scanner to force the next token to be
701 	 * a T_String, so it does not try to match a keyword but rather
702 	 * expects a string when -4/-6 modifiers to server, peer, etc.
703 	 * are encountered.
704 	 * restrict -4 and restrict -6 parsing works correctly without
705 	 * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
706 	 */
707 	if ('-' == yytext[0]) {
708 		if ('4' == yytext[1]) {
709 			token = T_Ipv4_flag;
710 			goto normal_return;
711 		} else if ('6' == yytext[1]) {
712 			token = T_Ipv6_flag;
713 			goto normal_return;
714 		}
715 	}
716 
717 	instring = FALSE;
718 	if (FOLLBY_STRING == followedby)
719 		followedby = FOLLBY_TOKEN;
720 
721 	yylval_was_set = TRUE;
722 	token = create_string_token(yytext);
723 
724 normal_return:
725 	if (T_EOC == token)
726 		DPRINTF(4,("\t<end of command>\n"));
727 	else
728 		DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
729 			    token_name(token)));
730 
731 	if (!yylval_was_set)
732 		yylval.Integer = token;
733 
734 	return token;
735 
736 lex_too_long:
737 	yytext[min(sizeof(yytext) - 1, 50)] = 0;
738 	msyslog(LOG_ERR,
739 		"configuration item on line %d longer than limit of %lu, began with '%s'",
740 		ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50),
741 		yytext);
742 
743 	/*
744 	 * If we hit the length limit reading the startup configuration
745 	 * file, abort.
746 	 */
747 	if (input_from_file)
748 		exit(sizeof(yytext) - 1);
749 
750 	/*
751 	 * If it's runtime configuration via ntpq :config treat it as
752 	 * if the configuration text ended before the too-long lexeme,
753 	 * hostname, or string.
754 	 */
755 	yylval.Integer = 0;
756 	return 0;
757 }
758