xref: /netbsd-src/external/bsd/ntp/dist/ntpd/ntp_scanner.c (revision 7788a0781fe6ff2cce37368b4578a7ade0850cb1)
1 /*	$NetBSD: ntp_scanner.c,v 1.4 2012/02/01 07:46:22 kardel Exp $	*/
2 
3 
4 /* ntp_scanner.c
5  *
6  * The source code for a simple lexical analyzer.
7  *
8  * Written By:	Sachin Kamboj
9  *		University of Delaware
10  *		Newark, DE 19711
11  * Copyright (c) 2006
12  */
13 
14 #ifdef HAVE_CONFIG_H
15 # include <config.h>
16 #endif
17 
18 #include <stdio.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <string.h>
23 
24 #include "ntp_config.h"
25 #include "ntpsim.h"
26 #include "ntp_scanner.h"
27 #include "ntp_parser.h"
28 #include "ntp_debug.h"
29 
30 /* ntp_keyword.h declares finite state machine and token text */
31 #include "ntp_keyword.h"
32 
33 
34 
/* SCANNER GLOBAL VARIABLES
 * ------------------------
 */

#define MAX_LEXEME (1024 + 1)	/* Size of yytext: 1024 characters plus the terminating NUL */
char yytext[MAX_LEXEME];	/* Buffer for storing the input text/lexeme */
extern int input_from_file;	/* nonzero: read from ip_file; zero: read from remote_config.buffer */




/* CONSTANTS
 * ---------
 */


/*
 * Characters that is_special() reports as single-character tokens.
 * quote_if_needed() also uses this set to decide whether a string
 * has to be wrapped in double quotes.
 */
const char special_chars[] = "{}(),;|=";


/* FUNCTIONS
 * ---------
 */

/* Forward declarations for routines referenced before their definition. */
int get_next_char(void);
static int is_keyword(char *lexeme, follby *pfollowedby);
63 
64 
65 
66 /*
67  * keyword() - Return the keyword associated with token T_ identifier.
68  *	       See also token_name() for the string-ized T_ identifier.
69  *	       Example: keyword(T_Server) returns "server"
70  *			token_name(T_Server) returns "T_Server"
71  */
72 const char *
73 keyword(
74 	int token
75 	)
76 {
77 	size_t i;
78 	const char *text;
79 
80 	i = token - LOWEST_KEYWORD_ID;
81 
82 	if (i < COUNTOF(keyword_text))
83 		text = keyword_text[i];
84 	else
85 		text = NULL;
86 
87 	return (text != NULL)
88 		   ? text
89 		   : "(keyword not found)";
90 }
91 
92 
93 /* FILE INTERFACE
94  * --------------
95  * We define a couple of wrapper functions around the standard C fgetc
96  * and ungetc functions in order to include positional bookkeeping
97  */
98 
99 struct FILE_INFO *
100 F_OPEN(
101 	const char *path,
102 	const char *mode
103 	)
104 {
105 	struct FILE_INFO *my_info;
106 
107 	my_info = emalloc(sizeof *my_info);
108 
109 	my_info->line_no = 1;
110 	my_info->col_no = 0;
111 	my_info->prev_line_col_no = 0;
112 	my_info->prev_token_col_no = 0;
113 	my_info->fname = path;
114 
115 	my_info->fd = fopen(path, mode);
116 	if (NULL == my_info->fd) {
117 		free(my_info);
118 		return NULL;
119 	}
120 	return my_info;
121 }
122 
123 int
124 FGETC(
125 	struct FILE_INFO *stream
126 	)
127 {
128 	int ch = fgetc(stream->fd);
129 
130 	++stream->col_no;
131 	if (ch == '\n') {
132 		stream->prev_line_col_no = stream->col_no;
133 		++stream->line_no;
134 		stream->col_no = 1;
135 	}
136 	return ch;
137 }
138 
139 /* BUGS: 1. Function will fail on more than one line of pushback
140  *       2. No error checking is done to see if ungetc fails
141  * SK: I don't think its worth fixing these bugs for our purposes ;-)
142  */
143 int
144 UNGETC(
145 	int ch,
146 	struct FILE_INFO *stream
147 	)
148 {
149 	if (ch == '\n') {
150 		stream->col_no = stream->prev_line_col_no;
151 		stream->prev_line_col_no = -1;
152 		--stream->line_no;
153 	}
154 	--stream->col_no;
155 	return ungetc(ch, stream->fd);
156 }
157 
158 int
159 FCLOSE(
160 	struct FILE_INFO *stream
161 	)
162 {
163 	int ret_val = fclose(stream->fd);
164 
165 	if (!ret_val)
166 		free(stream);
167 	return ret_val;
168 }
169 
/* STREAM INTERFACE
 * ----------------
 * Provide a wrapper for the stream functions so that the
 * stream can either read from a file or from a character
 * array.
 * NOTE: This is not very efficient for reading from character
 * arrays, but needed to allow remote configuration where the
 * configuration command is provided through ntpq.
 *
 * The behavior of these two functions is determined by the
 * input_from_file flag.
 */
182 
183 int
184 get_next_char(
185 	void
186 	)
187 {
188 	char ch;
189 
190 	if (input_from_file)
191 		return FGETC(ip_file);
192 	else {
193 		if (remote_config.buffer[remote_config.pos] == '\0')
194 			return EOF;
195 		else {
196 			ip_file->col_no++;
197 			ch = remote_config.buffer[remote_config.pos++];
198 			if (ch == '\n') {
199 				ip_file->prev_line_col_no = ip_file->col_no;
200 				++ip_file->line_no;
201 				ip_file->col_no = 1;
202 			}
203 			return ch;
204 		}
205 	}
206 }
207 
208 void
209 push_back_char(
210 	int ch
211 	)
212 {
213 	if (input_from_file)
214 		UNGETC(ch, ip_file);
215 	else {
216 		if (ch == '\n') {
217 			ip_file->col_no = ip_file->prev_line_col_no;
218 			ip_file->prev_line_col_no = -1;
219 			--ip_file->line_no;
220 		}
221 		--ip_file->col_no;
222 
223 		remote_config.pos--;
224 	}
225 }
226 
227 
228 
229 /* STATE MACHINES
230  * --------------
231  */
232 
233 /* Keywords */
234 static int
235 is_keyword(
236 	char *lexeme,
237 	follby *pfollowedby
238 	)
239 {
240 	follby fb;
241 	int curr_s;		/* current state index */
242 	int token;
243 	int i;
244 
245 	curr_s = SCANNER_INIT_S;
246 	token = 0;
247 
248 	for (i = 0; lexeme[i]; i++) {
249 		while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
250 			curr_s = SS_OTHER_N(sst[curr_s]);
251 
252 		if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
253 			if ('\0' == lexeme[i + 1]
254 			    && FOLLBY_NON_ACCEPTING
255 			       != SS_FB(sst[curr_s])) {
256 				fb = SS_FB(sst[curr_s]);
257 				*pfollowedby = fb;
258 				token = curr_s;
259 				break;
260 			}
261 			curr_s = SS_MATCH_N(sst[curr_s]);
262 		} else
263 			break;
264 	}
265 
266 	return token;
267 }
268 
269 
/*
 * is_integer() - test whether a lexeme is a decimal integer.
 *
 * Accepts an optional leading '-' followed by one or more decimal
 * digits and nothing else.  An empty lexeme or a lone "-" is rejected,
 * because there is no number for the caller to convert.
 *
 * Returns 1 if the lexeme is an integer, 0 otherwise.
 */
static int
is_integer(
	char *lexeme
	)
{
	const char *p = lexeme;

	/* Allow a leading minus sign */
	if ('-' == *p)
		p++;

	/* At least one digit is required */
	if ('\0' == *p)
		return 0;

	/* Check that all the remaining characters are digits */
	for (; '\0' != *p; p++) {
		if (!isdigit((unsigned char)*p))
			return 0;
	}
	return 1;
}
289 
290 
/*
 * is_double() - test whether a lexeme is a floating point number.
 *
 * Accepted form:  [+-] digits* '.' digits* [ (e|E) [+-] digits+ ]
 * The decimal point is required, at least one digit must appear
 * around it, and an exponent marker must be followed by at least one
 * digit.
 *
 * Returns 1 if the whole lexeme matches, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	const char *p = lexeme;
	unsigned int mantissa_digits = 0;
	unsigned int exponent_digits = 0;

	/* Check for an optional '+' or '-' */
	if ('+' == *p || '-' == *p)
		p++;

	/* Read the integer part */
	while (isdigit((unsigned char)*p)) {
		mantissa_digits++;
		p++;
	}

	/* Check for the required decimal point */
	if ('.' != *p)
		return 0;
	p++;

	/* Check for any digits after the decimal point */
	while (isdigit((unsigned char)*p)) {
		mantissa_digits++;
		p++;
	}

	/*
	 * The integer part and the fraction part must not both be
	 * empty: "." alone is not a number.
	 */
	if (0 == mantissa_digits)
		return 0;

	/* Check if we are done */
	if ('\0' == *p)
		return 1;

	/* There is still more input, it has to be the exponent */
	if ('e' != tolower((unsigned char)*p))
		return 0;
	p++;

	/* Read an optional sign */
	if ('+' == *p || '-' == *p)
		p++;

	/* Now read the exponent digits; at least one is required */
	while (isdigit((unsigned char)*p)) {
		exponent_digits++;
		p++;
	}

	return exponent_digits > 0 && '\0' == *p;
}
351 
352 
353 /* is_special() - Test whether a character is a token */
354 static inline int
355 is_special(
356 	int ch
357 	)
358 {
359 	return strchr(special_chars, ch) != NULL;
360 }
361 
362 
363 static int
364 is_EOC(
365 	int ch
366 	)
367 {
368 	if ((old_config_style && (ch == '\n')) ||
369 	    (!old_config_style && (ch == ';')))
370 		return 1;
371 	return 0;
372 }
373 
374 
375 char *
376 quote_if_needed(char *str)
377 {
378 	char *ret;
379 	size_t len;
380 	size_t octets;
381 
382 	len = strlen(str);
383 	octets = len + 2 + 1;
384 	ret = emalloc(octets);
385 	if ('"' != str[0]
386 	    && (strcspn(str, special_chars) < len
387 		|| strchr(str, ' ') != NULL)) {
388 		snprintf(ret, octets, "\"%s\"", str);
389 	} else
390 		strncpy(ret, str, octets);
391 
392 	return ret;
393 }
394 
395 
396 static int
397 create_string_token(
398 	char *lexeme
399 	)
400 {
401 	char *pch;
402 
403 	/*
404 	 * ignore end of line whitespace
405 	 */
406 	pch = lexeme;
407 	while (*pch && isspace((unsigned char)*pch))
408 		pch++;
409 
410 	if (!*pch) {
411 		yylval.Integer = T_EOC;
412 		return yylval.Integer;
413 	}
414 
415 	yylval.String = estrdup(lexeme);
416 	return T_String;
417 }
418 
419 
420 /*
421  * yylex() - function that does the actual scanning.
422  * Bison expects this function to be called yylex and for it to take no
423  * input and return an int.
424  * Conceptually yylex "returns" yylval as well as the actual return
425  * value representing the token or type.
426  */
427 int
428 yylex(
429 	void
430 	)
431 {
432 	size_t i;
433 	int instring = 0;
434 	int yylval_was_set = 0;
435 	int token;		/* The return value/the recognized token */
436 	int ch;
437 	static follby followedby = FOLLBY_TOKEN;
438 
439 	do {
440 		/* Ignore whitespace at the beginning */
441 		while (EOF != (ch = get_next_char()) &&
442 		       isspace(ch) &&
443 		       !is_EOC(ch))
444 			; /* Null Statement */
445 
446 		if (EOF == ch) {
447 
448 			if (!input_from_file || !curr_include_level)
449 				return 0;
450 
451 			FCLOSE(fp[curr_include_level]);
452 			ip_file = fp[--curr_include_level];
453 			token = T_EOC;
454 			goto normal_return;
455 
456 		} else if (is_EOC(ch)) {
457 
458 			/* end FOLLBY_STRINGS_TO_EOC effect */
459 			followedby = FOLLBY_TOKEN;
460 			token = T_EOC;
461 			goto normal_return;
462 
463 		} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
464 			/* special chars are their own token values */
465 			token = ch;
466 			/*
467 			 * '=' implies a single string following as in:
468 			 * setvar Owner = "The Boss" default
469 			 * This could alternatively be handled by
470 			 * removing '=' from special_chars and adding
471 			 * it to the keyword table.
472 			 */
473 			if ('=' == ch)
474 				followedby = FOLLBY_STRING;
475 			yytext[0] = (char)ch;
476 			yytext[1] = '\0';
477 			goto normal_return;
478 		} else
479 			push_back_char(ch);
480 
481 		/* save the position of start of the token */
482 		ip_file->prev_token_line_no = ip_file->line_no;
483 		ip_file->prev_token_col_no = ip_file->col_no;
484 
485 		/* Read in the lexeme */
486 		i = 0;
487 		while (EOF != (ch = get_next_char())) {
488 
489 			yytext[i] = (char)ch;
490 
491 			/* Break on whitespace or a special character */
492 			if (isspace(ch) || is_EOC(ch)
493 			    || '"' == ch
494 			    || (FOLLBY_TOKEN == followedby
495 				&& is_special(ch)))
496 				break;
497 
498 			/* Read the rest of the line on reading a start
499 			   of comment character */
500 			if ('#' == ch) {
501 				while (EOF != (ch = get_next_char())
502 				       && '\n' != ch)
503 					; /* Null Statement */
504 				break;
505 			}
506 
507 			i++;
508 			if (i >= COUNTOF(yytext))
509 				goto lex_too_long;
510 		}
511 		/* Pick up all of the string inside between " marks, to
512 		 * end of line.  If we make it to EOL without a
513 		 * terminating " assume it for them.
514 		 *
515 		 * XXX - HMS: I'm not sure we want to assume the closing "
516 		 */
517 		if ('"' == ch) {
518 			instring = 1;
519 			while (EOF != (ch = get_next_char()) &&
520 			       ch != '"' && ch != '\n') {
521 				yytext[i++] = (char)ch;
522 				if (i >= COUNTOF(yytext))
523 					goto lex_too_long;
524 			}
525 			/*
526 			 * yytext[i] will be pushed back as not part of
527 			 * this lexeme, but any closing quote should
528 			 * not be pushed back, so we read another char.
529 			 */
530 			if ('"' == ch)
531 				ch = get_next_char();
532 		}
533 		/* Pushback the last character read that is not a part
534 		 * of this lexeme.
535 		 * If the last character read was an EOF, pushback a
536 		 * newline character. This is to prevent a parse error
537 		 * when there is no newline at the end of a file.
538 		 */
539 		if (EOF == ch)
540 			push_back_char('\n');
541 		else
542 			push_back_char(ch);
543 		yytext[i] = '\0';
544 	} while (i == 0);
545 
546 	/* Now return the desired token */
547 
548 	/* First make sure that the parser is *not* expecting a string
549 	 * as the next token (based on the previous token that was
550 	 * returned) and that we haven't read a string.
551 	 */
552 
553 	if (followedby == FOLLBY_TOKEN && !instring) {
554 		token = is_keyword(yytext, &followedby);
555 		if (token)
556 			goto normal_return;
557 		else if (is_integer(yytext)) {
558 			yylval_was_set = 1;
559 			errno = 0;
560 			if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
561 			    && ((errno == EINVAL) || (errno == ERANGE))) {
562 				msyslog(LOG_ERR,
563 					"Integer cannot be represented: %s",
564 					yytext);
565 				exit(1);
566 			} else {
567 				token = T_Integer;
568 				goto normal_return;
569 			}
570 		}
571 		else if (is_double(yytext)) {
572 			yylval_was_set = 1;
573 			errno = 0;
574 			if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
575 				msyslog(LOG_ERR,
576 					"Double too large to represent: %s",
577 					yytext);
578 				exit(1);
579 			} else {
580 				token = T_Double;
581 				goto normal_return;
582 			}
583 		} else {
584 			/* Default: Everything is a string */
585 			yylval_was_set = 1;
586 			token = create_string_token(yytext);
587 			goto normal_return;
588 		}
589 	}
590 
591 	/*
592 	 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
593 	 * of a string.  Hence, we need to return T_String.
594 	 *
595 	 * _Except_ we might have a -4 or -6 flag on a an association
596 	 * configuration line (server, peer, pool, etc.).
597 	 *
598 	 * This is a terrible hack, but the grammar is ambiguous so we
599 	 * don't have a choice.  [SK]
600 	 *
601 	 * The ambiguity is in the keyword scanner, not ntp_parser.y.
602 	 * We do not require server addresses be quoted in ntp.conf,
603 	 * complicating the scanner's job.  To avoid trying (and
604 	 * failing) to match an IP address or DNS name to a keyword,
605 	 * the association keywords use FOLLBY_STRING in the keyword
606 	 * table, which tells the scanner to force the next token to be
607 	 * a T_String, so it does not try to match a keyword but rather
608 	 * expects a string when -4/-6 modifiers to server, peer, etc.
609 	 * are encountered.
610 	 * restrict -4 and restrict -6 parsing works correctly without
611 	 * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
612 	 */
613 	if ('-' == yytext[0]) {
614 		if ('4' == yytext[1]) {
615 			token = T_Ipv4_flag;
616 			goto normal_return;
617 		} else if ('6' == yytext[1]) {
618 			token = T_Ipv6_flag;
619 			goto normal_return;
620 		}
621 	}
622 
623 	instring = 0;
624 	if (FOLLBY_STRING == followedby)
625 		followedby = FOLLBY_TOKEN;
626 
627 	yylval_was_set = 1;
628 	token = create_string_token(yytext);
629 
630 normal_return:
631 	if (T_EOC == token)
632 		DPRINTF(4,("\t<end of command>\n"));
633 	else
634 		DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
635 			    token_name(token)));
636 
637 	if (!yylval_was_set)
638 		yylval.Integer = token;
639 
640 	return token;
641 
642 lex_too_long:
643 	yytext[min(sizeof(yytext) - 1, 50)] = 0;
644 	msyslog(LOG_ERR,
645 		"configuration item on line %d longer than limit of %zu, began with '%s'",
646 		ip_file->line_no, sizeof(yytext) - 1, yytext);
647 
648 	/*
649 	 * If we hit the length limit reading the startup configuration
650 	 * file, abort.
651 	 */
652 	if (input_from_file)
653 		exit(sizeof(yytext) - 1);
654 
655 	/*
656 	 * If it's runtime configuration via ntpq :config treat it as
657 	 * if the configuration text ended before the too-long lexeme,
658 	 * hostname, or string.
659 	 */
660 	yylval.Integer = 0;
661 	return 0;
662 }
663