dist/ntpd/ntp_scanner.c

/*	$NetBSD: ntp_scanner.c,v 1.3 2010/12/04 23:08:35 christos Exp $	*/


/* ntp_scanner.c
 *
 * The source code for a simple lexical analyzer.
 *
 * Written By:	Sachin Kamboj
 *		University of Delaware
 *		Newark, DE 19711
 * Copyright (c) 2006
 */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

#include "ntp_config.h"
#include "ntpsim.h"
#include "ntp_scanner.h"
#include "ntp_parser.h"
#include "ntp_debug.h"

/* ntp_keyword.h declares finite state machine and token text */
#include "ntp_keyword.h"


/* SCANNER GLOBAL VARIABLES
 * ------------------------
 */

#define MAX_LEXEME (1024 + 1)	/* The maximum size of a lexeme */
char yytext[MAX_LEXEME];	/* Buffer for storing the input text/lexeme */
extern int input_from_file;


/* CONSTANTS
 * ---------
 */


/* SCANNER GLOBAL VARIABLES
 * ------------------------
 */
const char special_chars[] = "{}(),;|=";


/* FUNCTIONS
 * ---------
 */

int get_next_char(void);
static int is_keyword(char *lexeme, follby *pfollowedby);


/*
 * keyword() - Return the keyword associated with token T_ identifier
 */
const char *
keyword(
	int token
	)
{
	size_t i;
	const char *text;

	i = token - LOWEST_KEYWORD_ID;

	if (i < COUNTOF(keyword_text))
		text = keyword_text[i];
	else
		text = NULL;

	return (text != NULL)
		   ? text
		   : "(keyword not found)";
}


/* FILE INTERFACE
 * --------------
 * We define a couple of wrapper functions around the standard C fgetc
 * and ungetc functions in order to include positional bookkeeping
 */

struct FILE_INFO *
F_OPEN(
	const char *path,
	const char *mode
	)
{
	struct FILE_INFO *my_info;

	my_info = emalloc(sizeof *my_info);

	my_info->line_no = 1;
	my_info->col_no = 0;
	my_info->prev_line_col_no = 0;
	my_info->prev_token_col_no = 0;
	my_info->fname = path;

	my_info->fd = fopen(path, mode);
	if (NULL == my_info->fd) {
		free(my_info);
		return NULL;
	}
	return my_info;
}

int
FGETC(
	struct FILE_INFO *stream
	)
{
	int ch = fgetc(stream->fd);

	++stream->col_no;
	if (ch == '\n') {
		stream->prev_line_col_no = stream->col_no;
		++stream->line_no;
		stream->col_no = 1;
	}
	return ch;
}

/* BUGS: 1. Function will fail on more than one line of pushback
 *       2. No error checking is done to see if ungetc fails
 * SK: I don't think its worth fixing these bugs for our purposes ;-)
 */
int
UNGETC(
	int ch,
	struct FILE_INFO *stream
	)
{
	if (ch == '\n') {
		stream->col_no = stream->prev_line_col_no;
		stream->prev_line_col_no = -1;
		--stream->line_no;
	}
	--stream->col_no;
	return ungetc(ch, stream->fd);
}

int
FCLOSE(
	struct FILE_INFO *stream
	)
{
	int ret_val = fclose(stream->fd);

	if (!ret_val)
		free(stream);
	return ret_val;
}

/* STREAM INTERFACE
 * ----------------
 * Provide a wrapper for the stream functions so that the
 * stream can either read from a file or from a character
 * array.
 * NOTE: This is not very efficient for reading from character
 * arrays, but needed to allow remote configuration where the
 * configuration command is provided through ntpq.
 *
 * The behavior of there two functions is determined by the
 * input_from_file flag.
 */

int
get_next_char(
	void
	)
{
	char ch;

	if (input_from_file)
		return FGETC(ip_file);
	else {
		if (remote_config.buffer[remote_config.pos] == '\0')
			return EOF;
		else {
			ip_file->col_no++;
			ch = remote_config.buffer[remote_config.pos++];
			if (ch == '\n') {
				ip_file->prev_line_col_no = ip_file->col_no;
				++ip_file->line_no;
				ip_file->col_no = 1;
			}
			return ch;
		}
	}
}

void
push_back_char(
	int ch
	)
{
	if (input_from_file)
		UNGETC(ch, ip_file);
	else {
		if (ch == '\n') {
			ip_file->col_no = ip_file->prev_line_col_no;
			ip_file->prev_line_col_no = -1;
			--ip_file->line_no;
		}
		--ip_file->col_no;

		remote_config.pos--;
	}
}


/* STATE MACHINES
 * --------------
 */

/* Keywords */
static int
is_keyword(
	char *lexeme,
	follby *pfollowedby
	)
{
	follby fb;
	int curr_s;		/* current state index */
	int token;
	int i;

	curr_s = SCANNER_INIT_S;
	token = 0;

	for (i = 0; lexeme[i]; i++) {
		while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
			curr_s = SS_OTHER_N(sst[curr_s]);

		if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
			if ('\0' == lexeme[i + 1]
			    && FOLLBY_NON_ACCEPTING
			       != SS_FB(sst[curr_s])) {
				fb = SS_FB(sst[curr_s]);
				*pfollowedby = fb;
				token = curr_s;
				break;
			}
			curr_s = SS_MATCH_N(sst[curr_s]);
		} else
			break;
	}

	return token;
}


/* Integer */
static int
is_integer(
	char *lexeme
	)
{
	int i = 0;

	/* Allow a leading minus sign */
	if (lexeme[i] == '-')
		++i;

	/* Check that all the remaining characters are digits */
	for (; lexeme[i]; ++i) {
		if (!isdigit((unsigned char)lexeme[i]))
			return 0;
	}
	return 1;
}


/* Double */
static int
is_double(
	char *lexeme
	)
{
	int num_digits = 0;  /* Number of digits read */
	int i;

	i = 0;

	/* Check for an optional '+' or '-' */
	if ('+' == lexeme[i] || '-' == lexeme[i])
		i++;

	/* Read the integer part */
	for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++)
		num_digits++;

	/* Check for the required decimal point */
	if ('.' == lexeme[i])
		i++;
	else
		return 0;

	/* Check for any digits after the decimal point */
	for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++)
		num_digits++;

	/*
	 * The number of digits in both the decimal part and the
	 * fraction part must not be zero at this point
	 */
	if (!num_digits)
		return 0;

	/* Check if we are done */
	if (!lexeme[i])
		return 1;

	/* There is still more input, read the exponent */
	if ('e' == tolower((unsigned char)lexeme[i]))
		i++;
	else
		return 0;

	/* Read an optional Sign */
	if ('+' == lexeme[i] || '-' == lexeme[i])
		i++;

	/* Now read the exponent part */
	while (lexeme[i] && isdigit((unsigned char)lexeme[i]))
		i++;

	/* Check if we are done */
	if (!lexeme[i])
		return 1;
	else
		return 0;
}


/* is_special() - Test whether a character is a token */
static inline int
is_special(
	int ch
	)
{
	return strchr(special_chars, ch) != NULL;
}


static int
is_EOC(
	int ch
	)
{
	if ((old_config_style && (ch == '\n')) ||
	    (!old_config_style && (ch == ';')))
		return 1;
	return 0;
}


char *
quote_if_needed(char *str)
{
	char *ret;
	size_t len;
	size_t octets;

	len = strlen(str);
	octets = len + 2 + 1;
	ret = emalloc(octets);
	if ('"' != str[0]
	    && (strcspn(str, special_chars) < len
		|| strchr(str, ' ') != NULL)) {
		snprintf(ret, octets, "\"%s\"", str);
	} else
		strncpy(ret, str, octets);

	return ret;
}


static int
create_string_token(
	char *lexeme
	)
{
	char *pch;

	/*
	 * ignore end of line whitespace
	 */
	pch = lexeme;
	while (*pch && isspace((unsigned char)*pch))
		pch++;

	if (!*pch) {
		yylval.Integer = T_EOC;
		return yylval.Integer;
	}

	yylval.String = estrdup(lexeme);
	return T_String;
}


/*
 * yylex() - function that does the actual scanning.
 * Bison expects this function to be called yylex and for it to take no
 * input and return an int.
 * Conceptually yylex "returns" yylval as well as the actual return
 * value representing the token or type.
 */
int
yylex(
	void
	)
{
	size_t i;
	int instring = 0;
	int yylval_was_set = 0;
	int token;		/* The return value/the recognized token */
	int ch;
	static follby followedby = FOLLBY_TOKEN;

	do {
		/* Ignore whitespace at the beginning */
		while (EOF != (ch = get_next_char()) &&
		       isspace(ch) &&
		       !is_EOC(ch))
			; /* Null Statement */

		if (EOF == ch) {

			if (!input_from_file || !curr_include_level)
				return 0;

			FCLOSE(fp[curr_include_level]);
			ip_file = fp[--curr_include_level];
			token = T_EOC;
			goto normal_return;

		} else if (is_EOC(ch)) {

			/* end FOLLBY_STRINGS_TO_EOC effect */
			followedby = FOLLBY_TOKEN;
			token = T_EOC;
			goto normal_return;

		} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
			/* special chars are their own token values */
			token = ch;
			/*
			 * '=' implies a single string following as in:
			 * setvar Owner = "The Boss" default
			 * This could alternatively be handled by
			 * removing '=' from special_chars and adding
			 * it to the keyword table.
			 */
			if ('=' == ch)
				followedby = FOLLBY_STRING;
			yytext[0] = (char)ch;
			yytext[1] = '\0';
			goto normal_return;
		} else
			push_back_char(ch);

		/* save the position of start of the token */
		ip_file->prev_token_line_no = ip_file->line_no;
		ip_file->prev_token_col_no = ip_file->col_no;

		/* Read in the lexeme */
		i = 0;
		while (EOF != (ch = get_next_char())) {

			yytext[i] = (char)ch;

			/* Break on whitespace or a special character */
			if (isspace(ch) || is_EOC(ch)
			    || '"' == ch
			    || (FOLLBY_TOKEN == followedby
				&& is_special(ch)))
				break;

			/* Read the rest of the line on reading a start
			   of comment character */
			if ('#' == ch) {
				while (EOF != (ch = get_next_char())
				       && '\n' != ch)
					; /* Null Statement */
				break;
			}

			i++;
			if (i >= COUNTOF(yytext))
				goto lex_too_long;
		}
		/* Pick up all of the string inside between " marks, to
		 * end of line.  If we make it to EOL without a
		 * terminating " assume it for them.
		 *
		 * XXX - HMS: I'm not sure we want to assume the closing "
		 */
		if ('"' == ch) {
			instring = 1;
			while (EOF != (ch = get_next_char()) &&
			       ch != '"' && ch != '\n') {
				yytext[i++] = (char)ch;
				if (i >= COUNTOF(yytext))
					goto lex_too_long;
			}
			/*
			 * yytext[i] will be pushed back as not part of
			 * this lexeme, but any closing quote should
			 * not be pushed back, so we read another char.
			 */
			if ('"' == ch)
				ch = get_next_char();
		}
		/* Pushback the last character read that is not a part
		 * of this lexeme.
		 * If the last character read was an EOF, pushback a
		 * newline character. This is to prevent a parse error
		 * when there is no newline at the end of a file.
		 */
		if (EOF == ch)
			push_back_char('\n');
		else
			push_back_char(ch);
		yytext[i] = '\0';
	} while (i == 0);

	/* Now return the desired token */

	/* First make sure that the parser is *not* expecting a string
	 * as the next token (based on the previous token that was
	 * returned) and that we haven't read a string.
	 */

	if (followedby == FOLLBY_TOKEN && !instring) {
		token = is_keyword(yytext, &followedby);
		if (token)
			goto normal_return;
		else if (is_integer(yytext)) {
			yylval_was_set = 1;
			errno = 0;
			if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
			    && ((errno == EINVAL) || (errno == ERANGE))) {
				msyslog(LOG_ERR,
					"Integer cannot be represented: %s",
					yytext);
				exit(1);
			} else {
				token = T_Integer;
				goto normal_return;
			}
		}
		else if (is_double(yytext)) {
			yylval_was_set = 1;
			errno = 0;
			if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
				msyslog(LOG_ERR,
					"Double too large to represent: %s",
					yytext);
				exit(1);
			} else {
				token = T_Double;
				goto normal_return;
			}
		} else {
			/* Default: Everything is a string */
			yylval_was_set = 1;
			token = create_string_token(yytext);
			goto normal_return;
		}
	}

	/*
	 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
	 * of a string.  Hence, we need to return T_String.
	 *
	 * _Except_ we might have a -4 or -6 flag on a an association
	 * configuration line (server, peer, pool, etc.).
	 *
	 * This is a terrible hack, but the grammar is ambiguous so we
	 * don't have a choice.  [SK]
	 *
	 * The ambiguity is in the keyword scanner, not ntp_parser.y.
	 * We do not require server addresses be quoted in ntp.conf,
	 * complicating the scanner's job.  To avoid trying (and
	 * failing) to match an IP address or DNS name to a keyword,
	 * the association keywords use FOLLBY_STRING in the keyword
	 * table, which tells the scanner to force the next token to be
	 * a T_String, so it does not try to match a keyword but rather
	 * expects a string when -4/-6 modifiers to server, peer, etc.
	 * are encountered.
	 * restrict -4 and restrict -6 parsing works correctly without
	 * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
	 */
	if ('-' == yytext[0]) {
		if ('4' == yytext[1]) {
			token = T_Ipv4_flag;
			goto normal_return;
		} else if ('6' == yytext[1]) {
			token = T_Ipv6_flag;
			goto normal_return;
		}
	}

	instring = 0;
	if (FOLLBY_STRING == followedby)
		followedby = FOLLBY_TOKEN;

	yylval_was_set = 1;
	token = create_string_token(yytext);

normal_return:
	if (T_EOC == token)
		DPRINTF(4,("\t<end of command>\n"));
	else
		DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
			    token_name(token)));

	if (!yylval_was_set)
		yylval.Integer = token;

	return token;

lex_too_long:
	yytext[min(sizeof(yytext) - 1, 50)] = 0;
	msyslog(LOG_ERR,
		"configuration item on line %d longer than limit of %zu, began with '%s'",
		ip_file->line_no, sizeof(yytext) - 1, yytext);

	/*
	 * If we hit the length limit reading the startup configuration
	 * file, abort.
	 */
	if (input_from_file)
		exit(sizeof(yytext) - 1);

	/*
	 * If it's runtime configuration via ntpq :config treat it as
	 * if the configuration text ended before the too-long lexeme,
	 * hostname, or string.
	 */
	yylval.Integer = 0;
	return 0;
}