1 /* $NetBSD: ntp_scanner.c,v 1.4 2012/02/01 07:46:22 kardel Exp $ */ 2 3 4 /* ntp_scanner.c 5 * 6 * The source code for a simple lexical analyzer. 7 * 8 * Written By: Sachin Kamboj 9 * University of Delaware 10 * Newark, DE 19711 11 * Copyright (c) 2006 12 */ 13 14 #ifdef HAVE_CONFIG_H 15 # include <config.h> 16 #endif 17 18 #include <stdio.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <errno.h> 22 #include <string.h> 23 24 #include "ntp_config.h" 25 #include "ntpsim.h" 26 #include "ntp_scanner.h" 27 #include "ntp_parser.h" 28 #include "ntp_debug.h" 29 30 /* ntp_keyword.h declares finite state machine and token text */ 31 #include "ntp_keyword.h" 32 33 34 35 /* SCANNER GLOBAL VARIABLES 36 * ------------------------ 37 */ 38 39 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */ 40 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */ 41 extern int input_from_file; 42 43 44 45 46 /* CONSTANTS 47 * --------- 48 */ 49 50 51 /* SCANNER GLOBAL VARIABLES 52 * ------------------------ 53 */ 54 const char special_chars[] = "{}(),;|="; 55 56 57 /* FUNCTIONS 58 * --------- 59 */ 60 61 int get_next_char(void); 62 static int is_keyword(char *lexeme, follby *pfollowedby); 63 64 65 66 /* 67 * keyword() - Return the keyword associated with token T_ identifier. 68 * See also token_name() for the string-ized T_ identifier. 69 * Example: keyword(T_Server) returns "server" 70 * token_name(T_Server) returns "T_Server" 71 */ 72 const char * 73 keyword( 74 int token 75 ) 76 { 77 size_t i; 78 const char *text; 79 80 i = token - LOWEST_KEYWORD_ID; 81 82 if (i < COUNTOF(keyword_text)) 83 text = keyword_text[i]; 84 else 85 text = NULL; 86 87 return (text != NULL) 88 ? text 89 : "(keyword not found)"; 90 } 91 92 93 /* FILE INTERFACE 94 * -------------- 95 * We define a couple of wrapper functions around the standard C fgetc 96 * and ungetc functions in order to include positional bookkeeping 97 */ 98 99 struct FILE_INFO * 100 F_OPEN( 101 const char *path, 102 const char *mode 103 ) 104 { 105 struct FILE_INFO *my_info; 106 107 my_info = emalloc(sizeof *my_info); 108 109 my_info->line_no = 1; 110 my_info->col_no = 0; 111 my_info->prev_line_col_no = 0; 112 my_info->prev_token_col_no = 0; 113 my_info->fname = path; 114 115 my_info->fd = fopen(path, mode); 116 if (NULL == my_info->fd) { 117 free(my_info); 118 return NULL; 119 } 120 return my_info; 121 } 122 123 int 124 FGETC( 125 struct FILE_INFO *stream 126 ) 127 { 128 int ch = fgetc(stream->fd); 129 130 ++stream->col_no; 131 if (ch == '\n') { 132 stream->prev_line_col_no = stream->col_no; 133 ++stream->line_no; 134 stream->col_no = 1; 135 } 136 return ch; 137 } 138 139 /* BUGS: 1. Function will fail on more than one line of pushback 140 * 2. No error checking is done to see if ungetc fails 141 * SK: I don't think its worth fixing these bugs for our purposes ;-) 142 */ 143 int 144 UNGETC( 145 int ch, 146 struct FILE_INFO *stream 147 ) 148 { 149 if (ch == '\n') { 150 stream->col_no = stream->prev_line_col_no; 151 stream->prev_line_col_no = -1; 152 --stream->line_no; 153 } 154 --stream->col_no; 155 return ungetc(ch, stream->fd); 156 } 157 158 int 159 FCLOSE( 160 struct FILE_INFO *stream 161 ) 162 { 163 int ret_val = fclose(stream->fd); 164 165 if (!ret_val) 166 free(stream); 167 return ret_val; 168 } 169 170 /* STREAM INTERFACE 171 * ---------------- 172 * Provide a wrapper for the stream functions so that the 173 * stream can either read from a file or from a character 174 * array. 175 * NOTE: This is not very efficient for reading from character 176 * arrays, but needed to allow remote configuration where the 177 * configuration command is provided through ntpq. 178 * 179 * The behavior of there two functions is determined by the 180 * input_from_file flag. 181 */ 182 183 int 184 get_next_char( 185 void 186 ) 187 { 188 char ch; 189 190 if (input_from_file) 191 return FGETC(ip_file); 192 else { 193 if (remote_config.buffer[remote_config.pos] == '\0') 194 return EOF; 195 else { 196 ip_file->col_no++; 197 ch = remote_config.buffer[remote_config.pos++]; 198 if (ch == '\n') { 199 ip_file->prev_line_col_no = ip_file->col_no; 200 ++ip_file->line_no; 201 ip_file->col_no = 1; 202 } 203 return ch; 204 } 205 } 206 } 207 208 void 209 push_back_char( 210 int ch 211 ) 212 { 213 if (input_from_file) 214 UNGETC(ch, ip_file); 215 else { 216 if (ch == '\n') { 217 ip_file->col_no = ip_file->prev_line_col_no; 218 ip_file->prev_line_col_no = -1; 219 --ip_file->line_no; 220 } 221 --ip_file->col_no; 222 223 remote_config.pos--; 224 } 225 } 226 227 228 229 /* STATE MACHINES 230 * -------------- 231 */ 232 233 /* Keywords */ 234 static int 235 is_keyword( 236 char *lexeme, 237 follby *pfollowedby 238 ) 239 { 240 follby fb; 241 int curr_s; /* current state index */ 242 int token; 243 int i; 244 245 curr_s = SCANNER_INIT_S; 246 token = 0; 247 248 for (i = 0; lexeme[i]; i++) { 249 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s]))) 250 curr_s = SS_OTHER_N(sst[curr_s]); 251 252 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) { 253 if ('\0' == lexeme[i + 1] 254 && FOLLBY_NON_ACCEPTING 255 != SS_FB(sst[curr_s])) { 256 fb = SS_FB(sst[curr_s]); 257 *pfollowedby = fb; 258 token = curr_s; 259 break; 260 } 261 curr_s = SS_MATCH_N(sst[curr_s]); 262 } else 263 break; 264 } 265 266 return token; 267 } 268 269 270 /* Integer */ 271 static int 272 is_integer( 273 char *lexeme 274 ) 275 { 276 int i = 0; 277 278 /* Allow a leading minus sign */ 279 if (lexeme[i] == '-') 280 ++i; 281 282 /* Check that all the remaining characters are digits */ 283 for (; lexeme[i]; ++i) { 284 if (!isdigit((unsigned char)lexeme[i])) 285 return 0; 286 } 287 return 1; 288 } 289 290 291 /* Double */ 292 static int 293 is_double( 294 char *lexeme 295 ) 296 { 297 u_int num_digits = 0; /* Number of digits read */ 298 u_int i; 299 300 i = 0; 301 302 /* Check for an optional '+' or '-' */ 303 if ('+' == lexeme[i] || '-' == lexeme[i]) 304 i++; 305 306 /* Read the integer part */ 307 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 308 num_digits++; 309 310 /* Check for the required decimal point */ 311 if ('.' == lexeme[i]) 312 i++; 313 else 314 return 0; 315 316 /* Check for any digits after the decimal point */ 317 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 318 num_digits++; 319 320 /* 321 * The number of digits in both the decimal part and the 322 * fraction part must not be zero at this point 323 */ 324 if (!num_digits) 325 return 0; 326 327 /* Check if we are done */ 328 if (!lexeme[i]) 329 return 1; 330 331 /* There is still more input, read the exponent */ 332 if ('e' == tolower((unsigned char)lexeme[i])) 333 i++; 334 else 335 return 0; 336 337 /* Read an optional Sign */ 338 if ('+' == lexeme[i] || '-' == lexeme[i]) 339 i++; 340 341 /* Now read the exponent part */ 342 while (lexeme[i] && isdigit((unsigned char)lexeme[i])) 343 i++; 344 345 /* Check if we are done */ 346 if (!lexeme[i]) 347 return 1; 348 else 349 return 0; 350 } 351 352 353 /* is_special() - Test whether a character is a token */ 354 static inline int 355 is_special( 356 int ch 357 ) 358 { 359 return strchr(special_chars, ch) != NULL; 360 } 361 362 363 static int 364 is_EOC( 365 int ch 366 ) 367 { 368 if ((old_config_style && (ch == '\n')) || 369 (!old_config_style && (ch == ';'))) 370 return 1; 371 return 0; 372 } 373 374 375 char * 376 quote_if_needed(char *str) 377 { 378 char *ret; 379 size_t len; 380 size_t octets; 381 382 len = strlen(str); 383 octets = len + 2 + 1; 384 ret = emalloc(octets); 385 if ('"' != str[0] 386 && (strcspn(str, special_chars) < len 387 || strchr(str, ' ') != NULL)) { 388 snprintf(ret, octets, "\"%s\"", str); 389 } else 390 strncpy(ret, str, octets); 391 392 return ret; 393 } 394 395 396 static int 397 create_string_token( 398 char *lexeme 399 ) 400 { 401 char *pch; 402 403 /* 404 * ignore end of line whitespace 405 */ 406 pch = lexeme; 407 while (*pch && isspace((unsigned char)*pch)) 408 pch++; 409 410 if (!*pch) { 411 yylval.Integer = T_EOC; 412 return yylval.Integer; 413 } 414 415 yylval.String = estrdup(lexeme); 416 return T_String; 417 } 418 419 420 /* 421 * yylex() - function that does the actual scanning. 422 * Bison expects this function to be called yylex and for it to take no 423 * input and return an int. 424 * Conceptually yylex "returns" yylval as well as the actual return 425 * value representing the token or type. 426 */ 427 int 428 yylex( 429 void 430 ) 431 { 432 size_t i; 433 int instring = 0; 434 int yylval_was_set = 0; 435 int token; /* The return value/the recognized token */ 436 int ch; 437 static follby followedby = FOLLBY_TOKEN; 438 439 do { 440 /* Ignore whitespace at the beginning */ 441 while (EOF != (ch = get_next_char()) && 442 isspace(ch) && 443 !is_EOC(ch)) 444 ; /* Null Statement */ 445 446 if (EOF == ch) { 447 448 if (!input_from_file || !curr_include_level) 449 return 0; 450 451 FCLOSE(fp[curr_include_level]); 452 ip_file = fp[--curr_include_level]; 453 token = T_EOC; 454 goto normal_return; 455 456 } else if (is_EOC(ch)) { 457 458 /* end FOLLBY_STRINGS_TO_EOC effect */ 459 followedby = FOLLBY_TOKEN; 460 token = T_EOC; 461 goto normal_return; 462 463 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) { 464 /* special chars are their own token values */ 465 token = ch; 466 /* 467 * '=' implies a single string following as in: 468 * setvar Owner = "The Boss" default 469 * This could alternatively be handled by 470 * removing '=' from special_chars and adding 471 * it to the keyword table. 472 */ 473 if ('=' == ch) 474 followedby = FOLLBY_STRING; 475 yytext[0] = (char)ch; 476 yytext[1] = '\0'; 477 goto normal_return; 478 } else 479 push_back_char(ch); 480 481 /* save the position of start of the token */ 482 ip_file->prev_token_line_no = ip_file->line_no; 483 ip_file->prev_token_col_no = ip_file->col_no; 484 485 /* Read in the lexeme */ 486 i = 0; 487 while (EOF != (ch = get_next_char())) { 488 489 yytext[i] = (char)ch; 490 491 /* Break on whitespace or a special character */ 492 if (isspace(ch) || is_EOC(ch) 493 || '"' == ch 494 || (FOLLBY_TOKEN == followedby 495 && is_special(ch))) 496 break; 497 498 /* Read the rest of the line on reading a start 499 of comment character */ 500 if ('#' == ch) { 501 while (EOF != (ch = get_next_char()) 502 && '\n' != ch) 503 ; /* Null Statement */ 504 break; 505 } 506 507 i++; 508 if (i >= COUNTOF(yytext)) 509 goto lex_too_long; 510 } 511 /* Pick up all of the string inside between " marks, to 512 * end of line. If we make it to EOL without a 513 * terminating " assume it for them. 514 * 515 * XXX - HMS: I'm not sure we want to assume the closing " 516 */ 517 if ('"' == ch) { 518 instring = 1; 519 while (EOF != (ch = get_next_char()) && 520 ch != '"' && ch != '\n') { 521 yytext[i++] = (char)ch; 522 if (i >= COUNTOF(yytext)) 523 goto lex_too_long; 524 } 525 /* 526 * yytext[i] will be pushed back as not part of 527 * this lexeme, but any closing quote should 528 * not be pushed back, so we read another char. 529 */ 530 if ('"' == ch) 531 ch = get_next_char(); 532 } 533 /* Pushback the last character read that is not a part 534 * of this lexeme. 535 * If the last character read was an EOF, pushback a 536 * newline character. This is to prevent a parse error 537 * when there is no newline at the end of a file. 538 */ 539 if (EOF == ch) 540 push_back_char('\n'); 541 else 542 push_back_char(ch); 543 yytext[i] = '\0'; 544 } while (i == 0); 545 546 /* Now return the desired token */ 547 548 /* First make sure that the parser is *not* expecting a string 549 * as the next token (based on the previous token that was 550 * returned) and that we haven't read a string. 551 */ 552 553 if (followedby == FOLLBY_TOKEN && !instring) { 554 token = is_keyword(yytext, &followedby); 555 if (token) 556 goto normal_return; 557 else if (is_integer(yytext)) { 558 yylval_was_set = 1; 559 errno = 0; 560 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0 561 && ((errno == EINVAL) || (errno == ERANGE))) { 562 msyslog(LOG_ERR, 563 "Integer cannot be represented: %s", 564 yytext); 565 exit(1); 566 } else { 567 token = T_Integer; 568 goto normal_return; 569 } 570 } 571 else if (is_double(yytext)) { 572 yylval_was_set = 1; 573 errno = 0; 574 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) { 575 msyslog(LOG_ERR, 576 "Double too large to represent: %s", 577 yytext); 578 exit(1); 579 } else { 580 token = T_Double; 581 goto normal_return; 582 } 583 } else { 584 /* Default: Everything is a string */ 585 yylval_was_set = 1; 586 token = create_string_token(yytext); 587 goto normal_return; 588 } 589 } 590 591 /* 592 * Either followedby is not FOLLBY_TOKEN or this lexeme is part 593 * of a string. Hence, we need to return T_String. 594 * 595 * _Except_ we might have a -4 or -6 flag on a an association 596 * configuration line (server, peer, pool, etc.). 597 * 598 * This is a terrible hack, but the grammar is ambiguous so we 599 * don't have a choice. [SK] 600 * 601 * The ambiguity is in the keyword scanner, not ntp_parser.y. 602 * We do not require server addresses be quoted in ntp.conf, 603 * complicating the scanner's job. To avoid trying (and 604 * failing) to match an IP address or DNS name to a keyword, 605 * the association keywords use FOLLBY_STRING in the keyword 606 * table, which tells the scanner to force the next token to be 607 * a T_String, so it does not try to match a keyword but rather 608 * expects a string when -4/-6 modifiers to server, peer, etc. 609 * are encountered. 610 * restrict -4 and restrict -6 parsing works correctly without 611 * this hack, as restrict uses FOLLBY_TOKEN. [DH] 612 */ 613 if ('-' == yytext[0]) { 614 if ('4' == yytext[1]) { 615 token = T_Ipv4_flag; 616 goto normal_return; 617 } else if ('6' == yytext[1]) { 618 token = T_Ipv6_flag; 619 goto normal_return; 620 } 621 } 622 623 instring = 0; 624 if (FOLLBY_STRING == followedby) 625 followedby = FOLLBY_TOKEN; 626 627 yylval_was_set = 1; 628 token = create_string_token(yytext); 629 630 normal_return: 631 if (T_EOC == token) 632 DPRINTF(4,("\t<end of command>\n")); 633 else 634 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext, 635 token_name(token))); 636 637 if (!yylval_was_set) 638 yylval.Integer = token; 639 640 return token; 641 642 lex_too_long: 643 yytext[min(sizeof(yytext) - 1, 50)] = 0; 644 msyslog(LOG_ERR, 645 "configuration item on line %d longer than limit of %zu, began with '%s'", 646 ip_file->line_no, sizeof(yytext) - 1, yytext); 647 648 /* 649 * If we hit the length limit reading the startup configuration 650 * file, abort. 651 */ 652 if (input_from_file) 653 exit(sizeof(yytext) - 1); 654 655 /* 656 * If it's runtime configuration via ntpq :config treat it as 657 * if the configuration text ended before the too-long lexeme, 658 * hostname, or string. 659 */ 660 yylval.Integer = 0; 661 return 0; 662 } 663