1 /* $NetBSD: ntp_scanner.c,v 1.6 2013/12/30 17:41:57 christos Exp $ */ 2 3 4 /* ntp_scanner.c 5 * 6 * The source code for a simple lexical analyzer. 7 * 8 * Written By: Sachin Kamboj 9 * University of Delaware 10 * Newark, DE 19711 11 * Copyright (c) 2006 12 */ 13 14 #ifdef HAVE_CONFIG_H 15 # include <config.h> 16 #endif 17 18 #include <stdio.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <errno.h> 22 #include <string.h> 23 24 #include "ntpd.h" 25 #include "ntp_config.h" 26 #include "ntpsim.h" 27 #include "ntp_scanner.h" 28 #include "ntp_parser.h" 29 30 /* ntp_keyword.h declares finite state machine and token text */ 31 #include "ntp_keyword.h" 32 33 34 35 /* SCANNER GLOBAL VARIABLES 36 * ------------------------ 37 */ 38 39 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */ 40 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */ 41 u_int32 conf_file_sum; /* Simple sum of characters read */ 42 extern int input_from_file; 43 44 45 46 47 /* CONSTANTS 48 * --------- 49 */ 50 51 52 /* SCANNER GLOBAL VARIABLES 53 * ------------------------ 54 */ 55 const char special_chars[] = "{}(),;|="; 56 57 58 /* FUNCTIONS 59 * --------- 60 */ 61 62 int get_next_char(void); 63 static int is_keyword(char *lexeme, follby *pfollowedby); 64 65 66 67 /* 68 * keyword() - Return the keyword associated with token T_ identifier. 69 * See also token_name() for the string-ized T_ identifier. 70 * Example: keyword(T_Server) returns "server" 71 * token_name(T_Server) returns "T_Server" 72 */ 73 const char * 74 keyword( 75 int token 76 ) 77 { 78 size_t i; 79 const char *text; 80 81 i = token - LOWEST_KEYWORD_ID; 82 83 if (i < COUNTOF(keyword_text)) 84 text = keyword_text[i]; 85 else 86 text = NULL; 87 88 return (text != NULL) 89 ? text 90 : "(keyword not found)"; 91 } 92 93 94 /* FILE INTERFACE 95 * -------------- 96 * We define a couple of wrapper functions around the standard C fgetc 97 * and ungetc functions in order to include positional bookkeeping 98 */ 99 100 struct FILE_INFO * 101 F_OPEN( 102 const char *path, 103 const char *mode 104 ) 105 { 106 struct FILE_INFO *my_info; 107 108 my_info = emalloc(sizeof *my_info); 109 110 my_info->line_no = 1; 111 my_info->col_no = 0; 112 my_info->prev_line_col_no = 0; 113 my_info->prev_token_col_no = 0; 114 my_info->fname = path; 115 116 my_info->fd = fopen(path, mode); 117 if (NULL == my_info->fd) { 118 free(my_info); 119 return NULL; 120 } 121 return my_info; 122 } 123 124 int 125 FGETC( 126 struct FILE_INFO *stream 127 ) 128 { 129 int ch; 130 131 do 132 ch = fgetc(stream->fd); 133 while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX)); 134 135 if (EOF != ch) { 136 if (input_from_file) 137 conf_file_sum += (u_char)ch; 138 ++stream->col_no; 139 if (ch == '\n') { 140 stream->prev_line_col_no = stream->col_no; 141 ++stream->line_no; 142 stream->col_no = 1; 143 } 144 } 145 146 return ch; 147 } 148 149 /* BUGS: 1. Function will fail on more than one line of pushback 150 * 2. No error checking is done to see if ungetc fails 151 * SK: I don't think its worth fixing these bugs for our purposes ;-) 152 */ 153 int 154 UNGETC( 155 int ch, 156 struct FILE_INFO *stream 157 ) 158 { 159 if (input_from_file) 160 conf_file_sum -= (u_char)ch; 161 if (ch == '\n') { 162 stream->col_no = stream->prev_line_col_no; 163 stream->prev_line_col_no = -1; 164 --stream->line_no; 165 } 166 --stream->col_no; 167 return ungetc(ch, stream->fd); 168 } 169 170 int 171 FCLOSE( 172 struct FILE_INFO *stream 173 ) 174 { 175 int ret_val = fclose(stream->fd); 176 177 if (!ret_val) 178 free(stream); 179 return ret_val; 180 } 181 182 /* STREAM INTERFACE 183 * ---------------- 184 * Provide a wrapper for the stream functions so that the 185 * stream can either read from a file or from a character 186 * array. 187 * NOTE: This is not very efficient for reading from character 188 * arrays, but needed to allow remote configuration where the 189 * configuration command is provided through ntpq. 190 * 191 * The behavior of there two functions is determined by the 192 * input_from_file flag. 193 */ 194 195 int 196 get_next_char( 197 void 198 ) 199 { 200 char ch; 201 202 if (input_from_file) 203 return FGETC(ip_file); 204 else { 205 if (remote_config.buffer[remote_config.pos] == '\0') 206 return EOF; 207 else { 208 ip_file->col_no++; 209 ch = remote_config.buffer[remote_config.pos++]; 210 if (ch == '\n') { 211 ip_file->prev_line_col_no = ip_file->col_no; 212 ++ip_file->line_no; 213 ip_file->col_no = 1; 214 } 215 return ch; 216 } 217 } 218 } 219 220 void 221 push_back_char( 222 int ch 223 ) 224 { 225 if (input_from_file) 226 UNGETC(ch, ip_file); 227 else { 228 if (ch == '\n') { 229 ip_file->col_no = ip_file->prev_line_col_no; 230 ip_file->prev_line_col_no = -1; 231 --ip_file->line_no; 232 } 233 --ip_file->col_no; 234 235 remote_config.pos--; 236 } 237 } 238 239 240 241 /* STATE MACHINES 242 * -------------- 243 */ 244 245 /* Keywords */ 246 static int 247 is_keyword( 248 char *lexeme, 249 follby *pfollowedby 250 ) 251 { 252 follby fb; 253 int curr_s; /* current state index */ 254 int token; 255 int i; 256 257 curr_s = SCANNER_INIT_S; 258 token = 0; 259 260 for (i = 0; lexeme[i]; i++) { 261 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s]))) 262 curr_s = SS_OTHER_N(sst[curr_s]); 263 264 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) { 265 if ('\0' == lexeme[i + 1] 266 && FOLLBY_NON_ACCEPTING 267 != SS_FB(sst[curr_s])) { 268 fb = SS_FB(sst[curr_s]); 269 *pfollowedby = fb; 270 token = curr_s; 271 break; 272 } 273 curr_s = SS_MATCH_N(sst[curr_s]); 274 } else 275 break; 276 } 277 278 return token; 279 } 280 281 282 /* Integer */ 283 static int 284 is_integer( 285 char *lexeme 286 ) 287 { 288 int i; 289 int is_neg; 290 u_int u_val; 291 292 i = 0; 293 294 /* Allow a leading minus sign */ 295 if (lexeme[i] == '-') { 296 i++; 297 is_neg = TRUE; 298 } else { 299 is_neg = FALSE; 300 } 301 302 /* Check that all the remaining characters are digits */ 303 for (; lexeme[i] != '\0'; i++) { 304 if (!isdigit((unsigned char)lexeme[i])) 305 return FALSE; 306 } 307 308 if (is_neg) 309 return TRUE; 310 311 /* Reject numbers that fit in unsigned but not in signed int */ 312 if (1 == sscanf(lexeme, "%u", &u_val)) 313 return (u_val <= INT_MAX); 314 else 315 return FALSE; 316 } 317 318 319 /* U_int -- assumes is_integer() has returned FALSE */ 320 static int 321 is_u_int( 322 char *lexeme 323 ) 324 { 325 int i; 326 int is_hex; 327 328 i = 0; 329 if ('0' == lexeme[i] && 'x' == tolower((unsigned char)lexeme[i + 1])) { 330 i += 2; 331 is_hex = TRUE; 332 } else { 333 is_hex = FALSE; 334 } 335 336 /* Check that all the remaining characters are digits */ 337 for (; lexeme[i] != '\0'; i++) { 338 if (is_hex && !isxdigit((unsigned char)lexeme[i])) 339 return FALSE; 340 if (!is_hex && !isdigit((unsigned char)lexeme[i])) 341 return FALSE; 342 } 343 344 return TRUE; 345 } 346 347 348 /* Double */ 349 static int 350 is_double( 351 char *lexeme 352 ) 353 { 354 u_int num_digits = 0; /* Number of digits read */ 355 u_int i; 356 357 i = 0; 358 359 /* Check for an optional '+' or '-' */ 360 if ('+' == lexeme[i] || '-' == lexeme[i]) 361 i++; 362 363 /* Read the integer part */ 364 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 365 num_digits++; 366 367 /* Check for the optional decimal point */ 368 if ('.' == lexeme[i]) { 369 i++; 370 /* Check for any digits after the decimal point */ 371 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 372 num_digits++; 373 } 374 375 /* 376 * The number of digits in both the decimal part and the 377 * fraction part must not be zero at this point 378 */ 379 if (!num_digits) 380 return 0; 381 382 /* Check if we are done */ 383 if (!lexeme[i]) 384 return 1; 385 386 /* There is still more input, read the exponent */ 387 if ('e' == tolower((unsigned char)lexeme[i])) 388 i++; 389 else 390 return 0; 391 392 /* Read an optional Sign */ 393 if ('+' == lexeme[i] || '-' == lexeme[i]) 394 i++; 395 396 /* Now read the exponent part */ 397 while (lexeme[i] && isdigit((unsigned char)lexeme[i])) 398 i++; 399 400 /* Check if we are done */ 401 if (!lexeme[i]) 402 return 1; 403 else 404 return 0; 405 } 406 407 408 /* is_special() - Test whether a character is a token */ 409 static inline int 410 is_special( 411 int ch 412 ) 413 { 414 return strchr(special_chars, ch) != NULL; 415 } 416 417 418 static int 419 is_EOC( 420 int ch 421 ) 422 { 423 if ((old_config_style && (ch == '\n')) || 424 (!old_config_style && (ch == ';'))) 425 return 1; 426 return 0; 427 } 428 429 430 char * 431 quote_if_needed(char *str) 432 { 433 char *ret; 434 size_t len; 435 size_t octets; 436 437 len = strlen(str); 438 octets = len + 2 + 1; 439 ret = emalloc(octets); 440 if ('"' != str[0] 441 && (strcspn(str, special_chars) < len 442 || strchr(str, ' ') != NULL)) { 443 snprintf(ret, octets, "\"%s\"", str); 444 } else 445 strlcpy(ret, str, octets); 446 447 return ret; 448 } 449 450 451 static int 452 create_string_token( 453 char *lexeme 454 ) 455 { 456 char *pch; 457 458 /* 459 * ignore end of line whitespace 460 */ 461 pch = lexeme; 462 while (*pch && isspace((unsigned char)*pch)) 463 pch++; 464 465 if (!*pch) { 466 yylval.Integer = T_EOC; 467 return yylval.Integer; 468 } 469 470 yylval.String = estrdup(lexeme); 471 return T_String; 472 } 473 474 475 /* 476 * yylex() - function that does the actual scanning. 477 * Bison expects this function to be called yylex and for it to take no 478 * input and return an int. 479 * Conceptually yylex "returns" yylval as well as the actual return 480 * value representing the token or type. 481 */ 482 int 483 yylex( 484 void 485 ) 486 { 487 static follby followedby = FOLLBY_TOKEN; 488 size_t i; 489 int instring; 490 int yylval_was_set; 491 int converted; 492 int token; /* The return value */ 493 int ch; 494 495 instring = FALSE; 496 yylval_was_set = FALSE; 497 498 do { 499 /* Ignore whitespace at the beginning */ 500 while (EOF != (ch = get_next_char()) && 501 isspace(ch) && 502 !is_EOC(ch)) 503 ; /* Null Statement */ 504 505 if (EOF == ch) { 506 507 if (!input_from_file || curr_include_level <= 0) 508 return 0; 509 510 FCLOSE(fp[curr_include_level]); 511 ip_file = fp[--curr_include_level]; 512 token = T_EOC; 513 goto normal_return; 514 515 } else if (is_EOC(ch)) { 516 517 /* end FOLLBY_STRINGS_TO_EOC effect */ 518 followedby = FOLLBY_TOKEN; 519 token = T_EOC; 520 goto normal_return; 521 522 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) { 523 /* special chars are their own token values */ 524 token = ch; 525 /* 526 * '=' outside simulator configuration implies 527 * a single string following as in: 528 * setvar Owner = "The Boss" default 529 */ 530 if ('=' == ch && old_config_style) 531 followedby = FOLLBY_STRING; 532 yytext[0] = (char)ch; 533 yytext[1] = '\0'; 534 goto normal_return; 535 } else 536 push_back_char(ch); 537 538 /* save the position of start of the token */ 539 ip_file->prev_token_line_no = ip_file->line_no; 540 ip_file->prev_token_col_no = ip_file->col_no; 541 542 /* Read in the lexeme */ 543 i = 0; 544 while (EOF != (ch = get_next_char())) { 545 546 yytext[i] = (char)ch; 547 548 /* Break on whitespace or a special character */ 549 if (isspace(ch) || is_EOC(ch) 550 || '"' == ch 551 || (FOLLBY_TOKEN == followedby 552 && is_special(ch))) 553 break; 554 555 /* Read the rest of the line on reading a start 556 of comment character */ 557 if ('#' == ch) { 558 while (EOF != (ch = get_next_char()) 559 && '\n' != ch) 560 ; /* Null Statement */ 561 break; 562 } 563 564 i++; 565 if (i >= COUNTOF(yytext)) 566 goto lex_too_long; 567 } 568 /* Pick up all of the string inside between " marks, to 569 * end of line. If we make it to EOL without a 570 * terminating " assume it for them. 571 * 572 * XXX - HMS: I'm not sure we want to assume the closing " 573 */ 574 if ('"' == ch) { 575 instring = TRUE; 576 while (EOF != (ch = get_next_char()) && 577 ch != '"' && ch != '\n') { 578 yytext[i++] = (char)ch; 579 if (i >= COUNTOF(yytext)) 580 goto lex_too_long; 581 } 582 /* 583 * yytext[i] will be pushed back as not part of 584 * this lexeme, but any closing quote should 585 * not be pushed back, so we read another char. 586 */ 587 if ('"' == ch) 588 ch = get_next_char(); 589 } 590 /* Pushback the last character read that is not a part 591 * of this lexeme. 592 * If the last character read was an EOF, pushback a 593 * newline character. This is to prevent a parse error 594 * when there is no newline at the end of a file. 595 */ 596 if (EOF == ch) 597 push_back_char('\n'); 598 else 599 push_back_char(ch); 600 yytext[i] = '\0'; 601 } while (i == 0); 602 603 /* Now return the desired token */ 604 605 /* First make sure that the parser is *not* expecting a string 606 * as the next token (based on the previous token that was 607 * returned) and that we haven't read a string. 608 */ 609 610 if (followedby == FOLLBY_TOKEN && !instring) { 611 token = is_keyword(yytext, &followedby); 612 if (token) { 613 /* 614 * T_Server is exceptional as it forces the 615 * following token to be a string in the 616 * non-simulator parts of the configuration, 617 * but in the simulator configuration section, 618 * "server" is followed by "=" which must be 619 * recognized as a token not a string. 620 */ 621 if (T_Server == token && !old_config_style) 622 followedby = FOLLBY_TOKEN; 623 goto normal_return; 624 } else if (is_integer(yytext)) { 625 yylval_was_set = TRUE; 626 errno = 0; 627 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0 628 && ((errno == EINVAL) || (errno == ERANGE))) { 629 msyslog(LOG_ERR, 630 "Integer cannot be represented: %s", 631 yytext); 632 if (input_from_file) { 633 exit(1); 634 } else { 635 /* force end of parsing */ 636 yylval.Integer = 0; 637 return 0; 638 } 639 } 640 token = T_Integer; 641 goto normal_return; 642 } else if (is_u_int(yytext)) { 643 yylval_was_set = TRUE; 644 if ('0' == yytext[0] && 645 'x' == tolower((unsigned char)yytext[1])) 646 converted = sscanf(&yytext[2], "%x", 647 &yylval.U_int); 648 else 649 converted = sscanf(yytext, "%u", 650 &yylval.U_int); 651 if (1 != converted) { 652 msyslog(LOG_ERR, 653 "U_int cannot be represented: %s", 654 yytext); 655 if (input_from_file) { 656 exit(1); 657 } else { 658 /* force end of parsing */ 659 yylval.Integer = 0; 660 return 0; 661 } 662 } 663 token = T_U_int; 664 goto normal_return; 665 } else if (is_double(yytext)) { 666 yylval_was_set = TRUE; 667 errno = 0; 668 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) { 669 msyslog(LOG_ERR, 670 "Double too large to represent: %s", 671 yytext); 672 exit(1); 673 } else { 674 token = T_Double; 675 goto normal_return; 676 } 677 } else { 678 /* Default: Everything is a string */ 679 yylval_was_set = TRUE; 680 token = create_string_token(yytext); 681 goto normal_return; 682 } 683 } 684 685 /* 686 * Either followedby is not FOLLBY_TOKEN or this lexeme is part 687 * of a string. Hence, we need to return T_String. 688 * 689 * _Except_ we might have a -4 or -6 flag on a an association 690 * configuration line (server, peer, pool, etc.). 691 * 692 * This is a terrible hack, but the grammar is ambiguous so we 693 * don't have a choice. [SK] 694 * 695 * The ambiguity is in the keyword scanner, not ntp_parser.y. 696 * We do not require server addresses be quoted in ntp.conf, 697 * complicating the scanner's job. To avoid trying (and 698 * failing) to match an IP address or DNS name to a keyword, 699 * the association keywords use FOLLBY_STRING in the keyword 700 * table, which tells the scanner to force the next token to be 701 * a T_String, so it does not try to match a keyword but rather 702 * expects a string when -4/-6 modifiers to server, peer, etc. 703 * are encountered. 704 * restrict -4 and restrict -6 parsing works correctly without 705 * this hack, as restrict uses FOLLBY_TOKEN. [DH] 706 */ 707 if ('-' == yytext[0]) { 708 if ('4' == yytext[1]) { 709 token = T_Ipv4_flag; 710 goto normal_return; 711 } else if ('6' == yytext[1]) { 712 token = T_Ipv6_flag; 713 goto normal_return; 714 } 715 } 716 717 instring = FALSE; 718 if (FOLLBY_STRING == followedby) 719 followedby = FOLLBY_TOKEN; 720 721 yylval_was_set = TRUE; 722 token = create_string_token(yytext); 723 724 normal_return: 725 if (T_EOC == token) 726 DPRINTF(4,("\t<end of command>\n")); 727 else 728 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext, 729 token_name(token))); 730 731 if (!yylval_was_set) 732 yylval.Integer = token; 733 734 return token; 735 736 lex_too_long: 737 yytext[min(sizeof(yytext) - 1, 50)] = 0; 738 msyslog(LOG_ERR, 739 "configuration item on line %d longer than limit of %lu, began with '%s'", 740 ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50), 741 yytext); 742 743 /* 744 * If we hit the length limit reading the startup configuration 745 * file, abort. 746 */ 747 if (input_from_file) 748 exit(sizeof(yytext) - 1); 749 750 /* 751 * If it's runtime configuration via ntpq :config treat it as 752 * if the configuration text ended before the too-long lexeme, 753 * hostname, or string. 754 */ 755 yylval.Integer = 0; 756 return 0; 757 } 758