1 /* $NetBSD: ntp_scanner.c,v 1.3 2010/12/04 23:08:35 christos Exp $ */ 2 3 4 /* ntp_scanner.c 5 * 6 * The source code for a simple lexical analyzer. 7 * 8 * Written By: Sachin Kamboj 9 * University of Delaware 10 * Newark, DE 19711 11 * Copyright (c) 2006 12 */ 13 14 #ifdef HAVE_CONFIG_H 15 # include <config.h> 16 #endif 17 18 #include <stdio.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <errno.h> 22 #include <string.h> 23 24 #include "ntp_config.h" 25 #include "ntpsim.h" 26 #include "ntp_scanner.h" 27 #include "ntp_parser.h" 28 #include "ntp_debug.h" 29 30 /* ntp_keyword.h declares finite state machine and token text */ 31 #include "ntp_keyword.h" 32 33 34 35 /* SCANNER GLOBAL VARIABLES 36 * ------------------------ 37 */ 38 39 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */ 40 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */ 41 extern int input_from_file; 42 43 44 45 46 /* CONSTANTS 47 * --------- 48 */ 49 50 51 /* SCANNER GLOBAL VARIABLES 52 * ------------------------ 53 */ 54 const char special_chars[] = "{}(),;|="; 55 56 57 /* FUNCTIONS 58 * --------- 59 */ 60 61 int get_next_char(void); 62 static int is_keyword(char *lexeme, follby *pfollowedby); 63 64 65 66 /* 67 * keyword() - Return the keyword associated with token T_ identifier 68 */ 69 const char * 70 keyword( 71 int token 72 ) 73 { 74 size_t i; 75 const char *text; 76 77 i = token - LOWEST_KEYWORD_ID; 78 79 if (i < COUNTOF(keyword_text)) 80 text = keyword_text[i]; 81 else 82 text = NULL; 83 84 return (text != NULL) 85 ? text 86 : "(keyword not found)"; 87 } 88 89 90 /* FILE INTERFACE 91 * -------------- 92 * We define a couple of wrapper functions around the standard C fgetc 93 * and ungetc functions in order to include positional bookkeeping 94 */ 95 96 struct FILE_INFO * 97 F_OPEN( 98 const char *path, 99 const char *mode 100 ) 101 { 102 struct FILE_INFO *my_info; 103 104 my_info = emalloc(sizeof *my_info); 105 106 my_info->line_no = 1; 107 my_info->col_no = 0; 108 my_info->prev_line_col_no = 0; 109 my_info->prev_token_col_no = 0; 110 my_info->fname = path; 111 112 my_info->fd = fopen(path, mode); 113 if (NULL == my_info->fd) { 114 free(my_info); 115 return NULL; 116 } 117 return my_info; 118 } 119 120 int 121 FGETC( 122 struct FILE_INFO *stream 123 ) 124 { 125 int ch = fgetc(stream->fd); 126 127 ++stream->col_no; 128 if (ch == '\n') { 129 stream->prev_line_col_no = stream->col_no; 130 ++stream->line_no; 131 stream->col_no = 1; 132 } 133 return ch; 134 } 135 136 /* BUGS: 1. Function will fail on more than one line of pushback 137 * 2. No error checking is done to see if ungetc fails 138 * SK: I don't think its worth fixing these bugs for our purposes ;-) 139 */ 140 int 141 UNGETC( 142 int ch, 143 struct FILE_INFO *stream 144 ) 145 { 146 if (ch == '\n') { 147 stream->col_no = stream->prev_line_col_no; 148 stream->prev_line_col_no = -1; 149 --stream->line_no; 150 } 151 --stream->col_no; 152 return ungetc(ch, stream->fd); 153 } 154 155 int 156 FCLOSE( 157 struct FILE_INFO *stream 158 ) 159 { 160 int ret_val = fclose(stream->fd); 161 162 if (!ret_val) 163 free(stream); 164 return ret_val; 165 } 166 167 /* STREAM INTERFACE 168 * ---------------- 169 * Provide a wrapper for the stream functions so that the 170 * stream can either read from a file or from a character 171 * array. 172 * NOTE: This is not very efficient for reading from character 173 * arrays, but needed to allow remote configuration where the 174 * configuration command is provided through ntpq. 175 * 176 * The behavior of there two functions is determined by the 177 * input_from_file flag. 178 */ 179 180 int 181 get_next_char( 182 void 183 ) 184 { 185 char ch; 186 187 if (input_from_file) 188 return FGETC(ip_file); 189 else { 190 if (remote_config.buffer[remote_config.pos] == '\0') 191 return EOF; 192 else { 193 ip_file->col_no++; 194 ch = remote_config.buffer[remote_config.pos++]; 195 if (ch == '\n') { 196 ip_file->prev_line_col_no = ip_file->col_no; 197 ++ip_file->line_no; 198 ip_file->col_no = 1; 199 } 200 return ch; 201 } 202 } 203 } 204 205 void 206 push_back_char( 207 int ch 208 ) 209 { 210 if (input_from_file) 211 UNGETC(ch, ip_file); 212 else { 213 if (ch == '\n') { 214 ip_file->col_no = ip_file->prev_line_col_no; 215 ip_file->prev_line_col_no = -1; 216 --ip_file->line_no; 217 } 218 --ip_file->col_no; 219 220 remote_config.pos--; 221 } 222 } 223 224 225 226 /* STATE MACHINES 227 * -------------- 228 */ 229 230 /* Keywords */ 231 static int 232 is_keyword( 233 char *lexeme, 234 follby *pfollowedby 235 ) 236 { 237 follby fb; 238 int curr_s; /* current state index */ 239 int token; 240 int i; 241 242 curr_s = SCANNER_INIT_S; 243 token = 0; 244 245 for (i = 0; lexeme[i]; i++) { 246 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s]))) 247 curr_s = SS_OTHER_N(sst[curr_s]); 248 249 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) { 250 if ('\0' == lexeme[i + 1] 251 && FOLLBY_NON_ACCEPTING 252 != SS_FB(sst[curr_s])) { 253 fb = SS_FB(sst[curr_s]); 254 *pfollowedby = fb; 255 token = curr_s; 256 break; 257 } 258 curr_s = SS_MATCH_N(sst[curr_s]); 259 } else 260 break; 261 } 262 263 return token; 264 } 265 266 267 /* Integer */ 268 static int 269 is_integer( 270 char *lexeme 271 ) 272 { 273 int i = 0; 274 275 /* Allow a leading minus sign */ 276 if (lexeme[i] == '-') 277 ++i; 278 279 /* Check that all the remaining characters are digits */ 280 for (; lexeme[i]; ++i) { 281 if (!isdigit((unsigned char)lexeme[i])) 282 return 0; 283 } 284 return 1; 285 } 286 287 288 /* Double */ 289 static int 290 is_double( 291 char *lexeme 292 ) 293 { 294 int num_digits = 0; /* Number of digits read */ 295 int i; 296 297 i = 0; 298 299 /* Check for an optional '+' or '-' */ 300 if ('+' == lexeme[i] || '-' == lexeme[i]) 301 i++; 302 303 /* Read the integer part */ 304 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 305 num_digits++; 306 307 /* Check for the required decimal point */ 308 if ('.' == lexeme[i]) 309 i++; 310 else 311 return 0; 312 313 /* Check for any digits after the decimal point */ 314 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 315 num_digits++; 316 317 /* 318 * The number of digits in both the decimal part and the 319 * fraction part must not be zero at this point 320 */ 321 if (!num_digits) 322 return 0; 323 324 /* Check if we are done */ 325 if (!lexeme[i]) 326 return 1; 327 328 /* There is still more input, read the exponent */ 329 if ('e' == tolower((unsigned char)lexeme[i])) 330 i++; 331 else 332 return 0; 333 334 /* Read an optional Sign */ 335 if ('+' == lexeme[i] || '-' == lexeme[i]) 336 i++; 337 338 /* Now read the exponent part */ 339 while (lexeme[i] && isdigit((unsigned char)lexeme[i])) 340 i++; 341 342 /* Check if we are done */ 343 if (!lexeme[i]) 344 return 1; 345 else 346 return 0; 347 } 348 349 350 /* is_special() - Test whether a character is a token */ 351 static inline int 352 is_special( 353 int ch 354 ) 355 { 356 return strchr(special_chars, ch) != NULL; 357 } 358 359 360 static int 361 is_EOC( 362 int ch 363 ) 364 { 365 if ((old_config_style && (ch == '\n')) || 366 (!old_config_style && (ch == ';'))) 367 return 1; 368 return 0; 369 } 370 371 372 char * 373 quote_if_needed(char *str) 374 { 375 char *ret; 376 size_t len; 377 size_t octets; 378 379 len = strlen(str); 380 octets = len + 2 + 1; 381 ret = emalloc(octets); 382 if ('"' != str[0] 383 && (strcspn(str, special_chars) < len 384 || strchr(str, ' ') != NULL)) { 385 snprintf(ret, octets, "\"%s\"", str); 386 } else 387 strncpy(ret, str, octets); 388 389 return ret; 390 } 391 392 393 static int 394 create_string_token( 395 char *lexeme 396 ) 397 { 398 char *pch; 399 400 /* 401 * ignore end of line whitespace 402 */ 403 pch = lexeme; 404 while (*pch && isspace((unsigned char)*pch)) 405 pch++; 406 407 if (!*pch) { 408 yylval.Integer = T_EOC; 409 return yylval.Integer; 410 } 411 412 yylval.String = estrdup(lexeme); 413 return T_String; 414 } 415 416 417 /* 418 * yylex() - function that does the actual scanning. 419 * Bison expects this function to be called yylex and for it to take no 420 * input and return an int. 421 * Conceptually yylex "returns" yylval as well as the actual return 422 * value representing the token or type. 423 */ 424 int 425 yylex( 426 void 427 ) 428 { 429 size_t i; 430 int instring = 0; 431 int yylval_was_set = 0; 432 int token; /* The return value/the recognized token */ 433 int ch; 434 static follby followedby = FOLLBY_TOKEN; 435 436 do { 437 /* Ignore whitespace at the beginning */ 438 while (EOF != (ch = get_next_char()) && 439 isspace(ch) && 440 !is_EOC(ch)) 441 ; /* Null Statement */ 442 443 if (EOF == ch) { 444 445 if (!input_from_file || !curr_include_level) 446 return 0; 447 448 FCLOSE(fp[curr_include_level]); 449 ip_file = fp[--curr_include_level]; 450 token = T_EOC; 451 goto normal_return; 452 453 } else if (is_EOC(ch)) { 454 455 /* end FOLLBY_STRINGS_TO_EOC effect */ 456 followedby = FOLLBY_TOKEN; 457 token = T_EOC; 458 goto normal_return; 459 460 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) { 461 /* special chars are their own token values */ 462 token = ch; 463 /* 464 * '=' implies a single string following as in: 465 * setvar Owner = "The Boss" default 466 * This could alternatively be handled by 467 * removing '=' from special_chars and adding 468 * it to the keyword table. 469 */ 470 if ('=' == ch) 471 followedby = FOLLBY_STRING; 472 yytext[0] = (char)ch; 473 yytext[1] = '\0'; 474 goto normal_return; 475 } else 476 push_back_char(ch); 477 478 /* save the position of start of the token */ 479 ip_file->prev_token_line_no = ip_file->line_no; 480 ip_file->prev_token_col_no = ip_file->col_no; 481 482 /* Read in the lexeme */ 483 i = 0; 484 while (EOF != (ch = get_next_char())) { 485 486 yytext[i] = (char)ch; 487 488 /* Break on whitespace or a special character */ 489 if (isspace(ch) || is_EOC(ch) 490 || '"' == ch 491 || (FOLLBY_TOKEN == followedby 492 && is_special(ch))) 493 break; 494 495 /* Read the rest of the line on reading a start 496 of comment character */ 497 if ('#' == ch) { 498 while (EOF != (ch = get_next_char()) 499 && '\n' != ch) 500 ; /* Null Statement */ 501 break; 502 } 503 504 i++; 505 if (i >= COUNTOF(yytext)) 506 goto lex_too_long; 507 } 508 /* Pick up all of the string inside between " marks, to 509 * end of line. If we make it to EOL without a 510 * terminating " assume it for them. 511 * 512 * XXX - HMS: I'm not sure we want to assume the closing " 513 */ 514 if ('"' == ch) { 515 instring = 1; 516 while (EOF != (ch = get_next_char()) && 517 ch != '"' && ch != '\n') { 518 yytext[i++] = (char)ch; 519 if (i >= COUNTOF(yytext)) 520 goto lex_too_long; 521 } 522 /* 523 * yytext[i] will be pushed back as not part of 524 * this lexeme, but any closing quote should 525 * not be pushed back, so we read another char. 526 */ 527 if ('"' == ch) 528 ch = get_next_char(); 529 } 530 /* Pushback the last character read that is not a part 531 * of this lexeme. 532 * If the last character read was an EOF, pushback a 533 * newline character. This is to prevent a parse error 534 * when there is no newline at the end of a file. 535 */ 536 if (EOF == ch) 537 push_back_char('\n'); 538 else 539 push_back_char(ch); 540 yytext[i] = '\0'; 541 } while (i == 0); 542 543 /* Now return the desired token */ 544 545 /* First make sure that the parser is *not* expecting a string 546 * as the next token (based on the previous token that was 547 * returned) and that we haven't read a string. 548 */ 549 550 if (followedby == FOLLBY_TOKEN && !instring) { 551 token = is_keyword(yytext, &followedby); 552 if (token) 553 goto normal_return; 554 else if (is_integer(yytext)) { 555 yylval_was_set = 1; 556 errno = 0; 557 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0 558 && ((errno == EINVAL) || (errno == ERANGE))) { 559 msyslog(LOG_ERR, 560 "Integer cannot be represented: %s", 561 yytext); 562 exit(1); 563 } else { 564 token = T_Integer; 565 goto normal_return; 566 } 567 } 568 else if (is_double(yytext)) { 569 yylval_was_set = 1; 570 errno = 0; 571 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) { 572 msyslog(LOG_ERR, 573 "Double too large to represent: %s", 574 yytext); 575 exit(1); 576 } else { 577 token = T_Double; 578 goto normal_return; 579 } 580 } else { 581 /* Default: Everything is a string */ 582 yylval_was_set = 1; 583 token = create_string_token(yytext); 584 goto normal_return; 585 } 586 } 587 588 /* 589 * Either followedby is not FOLLBY_TOKEN or this lexeme is part 590 * of a string. Hence, we need to return T_String. 591 * 592 * _Except_ we might have a -4 or -6 flag on a an association 593 * configuration line (server, peer, pool, etc.). 594 * 595 * This is a terrible hack, but the grammar is ambiguous so we 596 * don't have a choice. [SK] 597 * 598 * The ambiguity is in the keyword scanner, not ntp_parser.y. 599 * We do not require server addresses be quoted in ntp.conf, 600 * complicating the scanner's job. To avoid trying (and 601 * failing) to match an IP address or DNS name to a keyword, 602 * the association keywords use FOLLBY_STRING in the keyword 603 * table, which tells the scanner to force the next token to be 604 * a T_String, so it does not try to match a keyword but rather 605 * expects a string when -4/-6 modifiers to server, peer, etc. 606 * are encountered. 607 * restrict -4 and restrict -6 parsing works correctly without 608 * this hack, as restrict uses FOLLBY_TOKEN. [DH] 609 */ 610 if ('-' == yytext[0]) { 611 if ('4' == yytext[1]) { 612 token = T_Ipv4_flag; 613 goto normal_return; 614 } else if ('6' == yytext[1]) { 615 token = T_Ipv6_flag; 616 goto normal_return; 617 } 618 } 619 620 instring = 0; 621 if (FOLLBY_STRING == followedby) 622 followedby = FOLLBY_TOKEN; 623 624 yylval_was_set = 1; 625 token = create_string_token(yytext); 626 627 normal_return: 628 if (T_EOC == token) 629 DPRINTF(4,("\t<end of command>\n")); 630 else 631 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext, 632 token_name(token))); 633 634 if (!yylval_was_set) 635 yylval.Integer = token; 636 637 return token; 638 639 lex_too_long: 640 yytext[min(sizeof(yytext) - 1, 50)] = 0; 641 msyslog(LOG_ERR, 642 "configuration item on line %d longer than limit of %zu, began with '%s'", 643 ip_file->line_no, sizeof(yytext) - 1, yytext); 644 645 /* 646 * If we hit the length limit reading the startup configuration 647 * file, abort. 648 */ 649 if (input_from_file) 650 exit(sizeof(yytext) - 1); 651 652 /* 653 * If it's runtime configuration via ntpq :config treat it as 654 * if the configuration text ended before the too-long lexeme, 655 * hostname, or string. 656 */ 657 yylval.Integer = 0; 658 return 0; 659 } 660