1 /* $NetBSD: ntp_scanner.c,v 1.1.1.1 2009/12/13 16:56:14 kardel Exp $ */ 2 3 4 /* ntp_scanner.c 5 * 6 * The source code for a simple lexical analyzer. 7 * 8 * Written By: Sachin Kamboj 9 * University of Delaware 10 * Newark, DE 19711 11 * Copyright (c) 2006 12 */ 13 14 #ifdef HAVE_CONFIG_H 15 # include <config.h> 16 #endif 17 18 #include <stdio.h> 19 #include <ctype.h> 20 #include <stdlib.h> 21 #include <errno.h> 22 #include <string.h> 23 24 #include "ntp_config.h" 25 #include "ntpsim.h" 26 #include "ntp_scanner.h" 27 #include "ntp_parser.h" 28 #include "ntp_debug.h" 29 30 /* ntp_keyword.h declares finite state machine and token text */ 31 #include "ntp_keyword.h" 32 33 34 35 /* SCANNER GLOBAL VARIABLES 36 * ------------------------ 37 */ 38 39 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */ 40 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */ 41 extern int input_from_file; 42 43 44 45 46 /* CONSTANTS 47 * --------- 48 */ 49 50 51 /* SCANNER GLOBAL VARIABLES 52 * ------------------------ 53 */ 54 const char special_chars[] = "{}(),;|="; 55 56 57 /* FUNCTIONS 58 * --------- 59 */ 60 61 int get_next_char(void); 62 static int is_keyword(char *lexeme, follby *pfollowedby); 63 64 65 66 /* 67 * keyword() - Return the keyword associated with token T_ identifier 68 */ 69 const char * 70 keyword( 71 int token 72 ) 73 { 74 int i; 75 const char *text; 76 77 i = token - LOWEST_KEYWORD_ID; 78 79 if (i >= 0 && i < COUNTOF(keyword_text)) 80 text = keyword_text[i]; 81 else 82 text = NULL; 83 84 return (text != NULL) 85 ? text 86 : "(keyword not found)"; 87 } 88 89 90 /* FILE INTERFACE 91 * -------------- 92 * We define a couple of wrapper functions around the standard C fgetc 93 * and ungetc functions in order to include positional bookkeeping 94 */ 95 96 struct FILE_INFO * 97 F_OPEN( 98 const char *path, 99 const char *mode 100 ) 101 { 102 struct FILE_INFO *my_info; 103 104 my_info = emalloc(sizeof *my_info); 105 106 my_info->line_no = 1; 107 my_info->col_no = 0; 108 my_info->prev_line_col_no = 0; 109 my_info->prev_token_col_no = 0; 110 my_info->fname = path; 111 112 my_info->fd = fopen(path, mode); 113 if (NULL == my_info->fd) { 114 free(my_info); 115 return NULL; 116 } 117 return my_info; 118 } 119 120 int 121 FGETC( 122 struct FILE_INFO *stream 123 ) 124 { 125 int ch = fgetc(stream->fd); 126 127 ++stream->col_no; 128 if (ch == '\n') { 129 stream->prev_line_col_no = stream->col_no; 130 ++stream->line_no; 131 stream->col_no = 1; 132 } 133 return ch; 134 } 135 136 /* BUGS: 1. Function will fail on more than one line of pushback 137 * 2. No error checking is done to see if ungetc fails 138 * SK: I don't think its worth fixing these bugs for our purposes ;-) 139 */ 140 int 141 UNGETC( 142 int ch, 143 struct FILE_INFO *stream 144 ) 145 { 146 if (ch == '\n') { 147 stream->col_no = stream->prev_line_col_no; 148 stream->prev_line_col_no = -1; 149 --stream->line_no; 150 } 151 --stream->col_no; 152 return ungetc(ch, stream->fd); 153 } 154 155 int 156 FCLOSE( 157 struct FILE_INFO *stream 158 ) 159 { 160 int ret_val = fclose(stream->fd); 161 162 if (!ret_val) 163 free(stream); 164 return ret_val; 165 } 166 167 /* STREAM INTERFACE 168 * ---------------- 169 * Provide a wrapper for the stream functions so that the 170 * stream can either read from a file or from a character 171 * array. 172 * NOTE: This is not very efficient for reading from character 173 * arrays, but needed to allow remote configuration where the 174 * configuration command is provided through ntpq. 175 * 176 * The behavior of there two functions is determined by the 177 * input_from_file flag. 178 */ 179 180 int 181 get_next_char( 182 void 183 ) 184 { 185 char ch; 186 187 if (input_from_file) 188 return FGETC(ip_file); 189 else { 190 if (remote_config.buffer[remote_config.pos] == '\0') 191 return EOF; 192 else { 193 ip_file->col_no++; 194 ch = remote_config.buffer[remote_config.pos++]; 195 if (ch == '\n') { 196 ip_file->prev_line_col_no = ip_file->col_no; 197 ++ip_file->line_no; 198 ip_file->col_no = 1; 199 } 200 return ch; 201 } 202 } 203 } 204 205 void 206 push_back_char( 207 int ch 208 ) 209 { 210 if (input_from_file) 211 UNGETC(ch, ip_file); 212 else { 213 if (ch == '\n') { 214 ip_file->col_no = ip_file->prev_line_col_no; 215 ip_file->prev_line_col_no = -1; 216 --ip_file->line_no; 217 } 218 --ip_file->col_no; 219 220 remote_config.pos--; 221 } 222 } 223 224 225 226 /* STATE MACHINES 227 * -------------- 228 */ 229 230 /* Keywords */ 231 static int 232 is_keyword( 233 char *lexeme, 234 follby *pfollowedby 235 ) 236 { 237 follby fb; 238 int curr_s; /* current state index */ 239 int token; 240 int i; 241 242 curr_s = SCANNER_INIT_S; 243 token = 0; 244 245 for (i = 0; lexeme[i]; i++) { 246 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s]))) 247 curr_s = SS_OTHER_N(sst[curr_s]); 248 249 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) { 250 if ('\0' == lexeme[i + 1] 251 && FOLLBY_NON_ACCEPTING 252 != SS_FB(sst[curr_s])) { 253 fb = SS_FB(sst[curr_s]); 254 *pfollowedby = fb; 255 token = curr_s; 256 break; 257 } 258 curr_s = SS_MATCH_N(sst[curr_s]); 259 } else 260 break; 261 } 262 263 return token; 264 } 265 266 267 /* Integer */ 268 static int 269 is_integer( 270 char *lexeme 271 ) 272 { 273 int i = 0; 274 275 /* Allow a leading minus sign */ 276 if (lexeme[i] == '-') 277 ++i; 278 279 /* Check that all the remaining characters are digits */ 280 for (; lexeme[i]; ++i) { 281 if (!isdigit(lexeme[i])) 282 return 0; 283 } 284 return 1; 285 } 286 287 288 /* Double */ 289 static int 290 is_double( 291 char *lexeme 292 ) 293 { 294 int num_digits = 0; /* Number of digits read */ 295 int i; 296 297 i = 0; 298 299 /* Check for an optional '+' or '-' */ 300 if ('+' == lexeme[i] || '-' == lexeme[i]) 301 i++; 302 303 /* Read the integer part */ 304 for (; lexeme[i] && isdigit(lexeme[i]); i++) 305 num_digits++; 306 307 /* Check for the required decimal point */ 308 if ('.' == lexeme[i]) 309 i++; 310 else 311 return 0; 312 313 /* Check for any digits after the decimal point */ 314 for (; lexeme[i] && isdigit(lexeme[i]); i++) 315 num_digits++; 316 317 /* 318 * The number of digits in both the decimal part and the 319 * fraction part must not be zero at this point 320 */ 321 if (!num_digits) 322 return 0; 323 324 /* Check if we are done */ 325 if (!lexeme[i]) 326 return 1; 327 328 /* There is still more input, read the exponent */ 329 if ('e' == tolower(lexeme[i])) 330 i++; 331 else 332 return 0; 333 334 /* Read an optional Sign */ 335 if ('+' == lexeme[i] || '-' == lexeme[i]) 336 i++; 337 338 /* Now read the exponent part */ 339 while (lexeme[i] && isdigit(lexeme[i])) 340 i++; 341 342 /* Check if we are done */ 343 if (!lexeme[i]) 344 return 1; 345 else 346 return 0; 347 } 348 349 350 /* is_special() - Test whether a character is a token */ 351 static inline int 352 is_special( 353 int ch 354 ) 355 { 356 return (int)strchr(special_chars, ch); 357 } 358 359 360 static int 361 is_EOC( 362 int ch 363 ) 364 { 365 if ((old_config_style && (ch == '\n')) || 366 (!old_config_style && (ch == ';'))) 367 return 1; 368 return 0; 369 } 370 371 372 char * 373 quote_if_needed(char *str) 374 { 375 char *ret; 376 size_t len; 377 size_t octets; 378 379 len = strlen(str); 380 octets = len + 2 + 1; 381 ret = emalloc(octets); 382 if ('"' != str[0] 383 && (strcspn(str, special_chars) < len 384 || strchr(str, ' ') != NULL)) { 385 snprintf(ret, octets, "\"%s\"", str); 386 } else 387 strncpy(ret, str, octets); 388 389 return ret; 390 } 391 392 393 static int 394 create_string_token( 395 char *lexeme 396 ) 397 { 398 char *pch; 399 400 /* 401 * ignore end of line whitespace 402 */ 403 pch = lexeme; 404 while (*pch && isspace(*pch)) 405 pch++; 406 407 if (!*pch) { 408 yylval.Integer = T_EOC; 409 return yylval.Integer; 410 } 411 412 yylval.String = estrdup(lexeme); 413 return T_String; 414 } 415 416 417 /* 418 * yylex() - function that does the actual scanning. 419 * Bison expects this function to be called yylex and for it to take no 420 * input and return an int. 421 * Conceptually yylex "returns" yylval as well as the actual return 422 * value representing the token or type. 423 */ 424 int 425 yylex( 426 void 427 ) 428 { 429 int i, instring = 0; 430 int yylval_was_set = 0; 431 int token; /* The return value/the recognized token */ 432 int ch; 433 static follby followedby = FOLLBY_TOKEN; 434 435 do { 436 /* Ignore whitespace at the beginning */ 437 while (EOF != (ch = get_next_char()) && 438 isspace(ch) && 439 !is_EOC(ch)) 440 ; /* Null Statement */ 441 442 if (EOF == ch) { 443 444 if (!input_from_file || !curr_include_level) 445 return 0; 446 447 FCLOSE(fp[curr_include_level]); 448 ip_file = fp[--curr_include_level]; 449 token = T_EOC; 450 goto normal_return; 451 452 } else if (is_EOC(ch)) { 453 454 /* end FOLLBY_STRINGS_TO_EOC effect */ 455 followedby = FOLLBY_TOKEN; 456 token = T_EOC; 457 goto normal_return; 458 459 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) { 460 /* special chars are their own token values */ 461 token = ch; 462 /* 463 * '=' implies a single string following as in: 464 * setvar Owner = "The Boss" default 465 * This could alternatively be handled by 466 * removing '=' from special_chars and adding 467 * it to the keyword table. 468 */ 469 if ('=' == ch) 470 followedby = FOLLBY_STRING; 471 yytext[0] = (char)ch; 472 yytext[1] = '\0'; 473 goto normal_return; 474 } else 475 push_back_char(ch); 476 477 /* save the position of start of the token */ 478 ip_file->prev_token_line_no = ip_file->line_no; 479 ip_file->prev_token_col_no = ip_file->col_no; 480 481 /* Read in the lexeme */ 482 i = 0; 483 while (EOF != (ch = get_next_char())) { 484 485 yytext[i] = (char)ch; 486 487 /* Break on whitespace or a special character */ 488 if (isspace(ch) || is_EOC(ch) 489 || '"' == ch 490 || (FOLLBY_TOKEN == followedby 491 && is_special(ch))) 492 break; 493 494 /* Read the rest of the line on reading a start 495 of comment character */ 496 if ('#' == ch) { 497 while (EOF != (ch = get_next_char()) 498 && '\n' != ch) 499 ; /* Null Statement */ 500 break; 501 } 502 503 i++; 504 if (i >= COUNTOF(yytext)) 505 goto lex_too_long; 506 } 507 /* Pick up all of the string inside between " marks, to 508 * end of line. If we make it to EOL without a 509 * terminating " assume it for them. 510 * 511 * XXX - HMS: I'm not sure we want to assume the closing " 512 */ 513 if ('"' == ch) { 514 instring = 1; 515 while (EOF != (ch = get_next_char()) && 516 ch != '"' && ch != '\n') { 517 yytext[i++] = (char)ch; 518 if (i >= COUNTOF(yytext)) 519 goto lex_too_long; 520 } 521 /* 522 * yytext[i] will be pushed back as not part of 523 * this lexeme, but any closing quote should 524 * not be pushed back, so we read another char. 525 */ 526 if ('"' == ch) 527 ch = get_next_char(); 528 } 529 /* Pushback the last character read that is not a part 530 * of this lexeme. 531 * If the last character read was an EOF, pushback a 532 * newline character. This is to prevent a parse error 533 * when there is no newline at the end of a file. 534 */ 535 if (EOF == ch) 536 push_back_char('\n'); 537 else 538 push_back_char(ch); 539 yytext[i] = '\0'; 540 } while (i == 0); 541 542 /* Now return the desired token */ 543 544 /* First make sure that the parser is *not* expecting a string 545 * as the next token (based on the previous token that was 546 * returned) and that we haven't read a string. 547 */ 548 549 if (followedby == FOLLBY_TOKEN && !instring) { 550 token = is_keyword(yytext, &followedby); 551 if (token) 552 goto normal_return; 553 else if (is_integer(yytext)) { 554 yylval_was_set = 1; 555 errno = 0; 556 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0 557 && ((errno == EINVAL) || (errno == ERANGE))) { 558 msyslog(LOG_ERR, 559 "Integer cannot be represented: %s", 560 yytext); 561 exit(1); 562 } else { 563 token = T_Integer; 564 goto normal_return; 565 } 566 } 567 else if (is_double(yytext)) { 568 yylval_was_set = 1; 569 errno = 0; 570 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) { 571 msyslog(LOG_ERR, 572 "Double too large to represent: %s", 573 yytext); 574 exit(1); 575 } else { 576 token = T_Double; 577 goto normal_return; 578 } 579 } else { 580 /* Default: Everything is a string */ 581 yylval_was_set = 1; 582 token = create_string_token(yytext); 583 goto normal_return; 584 } 585 } 586 587 /* 588 * Either followedby is not FOLLBY_TOKEN or this lexeme is part 589 * of a string. Hence, we need to return T_String. 590 * 591 * _Except_ we might have a -4 or -6 flag on a an association 592 * configuration line (server, peer, pool, etc.). 593 * 594 * This is a terrible hack, but the grammar is ambiguous so we 595 * don't have a choice. [SK] 596 * 597 * The ambiguity is in the keyword scanner, not ntp_parser.y. 598 * We do not require server addresses be quoted in ntp.conf, 599 * complicating the scanner's job. To avoid trying (and 600 * failing) to match an IP address or DNS name to a keyword, 601 * the association keywords use FOLLBY_STRING in the keyword 602 * table, which tells the scanner to force the next token to be 603 * a T_String, so it does not try to match a keyword but rather 604 * expects a string when -4/-6 modifiers to server, peer, etc. 605 * are encountered. 606 * restrict -4 and restrict -6 parsing works correctly without 607 * this hack, as restrict uses FOLLBY_TOKEN. [DH] 608 */ 609 if ('-' == yytext[0]) { 610 if ('4' == yytext[1]) { 611 token = T_Ipv4_flag; 612 goto normal_return; 613 } else if ('6' == yytext[1]) { 614 token = T_Ipv6_flag; 615 goto normal_return; 616 } 617 } 618 619 instring = 0; 620 if (FOLLBY_STRING == followedby) 621 followedby = FOLLBY_TOKEN; 622 623 yylval_was_set = 1; 624 token = create_string_token(yytext); 625 626 normal_return: 627 if (T_EOC == token) 628 DPRINTF(4,("\t<end of command>\n")); 629 else 630 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext, 631 token_name(token))); 632 633 if (!yylval_was_set) 634 yylval.Integer = token; 635 636 return token; 637 638 lex_too_long: 639 yytext[min(sizeof(yytext) - 1, 50)] = 0; 640 msyslog(LOG_ERR, 641 "configuration item on line %d longer than limit of %d, began with '%s'", 642 ip_file->line_no, sizeof(yytext) - 1, yytext); 643 644 /* 645 * If we hit the length limit reading the startup configuration 646 * file, abort. 647 */ 648 if (input_from_file) 649 exit(sizeof(yytext) - 1); 650 651 /* 652 * If it's runtime configuration via ntpq :config treat it as 653 * if the configuration text ended before the too-long lexeme, 654 * hostname, or string. 655 */ 656 yylval.Integer = 0; 657 return 0; 658 } 659