1 /**************************************************************************** 2 * Copyright (c) 1998 Free Software Foundation, Inc. * 3 * * 4 * Permission is hereby granted, free of charge, to any person obtaining a * 5 * copy of this software and associated documentation files (the * 6 * "Software"), to deal in the Software without restriction, including * 7 * without limitation the rights to use, copy, modify, merge, publish, * 8 * distribute, distribute with modifications, sublicense, and/or sell * 9 * copies of the Software, and to permit persons to whom the Software is * 10 * furnished to do so, subject to the following conditions: * 11 * * 12 * The above copyright notice and this permission notice shall be included * 13 * in all copies or substantial portions of the Software. * 14 * * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * 18 * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * 21 * THE USE OR OTHER DEALINGS IN THE SOFTWARE. * 22 * * 23 * Except as contained in this notice, the name(s) of the above copyright * 24 * holders shall not be used in advertising or otherwise to promote the * 25 * sale, use or other dealings in this Software without prior written * 26 * authorization. * 27 ****************************************************************************/ 28 29 /**************************************************************************** 30 * Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995 * 31 * and: Eric S. Raymond <esr@snark.thyrsus.com> * 32 ****************************************************************************/ 33 34 /* 35 * comp_scan.c --- Lexical scanner for terminfo compiler. 36 * 37 * _nc_reset_input() 38 * _nc_get_token() 39 * _nc_panic_mode() 40 * int _nc_syntax; 41 * int _nc_curr_line; 42 * long _nc_curr_file_pos; 43 * long _nc_comment_start; 44 * long _nc_comment_end; 45 */ 46 47 #include <curses.priv.h> 48 49 #include <ctype.h> 50 #include <tic.h> 51 52 MODULE_ID("$From: comp_scan.c,v 1.34 1998/11/01 00:56:39 tom Exp $") 53 54 /* 55 * Maximum length of string capability we'll accept before raising an error. 56 * Yes, there is a real capability in /etc/termcap this long, an "is". 57 */ 58 #define MAXCAPLEN 600 59 60 #define iswhite(ch) (ch == ' ' || ch == '\t') 61 62 int _nc_syntax; /* termcap or terminfo? */ 63 long _nc_curr_file_pos; /* file offset of current line */ 64 long _nc_comment_start; /* start of comment range before name */ 65 long _nc_comment_end; /* end of comment range before name */ 66 long _nc_start_line; /* start line of current entry */ 67 68 /***************************************************************************** 69 * 70 * Token-grabbing machinery 71 * 72 *****************************************************************************/ 73 74 static bool first_column; /* See 'next_char()' below */ 75 static char separator; /* capability separator */ 76 static int pushtype; /* type of pushback token */ 77 static char pushname[MAX_NAME_SIZE+1]; 78 79 static int last_char(void); 80 static int next_char(void); 81 static long stream_pos(void); 82 static bool end_of_stream(void); 83 static void push_back(char c); 84 85 /* Assume we may be looking at a termcap-style continuation */ 86 static inline int eat_escaped_newline(int ch) 87 { 88 if (ch == '\\') 89 while ((ch = next_char()) == '\n' || iswhite(ch)) 90 continue; 91 return ch; 92 } 93 94 /* 95 * int 96 * get_token() 97 * 98 * Scans the input for the next token, storing the specifics in the 99 * global structure 'curr_token' and returning one of the following: 100 * 101 * NAMES A line beginning in column 1. 'name' 102 * will be set to point to everything up to but 103 * not including the first separator on the line. 104 * BOOLEAN An entry consisting of a name followed by 105 * a separator. 'name' will be set to point to 106 * the name of the capability. 107 * NUMBER An entry of the form 108 * name#digits, 109 * 'name' will be set to point to the capability 110 * name and 'valnumber' to the number given. 111 * STRING An entry of the form 112 * name=characters, 113 * 'name' is set to the capability name and 114 * 'valstring' to the string of characters, with 115 * input translations done. 116 * CANCEL An entry of the form 117 * name@, 118 * 'name' is set to the capability name and 119 * 'valnumber' to -1. 120 * EOF The end of the file has been reached. 121 * 122 * A `separator' is either a comma or a semicolon, depending on whether 123 * we are in termcap or terminfo mode. 124 * 125 */ 126 127 int _nc_get_token(void) 128 { 129 static const char terminfo_punct[] = "@%&*!#"; 130 long number; 131 int type; 132 int ch; 133 char * numchk; 134 char numbuf[80]; 135 unsigned found; 136 static char buffer[MAX_ENTRY_SIZE]; 137 char *ptr; 138 int dot_flag = FALSE; 139 long token_start; 140 141 if (pushtype != NO_PUSHBACK) 142 { 143 int retval = pushtype; 144 145 _nc_set_type(pushname); 146 DEBUG(3, ("pushed-back token: `%s', class %d", 147 _nc_curr_token.tk_name, pushtype)); 148 149 pushtype = NO_PUSHBACK; 150 pushname[0] = '\0'; 151 152 /* currtok wasn't altered by _nc_push_token() */ 153 return(retval); 154 } 155 156 if (end_of_stream()) 157 return(EOF); 158 159 start_token: 160 token_start = stream_pos(); 161 while ((ch = next_char()) == '\n' || iswhite(ch)) 162 continue; 163 164 ch = eat_escaped_newline(ch); 165 166 if (ch == EOF) 167 type = EOF; 168 else { 169 /* if this is a termcap entry, skip a leading separator */ 170 if (separator == ':' && ch == ':') 171 ch = next_char(); 172 173 if (ch == '.') { 174 dot_flag = TRUE; 175 DEBUG(8, ("dot-flag set")); 176 177 while ((ch = next_char())=='.' || iswhite(ch)) 178 continue; 179 } 180 181 if (ch == EOF) { 182 type = EOF; 183 goto end_of_token; 184 } 185 186 /* have to make some punctuation chars legal for terminfo */ 187 if (!isalnum(ch) && !strchr(terminfo_punct, (char)ch)) { 188 _nc_warning("Illegal character (expected alphanumeric or %s) - %s", 189 terminfo_punct, _tracechar((chtype)ch)); 190 _nc_panic_mode(separator); 191 goto start_token; 192 } 193 194 ptr = buffer; 195 *(ptr++) = ch; 196 197 if (first_column) { 198 char *desc; 199 200 _nc_comment_start = token_start; 201 _nc_comment_end = _nc_curr_file_pos; 202 _nc_start_line = _nc_curr_line; 203 204 _nc_syntax = ERR; 205 while ((ch = next_char()) != '\n') 206 { 207 if (ch == EOF) 208 _nc_err_abort("premature EOF"); 209 else if (ch == ':' && last_char() != ',') 210 { 211 _nc_syntax = SYN_TERMCAP; 212 separator = ':'; 213 break; 214 } 215 else if (ch == ',') 216 { 217 _nc_syntax = SYN_TERMINFO; 218 separator = ','; 219 /* 220 * Fall-through here is not an accident. 221 * The idea is that if we see a comma, we 222 * figure this is terminfo unless we 223 * subsequently run into a colon -- but 224 * we don't stop looking for that colon until 225 * hitting a newline. This allows commas to 226 * be embedded in description fields of 227 * either syntax. 228 */ 229 /* FALLTHRU */ 230 } 231 else 232 ch = eat_escaped_newline(ch); 233 234 *ptr++ = ch; 235 } 236 ptr[0] = '\0'; 237 if (_nc_syntax == ERR) 238 { 239 /* 240 * Grrr...what we ought to do here is barf, 241 * complaining that the entry is malformed. 242 * But because a couple of name fields in the 243 * 8.2 termcap file end with |\, we just have 244 * to assume it's termcap syntax. 245 */ 246 _nc_syntax = SYN_TERMCAP; 247 separator = ':'; 248 } 249 else if (_nc_syntax == SYN_TERMINFO) 250 { 251 /* throw away trailing /, *$/ */ 252 for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--) 253 continue; 254 ptr[1] = '\0'; 255 } 256 257 /* 258 * This is the soonest we have the terminal name 259 * fetched. Set up for following warning messages. 260 */ 261 ptr = strchr(buffer, '|'); 262 if (ptr == (char *)NULL) 263 ptr = buffer + strlen(buffer); 264 ch = *ptr; 265 *ptr = '\0'; 266 _nc_set_type(buffer); 267 *ptr = ch; 268 269 /* 270 * Compute the boundary between the aliases and the 271 * description field for syntax-checking purposes. 272 */ 273 desc = strrchr(buffer, '|'); 274 if (desc) { 275 if (*desc == '\0') 276 _nc_warning("empty longname field"); 277 else if (strchr(desc, ' ') == (char *)NULL) 278 _nc_warning("older tic versions may treat the description field as an alias"); 279 } 280 if (!desc) 281 desc = buffer + strlen(buffer); 282 283 /* 284 * Whitespace in a name field other than the long name 285 * can confuse rdist and some termcap tools. Slashes 286 * are a no-no. Other special characters can be 287 * dangerous due to shell expansion. 288 */ 289 for (ptr = buffer; ptr < desc; ptr++) 290 { 291 if (isspace(*ptr)) 292 { 293 _nc_warning("whitespace in name or alias field"); 294 break; 295 } 296 else if (*ptr == '/') 297 { 298 _nc_warning("slashes aren't allowed in names or aliases"); 299 break; 300 } 301 else if (strchr("$[]!*?", *ptr)) 302 { 303 _nc_warning("dubious character `%c' in name or alias field", *ptr); 304 break; 305 } 306 } 307 308 ptr = buffer; 309 310 _nc_curr_token.tk_name = buffer; 311 type = NAMES; 312 } else { 313 while ((ch = next_char()) != EOF) { 314 if (!isalnum(ch)) { 315 if (_nc_syntax == SYN_TERMINFO) { 316 if (ch != '_') 317 break; 318 } else { /* allow ';' for "k;" */ 319 if (ch != ';') 320 break; 321 } 322 } 323 *(ptr++) = ch; 324 } 325 326 *ptr++ = '\0'; 327 switch (ch) { 328 case ',': 329 case ':': 330 if (ch != separator) 331 _nc_err_abort("Separator inconsistent with syntax"); 332 _nc_curr_token.tk_name = buffer; 333 type = BOOLEAN; 334 break; 335 case '@': 336 if ((ch = next_char()) != separator) 337 _nc_warning("Missing separator after `%s', have %s", 338 buffer, _tracechar((chtype)ch)); 339 _nc_curr_token.tk_name = buffer; 340 type = CANCEL; 341 break; 342 343 case '#': 344 found = 0; 345 while (isalnum(ch = next_char())) { 346 numbuf[found++] = ch; 347 if (found >= sizeof(numbuf)-1) 348 break; 349 } 350 numbuf[found] = '\0'; 351 number = strtol(numbuf, &numchk, 0); 352 if (numchk == numbuf) 353 _nc_warning("no value given for `%s'", buffer); 354 if ((*numchk != '\0') || (ch != separator)) 355 _nc_warning("Missing separator"); 356 _nc_curr_token.tk_name = buffer; 357 _nc_curr_token.tk_valnumber = number; 358 type = NUMBER; 359 break; 360 361 case '=': 362 ch = _nc_trans_string(ptr); 363 if (ch != separator) 364 _nc_warning("Missing separator"); 365 _nc_curr_token.tk_name = buffer; 366 _nc_curr_token.tk_valstring = ptr; 367 type = STRING; 368 break; 369 370 case EOF: 371 type = EOF; 372 break; 373 default: 374 /* just to get rid of the compiler warning */ 375 type = UNDEF; 376 _nc_warning("Illegal character - %s", 377 _tracechar((chtype)ch)); 378 } 379 } /* end else (first_column == FALSE) */ 380 } /* end else (ch != EOF) */ 381 382 end_of_token: 383 384 #ifdef TRACE 385 if (dot_flag == TRUE) 386 DEBUG(8, ("Commented out ")); 387 388 if (_nc_tracing & TRACE_IEVENT) 389 { 390 fprintf(stderr, "Token: "); 391 switch (type) 392 { 393 case BOOLEAN: 394 fprintf(stderr, "Boolean; name='%s'\n", 395 _nc_curr_token.tk_name); 396 break; 397 398 case NUMBER: 399 fprintf(stderr, "Number; name='%s', value=%d\n", 400 _nc_curr_token.tk_name, 401 _nc_curr_token.tk_valnumber); 402 break; 403 404 case STRING: 405 fprintf(stderr, "String; name='%s', value=%s\n", 406 _nc_curr_token.tk_name, 407 _nc_visbuf(_nc_curr_token.tk_valstring)); 408 break; 409 410 case CANCEL: 411 fprintf(stderr, "Cancel; name='%s'\n", 412 _nc_curr_token.tk_name); 413 break; 414 415 case NAMES: 416 417 fprintf(stderr, "Names; value='%s'\n", 418 _nc_curr_token.tk_name); 419 break; 420 421 case EOF: 422 fprintf(stderr, "End of file\n"); 423 break; 424 425 default: 426 _nc_warning("Bad token type"); 427 } 428 } 429 #endif 430 431 if (dot_flag == TRUE) /* if commented out, use the next one */ 432 type = _nc_get_token(); 433 434 DEBUG(3, ("token: `%s', class %d", _nc_curr_token.tk_name, type)); 435 436 return(type); 437 } 438 439 /* 440 * char 441 * trans_string(ptr) 442 * 443 * Reads characters using next_char() until encountering a separator, nl, 444 * or end-of-file. The returned value is the character which caused 445 * reading to stop. The following translations are done on the input: 446 * 447 * ^X goes to ctrl-X (i.e. X & 037) 448 * {\E,\n,\r,\b,\t,\f} go to 449 * {ESCAPE,newline,carriage-return,backspace,tab,formfeed} 450 * {\^,\\} go to {carat,backslash} 451 * \ddd (for ddd = up to three octal digits) goes to the character ddd 452 * 453 * \e == \E 454 * \0 == \200 455 * 456 */ 457 458 char 459 _nc_trans_string(char *ptr) 460 { 461 int count = 0; 462 int number; 463 int i, c; 464 chtype ch, last_ch = '\0'; 465 bool ignored = FALSE; 466 467 while ((ch = c = next_char()) != (chtype)separator && c != EOF) { 468 if ((_nc_syntax == SYN_TERMCAP) && c == '\n') 469 break; 470 if (ch == '^' && last_ch != '%') { 471 ch = c = next_char(); 472 if (c == EOF) 473 _nc_err_abort("Premature EOF"); 474 475 if (! (is7bits(ch) && isprint(ch))) { 476 _nc_warning("Illegal ^ character - %s", 477 _tracechar((unsigned char)ch)); 478 } 479 if (ch == '?') { 480 *(ptr++) = '\177'; 481 } else { 482 if ((ch &= 037) == 0) 483 ch = 128; 484 *(ptr++) = (char)(ch); 485 } 486 } 487 else if (ch == '\\') { 488 ch = c = next_char(); 489 if (c == EOF) 490 _nc_err_abort("Premature EOF"); 491 492 if (ch >= '0' && ch <= '7') { 493 number = ch - '0'; 494 for (i=0; i < 2; i++) { 495 ch = c = next_char(); 496 if (c == EOF) 497 _nc_err_abort("Premature EOF"); 498 499 if (c < '0' || c > '7') { 500 if (isdigit(c)) { 501 _nc_warning("Non-octal digit `%c' in \\ sequence", c); 502 /* allow the digit; it'll do less harm */ 503 } else { 504 push_back((char)c); 505 break; 506 } 507 } 508 509 number = number * 8 + c - '0'; 510 } 511 512 if (number == 0) 513 number = 0200; 514 *(ptr++) = (char) number; 515 } else { 516 switch (c) { 517 case 'E': 518 case 'e': *(ptr++) = '\033'; break; 519 520 case 'a': *(ptr++) = '\007'; break; 521 522 case 'l': 523 case 'n': *(ptr++) = '\n'; break; 524 525 case 'r': *(ptr++) = '\r'; break; 526 527 case 'b': *(ptr++) = '\010'; break; 528 529 case 's': *(ptr++) = ' '; break; 530 531 case 'f': *(ptr++) = '\014'; break; 532 533 case 't': *(ptr++) = '\t'; break; 534 535 case '\\': *(ptr++) = '\\'; break; 536 537 case '^': *(ptr++) = '^'; break; 538 539 case ',': *(ptr++) = ','; break; 540 541 case ':': *(ptr++) = ':'; break; 542 543 case '\n': 544 continue; 545 546 default: 547 _nc_warning("Illegal character %s in \\ sequence", 548 _tracechar((unsigned char)ch)); 549 *(ptr++) = (char)ch; 550 } /* endswitch (ch) */ 551 } /* endelse (ch < '0' || ch > '7') */ 552 } /* end else if (ch == '\\') */ 553 else if (ch == '\n' && (_nc_syntax == SYN_TERMINFO)) { 554 /* newlines embedded in a terminfo string are ignored */ 555 ignored = TRUE; 556 } else { 557 *(ptr++) = (char)ch; 558 } 559 560 if (!ignored) { 561 last_ch = ch; 562 count ++; 563 } 564 ignored = FALSE; 565 566 if (count > MAXCAPLEN) 567 _nc_warning("Very long string found. Missing separator?"); 568 } /* end while */ 569 570 *ptr = '\0'; 571 572 return(ch); 573 } 574 575 /* 576 * _nc_push_token() 577 * 578 * Push a token of given type so that it will be reread by the next 579 * get_token() call. 580 */ 581 582 void _nc_push_token(int tokclass) 583 { 584 /* 585 * This implementation is kind of bogus, it will fail if we ever do 586 * more than one pushback at a time between get_token() calls. It 587 * relies on the fact that curr_tok is static storage that nothing 588 * but get_token() touches. 589 */ 590 pushtype = tokclass; 591 _nc_get_type(pushname); 592 593 DEBUG(3, ("pushing token: `%s', class %d", 594 _nc_curr_token.tk_name, pushtype)); 595 } 596 597 /* 598 * Panic mode error recovery - skip everything until a "ch" is found. 599 */ 600 void _nc_panic_mode(char ch) 601 { 602 int c; 603 604 for (;;) { 605 c = next_char(); 606 if (c == ch) 607 return; 608 if (c == EOF) 609 return; 610 } 611 } 612 613 /***************************************************************************** 614 * 615 * Character-stream handling 616 * 617 *****************************************************************************/ 618 619 #define LEXBUFSIZ 1024 620 621 static char *bufptr; /* otherwise, the input buffer pointer */ 622 static char *bufstart; /* start of buffer so we can compute offsets */ 623 static FILE *yyin; /* scanner's input file descriptor */ 624 625 /* 626 * _nc_reset_input() 627 * 628 * Resets the input-reading routines. Used on initialization, 629 * or after a seek has been done. Exactly one argument must be 630 * non-null. 631 */ 632 633 void _nc_reset_input(FILE *fp, char *buf) 634 { 635 pushtype = NO_PUSHBACK; 636 pushname[0] = '\0'; 637 yyin = fp; 638 bufstart = bufptr = buf; 639 _nc_curr_file_pos = 0L; 640 if (fp != 0) 641 _nc_curr_line = 0; 642 _nc_curr_col = 0; 643 } 644 645 /* 646 * int last_char() 647 * 648 * Returns the final nonblank character on the current input buffer 649 */ 650 static int 651 last_char(void) 652 { 653 size_t len = strlen(bufptr); 654 while (len--) { 655 if (!isspace(bufptr[len])) 656 return bufptr[len]; 657 } 658 return 0; 659 } 660 661 /* 662 * int next_char() 663 * 664 * Returns the next character in the input stream. Comments and leading 665 * white space are stripped. 666 * 667 * The global state variable 'firstcolumn' is set TRUE if the character 668 * returned is from the first column of the input line. 669 * 670 * The global variable _nc_curr_line is incremented for each new line. 671 * The global variable _nc_curr_file_pos is set to the file offset of the 672 * beginning of each line. 673 */ 674 675 static int 676 next_char(void) 677 { 678 if (!yyin) 679 { 680 if (*bufptr == '\0') 681 return(EOF); 682 if (*bufptr == '\n') { 683 _nc_curr_line++; 684 _nc_curr_col = 0; 685 } 686 } 687 else if (!bufptr || !*bufptr) 688 { 689 /* 690 * In theory this could be recoded to do its I/O one 691 * character at a time, saving the buffer space. In 692 * practice, this turns out to be quite hard to get 693 * completely right. Try it and see. If you succeed, 694 * don't forget to hack push_back() correspondingly. 695 */ 696 static char line[LEXBUFSIZ]; 697 size_t len; 698 699 do { 700 _nc_curr_file_pos = ftell(yyin); 701 702 if ((bufstart = fgets(line, LEXBUFSIZ, yyin)) != NULL) { 703 _nc_curr_line++; 704 _nc_curr_col = 0; 705 } 706 bufptr = bufstart; 707 } while 708 (bufstart != NULL && line[0] == '#'); 709 710 if (bufstart == NULL) 711 return (EOF); 712 713 while (iswhite(*bufptr)) 714 bufptr++; 715 716 /* 717 * Treat a trailing <cr><lf> the same as a <newline> so we can read 718 * files on OS/2, etc. 719 */ 720 if ((len = strlen(bufptr)) > 1) { 721 if (bufptr[len-1] == '\n' 722 && bufptr[len-2] == '\r') { 723 bufptr[len-2] = '\n'; 724 bufptr[len-1] = '\0'; 725 } 726 } 727 } 728 729 first_column = (bufptr == bufstart); 730 731 _nc_curr_col++; 732 return(*bufptr++); 733 } 734 735 static void push_back(char c) 736 /* push a character back onto the input stream */ 737 { 738 if (bufptr == bufstart) 739 _nc_syserr_abort("Can't backspace off beginning of line"); 740 *--bufptr = c; 741 } 742 743 static long stream_pos(void) 744 /* return our current character position in the input stream */ 745 { 746 return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0)); 747 } 748 749 static bool end_of_stream(void) 750 /* are we at end of input? */ 751 { 752 return ((yyin ? feof(yyin) : (bufptr && *bufptr == '\0')) 753 ? TRUE : FALSE); 754 } 755 756 /* comp_scan.c ends here */ 757