1 /**************************************************************************** 2 * Copyright (c) 1998,1999,2000 Free Software Foundation, Inc. * 3 * * 4 * Permission is hereby granted, free of charge, to any person obtaining a * 5 * copy of this software and associated documentation files (the * 6 * "Software"), to deal in the Software without restriction, including * 7 * without limitation the rights to use, copy, modify, merge, publish, * 8 * distribute, distribute with modifications, sublicense, and/or sell * 9 * copies of the Software, and to permit persons to whom the Software is * 10 * furnished to do so, subject to the following conditions: * 11 * * 12 * The above copyright notice and this permission notice shall be included * 13 * in all copies or substantial portions of the Software. * 14 * * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * 18 * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * 21 * THE USE OR OTHER DEALINGS IN THE SOFTWARE. * 22 * * 23 * Except as contained in this notice, the name(s) of the above copyright * 24 * holders shall not be used in advertising or otherwise to promote the * 25 * sale, use or other dealings in this Software without prior written * 26 * authorization. * 27 ****************************************************************************/ 28 29 /**************************************************************************** 30 * Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995 * 31 * and: Eric S. Raymond <esr@snark.thyrsus.com> * 32 ****************************************************************************/ 33 34 /* 35 * comp_scan.c --- Lexical scanner for terminfo compiler. 36 * 37 * _nc_reset_input() 38 * _nc_get_token() 39 * _nc_panic_mode() 40 * int _nc_syntax; 41 * int _nc_curr_line; 42 * long _nc_curr_file_pos; 43 * long _nc_comment_start; 44 * long _nc_comment_end; 45 */ 46 47 #include <curses.priv.h> 48 49 #include <ctype.h> 50 #include <term_entry.h> 51 #include <tic.h> 52 53 MODULE_ID("$From: comp_scan.c,v 1.44 2000/06/10 21:59:21 tom Exp $") 54 55 /* 56 * Maximum length of string capability we'll accept before raising an error. 57 * Yes, there is a real capability in /etc/termcap this long, an "is". 58 */ 59 #define MAXCAPLEN 600 60 61 #define iswhite(ch) (ch == ' ' || ch == '\t') 62 63 int _nc_syntax = 0; /* termcap or terminfo? */ 64 long _nc_curr_file_pos = 0; /* file offset of current line */ 65 long _nc_comment_start = 0; /* start of comment range before name */ 66 long _nc_comment_end = 0; /* end of comment range before name */ 67 long _nc_start_line = 0; /* start line of current entry */ 68 69 struct token _nc_curr_token = 70 {0, 0, 0}; 71 72 /***************************************************************************** 73 * 74 * Token-grabbing machinery 75 * 76 *****************************************************************************/ 77 78 static bool first_column; /* See 'next_char()' below */ 79 static char separator; /* capability separator */ 80 static int pushtype; /* type of pushback token */ 81 static char pushname[MAX_NAME_SIZE + 1]; 82 83 static int last_char(void); 84 static int next_char(void); 85 static long stream_pos(void); 86 static bool end_of_stream(void); 87 static void push_back(char c); 88 89 /* Assume we may be looking at a termcap-style continuation */ 90 static inline int 91 eat_escaped_newline(int ch) 92 { 93 if (ch == '\\') 94 while ((ch = next_char()) == '\n' || iswhite(ch)) 95 continue; 96 return ch; 97 } 98 99 /* 100 * int 101 * get_token() 102 * 103 * Scans the input for the next token, storing the specifics in the 104 * global structure 'curr_token' and returning one of the following: 105 * 106 * NAMES A line beginning in column 1. 'name' 107 * will be set to point to everything up to but 108 * not including the first separator on the line. 109 * BOOLEAN An entry consisting of a name followed by 110 * a separator. 'name' will be set to point to 111 * the name of the capability. 112 * NUMBER An entry of the form 113 * name#digits, 114 * 'name' will be set to point to the capability 115 * name and 'valnumber' to the number given. 116 * STRING An entry of the form 117 * name=characters, 118 * 'name' is set to the capability name and 119 * 'valstring' to the string of characters, with 120 * input translations done. 121 * CANCEL An entry of the form 122 * name@, 123 * 'name' is set to the capability name and 124 * 'valnumber' to -1. 125 * EOF The end of the file has been reached. 126 * 127 * A `separator' is either a comma or a semicolon, depending on whether 128 * we are in termcap or terminfo mode. 129 * 130 */ 131 132 int 133 _nc_get_token(void) 134 { 135 static const char terminfo_punct[] = "@%&*!#"; 136 long number; 137 int type; 138 int ch; 139 char *numchk; 140 char numbuf[80]; 141 unsigned found; 142 static char buffer[MAX_ENTRY_SIZE]; 143 char *ptr; 144 int dot_flag = FALSE; 145 long token_start; 146 147 if (pushtype != NO_PUSHBACK) { 148 int retval = pushtype; 149 150 _nc_set_type(pushname); 151 DEBUG(3, ("pushed-back token: `%s', class %d", 152 _nc_curr_token.tk_name, pushtype)); 153 154 pushtype = NO_PUSHBACK; 155 pushname[0] = '\0'; 156 157 /* currtok wasn't altered by _nc_push_token() */ 158 return (retval); 159 } 160 161 if (end_of_stream()) 162 return (EOF); 163 164 start_token: 165 token_start = stream_pos(); 166 while ((ch = next_char()) == '\n' || iswhite(ch)) 167 continue; 168 169 ch = eat_escaped_newline(ch); 170 171 if (ch == EOF) 172 type = EOF; 173 else { 174 /* if this is a termcap entry, skip a leading separator */ 175 if (separator == ':' && ch == ':') 176 ch = next_char(); 177 178 if (ch == '.' 179 #ifdef NCURSES_EXT_FUNCS 180 && !_nc_disable_period 181 #endif 182 ) { 183 dot_flag = TRUE; 184 DEBUG(8, ("dot-flag set")); 185 186 while ((ch = next_char()) == '.' || iswhite(ch)) 187 continue; 188 } 189 190 if (ch == EOF) { 191 type = EOF; 192 goto end_of_token; 193 } 194 195 /* have to make some punctuation chars legal for terminfo */ 196 if (!isalnum(ch) 197 #ifdef NCURSES_EXT_FUNCS 198 && !(ch == '.' && _nc_disable_period) 199 #endif 200 && !strchr(terminfo_punct, (char) ch)) { 201 _nc_warning("Illegal character (expected alphanumeric or %s) - %s", 202 terminfo_punct, unctrl(ch)); 203 _nc_panic_mode(separator); 204 goto start_token; 205 } 206 207 ptr = buffer; 208 *(ptr++) = ch; 209 210 if (first_column) { 211 char *desc; 212 213 _nc_comment_start = token_start; 214 _nc_comment_end = _nc_curr_file_pos; 215 _nc_start_line = _nc_curr_line; 216 217 _nc_syntax = ERR; 218 while ((ch = next_char()) != '\n') { 219 if (ch == EOF) 220 _nc_err_abort("premature EOF"); 221 else if (ch == ':' && last_char() != ',') { 222 _nc_syntax = SYN_TERMCAP; 223 separator = ':'; 224 break; 225 } else if (ch == ',') { 226 _nc_syntax = SYN_TERMINFO; 227 separator = ','; 228 /* 229 * Fall-through here is not an accident. 230 * The idea is that if we see a comma, we 231 * figure this is terminfo unless we 232 * subsequently run into a colon -- but 233 * we don't stop looking for that colon until 234 * hitting a newline. This allows commas to 235 * be embedded in description fields of 236 * either syntax. 237 */ 238 /* FALLTHRU */ 239 } else 240 ch = eat_escaped_newline(ch); 241 242 *ptr++ = ch; 243 } 244 ptr[0] = '\0'; 245 if (_nc_syntax == ERR) { 246 /* 247 * Grrr...what we ought to do here is barf, 248 * complaining that the entry is malformed. 249 * But because a couple of name fields in the 250 * 8.2 termcap file end with |\, we just have 251 * to assume it's termcap syntax. 252 */ 253 _nc_syntax = SYN_TERMCAP; 254 separator = ':'; 255 } else if (_nc_syntax == SYN_TERMINFO) { 256 /* throw away trailing /, *$/ */ 257 for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--) 258 continue; 259 ptr[1] = '\0'; 260 } 261 262 /* 263 * This is the soonest we have the terminal name 264 * fetched. Set up for following warning messages. 265 */ 266 ptr = strchr(buffer, '|'); 267 if (ptr == (char *) NULL) 268 ptr = buffer + strlen(buffer); 269 ch = *ptr; 270 *ptr = '\0'; 271 _nc_set_type(buffer); 272 *ptr = ch; 273 274 /* 275 * Compute the boundary between the aliases and the 276 * description field for syntax-checking purposes. 277 */ 278 desc = strrchr(buffer, '|'); 279 if (desc) { 280 if (*desc == '\0') 281 _nc_warning("empty longname field"); 282 else if (strchr(desc, ' ') == (char *) NULL) 283 _nc_warning("older tic versions may treat the description field as an alias"); 284 } 285 if (!desc) 286 desc = buffer + strlen(buffer); 287 288 /* 289 * Whitespace in a name field other than the long name 290 * can confuse rdist and some termcap tools. Slashes 291 * are a no-no. Other special characters can be 292 * dangerous due to shell expansion. 293 */ 294 for (ptr = buffer; ptr < desc; ptr++) { 295 if (isspace(*ptr)) { 296 _nc_warning("whitespace in name or alias field"); 297 break; 298 } else if (*ptr == '/') { 299 _nc_warning("slashes aren't allowed in names or aliases"); 300 break; 301 } else if (strchr("$[]!*?", *ptr)) { 302 _nc_warning("dubious character `%c' in name or alias field", *ptr); 303 break; 304 } 305 } 306 307 ptr = buffer; 308 309 _nc_curr_token.tk_name = buffer; 310 type = NAMES; 311 } else { 312 while ((ch = next_char()) != EOF) { 313 if (!isalnum(ch)) { 314 if (_nc_syntax == SYN_TERMINFO) { 315 if (ch != '_') 316 break; 317 } else { /* allow ';' for "k;" */ 318 if (ch != ';') 319 break; 320 } 321 } 322 *(ptr++) = ch; 323 } 324 325 *ptr++ = '\0'; 326 switch (ch) { 327 case ',': 328 case ':': 329 if (ch != separator) 330 _nc_err_abort("Separator inconsistent with syntax"); 331 _nc_curr_token.tk_name = buffer; 332 type = BOOLEAN; 333 break; 334 case '@': 335 if ((ch = next_char()) != separator) 336 _nc_warning("Missing separator after `%s', have %s", 337 buffer, unctrl(ch)); 338 _nc_curr_token.tk_name = buffer; 339 type = CANCEL; 340 break; 341 342 case '#': 343 found = 0; 344 while (isalnum(ch = next_char())) { 345 numbuf[found++] = ch; 346 if (found >= sizeof(numbuf) - 1) 347 break; 348 } 349 numbuf[found] = '\0'; 350 number = strtol(numbuf, &numchk, 0); 351 if (numchk == numbuf) 352 _nc_warning("no value given for `%s'", buffer); 353 if ((*numchk != '\0') || (ch != separator)) 354 _nc_warning("Missing separator"); 355 _nc_curr_token.tk_name = buffer; 356 _nc_curr_token.tk_valnumber = number; 357 type = NUMBER; 358 break; 359 360 case '=': 361 ch = _nc_trans_string(ptr, buffer + sizeof(buffer)); 362 if (ch != separator) 363 _nc_warning("Missing separator"); 364 _nc_curr_token.tk_name = buffer; 365 _nc_curr_token.tk_valstring = ptr; 366 type = STRING; 367 break; 368 369 case EOF: 370 type = EOF; 371 break; 372 default: 373 /* just to get rid of the compiler warning */ 374 type = UNDEF; 375 _nc_warning("Illegal character - %s", unctrl(ch)); 376 } 377 } /* end else (first_column == FALSE) */ 378 } /* end else (ch != EOF) */ 379 380 end_of_token: 381 382 #ifdef TRACE 383 if (dot_flag == TRUE) 384 DEBUG(8, ("Commented out ")); 385 386 if (_nc_tracing >= DEBUG_LEVEL(7)) { 387 switch (type) { 388 case BOOLEAN: 389 _tracef("Token: Boolean; name='%s'", 390 _nc_curr_token.tk_name); 391 break; 392 393 case NUMBER: 394 _tracef("Token: Number; name='%s', value=%d", 395 _nc_curr_token.tk_name, 396 _nc_curr_token.tk_valnumber); 397 break; 398 399 case STRING: 400 _tracef("Token: String; name='%s', value=%s", 401 _nc_curr_token.tk_name, 402 _nc_visbuf(_nc_curr_token.tk_valstring)); 403 break; 404 405 case CANCEL: 406 _tracef("Token: Cancel; name='%s'", 407 _nc_curr_token.tk_name); 408 break; 409 410 case NAMES: 411 412 _tracef("Token: Names; value='%s'", 413 _nc_curr_token.tk_name); 414 break; 415 416 case EOF: 417 _tracef("Token: End of file"); 418 break; 419 420 default: 421 _nc_warning("Bad token type"); 422 } 423 } 424 #endif 425 426 if (dot_flag == TRUE) /* if commented out, use the next one */ 427 type = _nc_get_token(); 428 429 DEBUG(3, ("token: `%s', class %d", _nc_curr_token.tk_name, type)); 430 431 return (type); 432 } 433 434 /* 435 * char 436 * trans_string(ptr) 437 * 438 * Reads characters using next_char() until encountering a separator, nl, 439 * or end-of-file. The returned value is the character which caused 440 * reading to stop. The following translations are done on the input: 441 * 442 * ^X goes to ctrl-X (i.e. X & 037) 443 * {\E,\n,\r,\b,\t,\f} go to 444 * {ESCAPE,newline,carriage-return,backspace,tab,formfeed} 445 * {\^,\\} go to {carat,backslash} 446 * \ddd (for ddd = up to three octal digits) goes to the character ddd 447 * 448 * \e == \E 449 * \0 == \200 450 * 451 */ 452 453 char 454 _nc_trans_string(char *ptr, char *last) 455 { 456 int count = 0; 457 int number; 458 int i, c; 459 chtype ch, last_ch = '\0'; 460 bool ignored = FALSE; 461 bool long_warning = FALSE; 462 463 while ((ch = c = next_char()) != (chtype) separator && c != EOF) { 464 if (ptr == (last - 1)) 465 break; 466 if ((_nc_syntax == SYN_TERMCAP) && c == '\n') 467 break; 468 if (ch == '^' && last_ch != '%') { 469 ch = c = next_char(); 470 if (c == EOF) 471 _nc_err_abort("Premature EOF"); 472 473 if (!(is7bits(ch) && isprint(ch))) { 474 _nc_warning("Illegal ^ character - %s", unctrl(ch)); 475 } 476 if (ch == '?') { 477 *(ptr++) = '\177'; 478 if (_nc_tracing) 479 _nc_warning("Allow ^? as synonym for \\177"); 480 } else { 481 if ((ch &= 037) == 0) 482 ch = 128; 483 *(ptr++) = (char) (ch); 484 } 485 } else if (ch == '\\') { 486 ch = c = next_char(); 487 if (c == EOF) 488 _nc_err_abort("Premature EOF"); 489 490 if (ch >= '0' && ch <= '7') { 491 number = ch - '0'; 492 for (i = 0; i < 2; i++) { 493 ch = c = next_char(); 494 if (c == EOF) 495 _nc_err_abort("Premature EOF"); 496 497 if (c < '0' || c > '7') { 498 if (isdigit(c)) { 499 _nc_warning("Non-octal digit `%c' in \\ sequence", c); 500 /* allow the digit; it'll do less harm */ 501 } else { 502 push_back((char) c); 503 break; 504 } 505 } 506 507 number = number * 8 + c - '0'; 508 } 509 510 if (number == 0) 511 number = 0200; 512 *(ptr++) = (char) number; 513 } else { 514 switch (c) { 515 case 'E': 516 case 'e': 517 *(ptr++) = '\033'; 518 break; 519 520 case 'a': 521 *(ptr++) = '\007'; 522 break; 523 524 case 'l': 525 case 'n': 526 *(ptr++) = '\n'; 527 break; 528 529 case 'r': 530 *(ptr++) = '\r'; 531 break; 532 533 case 'b': 534 *(ptr++) = '\010'; 535 break; 536 537 case 's': 538 *(ptr++) = ' '; 539 break; 540 541 case 'f': 542 *(ptr++) = '\014'; 543 break; 544 545 case 't': 546 *(ptr++) = '\t'; 547 break; 548 549 case '\\': 550 *(ptr++) = '\\'; 551 break; 552 553 case '^': 554 *(ptr++) = '^'; 555 break; 556 557 case ',': 558 *(ptr++) = ','; 559 break; 560 561 case ':': 562 *(ptr++) = ':'; 563 break; 564 565 case '\n': 566 continue; 567 568 default: 569 _nc_warning("Illegal character %s in \\ sequence", 570 unctrl(ch)); 571 *(ptr++) = (char) ch; 572 } /* endswitch (ch) */ 573 } /* endelse (ch < '0' || ch > '7') */ 574 } 575 /* end else if (ch == '\\') */ 576 else if (ch == '\n' && (_nc_syntax == SYN_TERMINFO)) { 577 /* newlines embedded in a terminfo string are ignored */ 578 ignored = TRUE; 579 } else { 580 *(ptr++) = (char) ch; 581 } 582 583 if (!ignored) { 584 last_ch = ch; 585 count++; 586 } 587 ignored = FALSE; 588 589 if (count > MAXCAPLEN && !long_warning) { 590 _nc_warning("Very long string found. Missing separator?"); 591 long_warning = TRUE; 592 } 593 } /* end while */ 594 595 *ptr = '\0'; 596 597 return (ch); 598 } 599 600 /* 601 * _nc_push_token() 602 * 603 * Push a token of given type so that it will be reread by the next 604 * get_token() call. 605 */ 606 607 void 608 _nc_push_token(int tokclass) 609 { 610 /* 611 * This implementation is kind of bogus, it will fail if we ever do 612 * more than one pushback at a time between get_token() calls. It 613 * relies on the fact that curr_tok is static storage that nothing 614 * but get_token() touches. 615 */ 616 pushtype = tokclass; 617 _nc_get_type(pushname); 618 619 DEBUG(3, ("pushing token: `%s', class %d", 620 _nc_curr_token.tk_name, pushtype)); 621 } 622 623 /* 624 * Panic mode error recovery - skip everything until a "ch" is found. 625 */ 626 void 627 _nc_panic_mode(char ch) 628 { 629 int c; 630 631 for (;;) { 632 c = next_char(); 633 if (c == ch) 634 return; 635 if (c == EOF) 636 return; 637 } 638 } 639 640 /***************************************************************************** 641 * 642 * Character-stream handling 643 * 644 *****************************************************************************/ 645 646 #define LEXBUFSIZ 1024 647 648 static char *bufptr; /* otherwise, the input buffer pointer */ 649 static char *bufstart; /* start of buffer so we can compute offsets */ 650 static FILE *yyin; /* scanner's input file descriptor */ 651 652 /* 653 * _nc_reset_input() 654 * 655 * Resets the input-reading routines. Used on initialization, 656 * or after a seek has been done. Exactly one argument must be 657 * non-null. 658 */ 659 660 void 661 _nc_reset_input(FILE * fp, char *buf) 662 { 663 pushtype = NO_PUSHBACK; 664 pushname[0] = '\0'; 665 yyin = fp; 666 bufstart = bufptr = buf; 667 _nc_curr_file_pos = 0L; 668 if (fp != 0) 669 _nc_curr_line = 0; 670 _nc_curr_col = 0; 671 } 672 673 /* 674 * int last_char() 675 * 676 * Returns the final nonblank character on the current input buffer 677 */ 678 static int 679 last_char(void) 680 { 681 size_t len = strlen(bufptr); 682 while (len--) { 683 if (!isspace(bufptr[len])) 684 return bufptr[len]; 685 } 686 return 0; 687 } 688 689 /* 690 * int next_char() 691 * 692 * Returns the next character in the input stream. Comments and leading 693 * white space are stripped. 694 * 695 * The global state variable 'firstcolumn' is set TRUE if the character 696 * returned is from the first column of the input line. 697 * 698 * The global variable _nc_curr_line is incremented for each new line. 699 * The global variable _nc_curr_file_pos is set to the file offset of the 700 * beginning of each line. 701 */ 702 703 static int 704 next_char(void) 705 { 706 if (!yyin) { 707 if (*bufptr == '\0') 708 return (EOF); 709 if (*bufptr == '\n') { 710 _nc_curr_line++; 711 _nc_curr_col = 0; 712 } 713 } else if (!bufptr || !*bufptr) { 714 /* 715 * In theory this could be recoded to do its I/O one 716 * character at a time, saving the buffer space. In 717 * practice, this turns out to be quite hard to get 718 * completely right. Try it and see. If you succeed, 719 * don't forget to hack push_back() correspondingly. 720 */ 721 static char line[LEXBUFSIZ]; 722 size_t len; 723 724 do { 725 _nc_curr_file_pos = ftell(yyin); 726 727 if ((bufstart = fgets(line, LEXBUFSIZ, yyin)) != NULL) { 728 _nc_curr_line++; 729 _nc_curr_col = 0; 730 } 731 bufptr = bufstart; 732 } while 733 (bufstart != NULL && line[0] == '#'); 734 735 if (bufstart == NULL) 736 return (EOF); 737 738 while (iswhite(*bufptr)) 739 bufptr++; 740 741 /* 742 * Treat a trailing <cr><lf> the same as a <newline> so we can read 743 * files on OS/2, etc. 744 */ 745 if ((len = strlen(bufptr)) > 1) { 746 if (bufptr[len - 1] == '\n' 747 && bufptr[len - 2] == '\r') { 748 bufptr[len - 2] = '\n'; 749 bufptr[len - 1] = '\0'; 750 } 751 } 752 } 753 754 first_column = (bufptr == bufstart); 755 756 _nc_curr_col++; 757 return (*bufptr++); 758 } 759 760 static void 761 push_back(char c) 762 /* push a character back onto the input stream */ 763 { 764 if (bufptr == bufstart) 765 _nc_syserr_abort("Can't backspace off beginning of line"); 766 *--bufptr = c; 767 } 768 769 static long 770 stream_pos(void) 771 /* return our current character position in the input stream */ 772 { 773 return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0)); 774 } 775 776 static bool 777 end_of_stream(void) 778 /* are we at end of input? */ 779 { 780 return ((yyin ? feof(yyin) : (bufptr && *bufptr == '\0')) 781 ? TRUE : FALSE); 782 } 783 784 /* comp_scan.c ends here */ 785