1 /* $NetBSD: lex.c,v 1.10 2004/07/07 19:20:09 mycroft Exp $ */ 2 3 /* 4 * lexical analysis and source input 5 */ 6 #include <sys/cdefs.h> 7 8 #ifndef lint 9 __RCSID("$NetBSD: lex.c,v 1.10 2004/07/07 19:20:09 mycroft Exp $"); 10 #endif 11 12 13 #include "sh.h" 14 #include <ctype.h> 15 16 17 /* Structure to keep track of the lexing state and the various pieces of info 18 * needed for each particular state. 19 */ 20 typedef struct lex_state Lex_state; 21 struct lex_state { 22 int ls_state; 23 union { 24 /* $(...) */ 25 struct scsparen_info { 26 int nparen; /* count open parenthesis */ 27 int csstate; /* XXX remove */ 28 #define ls_scsparen ls_info.u_scsparen 29 } u_scsparen; 30 31 /* $((...)) */ 32 struct sasparen_info { 33 int nparen; /* count open parenthesis */ 34 int start; /* marks start of $(( in output str */ 35 #define ls_sasparen ls_info.u_sasparen 36 } u_sasparen; 37 38 /* ((...)) */ 39 struct sletparen_info { 40 int nparen; /* count open parenthesis */ 41 #define ls_sletparen ls_info.u_sletparen 42 } u_sletparen; 43 44 /* `...` */ 45 struct sbquote_info { 46 int indquotes; /* true if in double quotes: "`...`" */ 47 #define ls_sbquote ls_info.u_sbquote 48 } u_sbquote; 49 50 Lex_state *base; /* used to point to next state block */ 51 } ls_info; 52 }; 53 54 typedef struct State_info State_info; 55 struct State_info { 56 Lex_state *base; 57 Lex_state *end; 58 }; 59 60 61 static void readhere ARGS((struct ioword *iop)); 62 static int getsc__ ARGS((void)); 63 static void getsc_line ARGS((Source *s)); 64 static int getsc_bn ARGS((void)); 65 static char *get_brace_var ARGS((XString *wsp, char *wp)); 66 static int arraysub ARGS((char **strp)); 67 static const char *ungetsc ARGS((int c)); 68 static void gethere ARGS((void)); 69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end)); 70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end)); 71 72 static int backslash_skip; 73 static int ignore_backslash_newline; 74 75 /* optimized getsc_bn() */ 76 #define getsc() (*source->str != '\0' && *source->str != '\\' \ 77 && !backslash_skip ? *source->str++ : getsc_bn()) 78 /* optimized getsc__() */ 79 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__()) 80 81 #define STATE_BSIZE 32 82 83 #define PUSH_STATE(s) do { \ 84 if (++statep == state_info.end) \ 85 statep = push_state_(&state_info, statep); \ 86 state = statep->ls_state = (s); \ 87 } while (0) 88 89 #define POP_STATE() do { \ 90 if (--statep == state_info.base) \ 91 statep = pop_state_(&state_info, statep); \ 92 state = statep->ls_state; \ 93 } while (0) 94 95 96 97 /* 98 * Lexical analyzer 99 * 100 * tokens are not regular expressions, they are LL(1). 101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 102 * hence the state stack. 103 */ 104 105 int 106 yylex(cf) 107 int cf; 108 { 109 Lex_state states[STATE_BSIZE], *statep; 110 State_info state_info; 111 register int c, state; 112 XString ws; /* expandable output word */ 113 register char *wp; /* output word pointer */ 114 char *sp, *dp; 115 int c2; 116 117 118 Again: 119 states[0].ls_state = -1; 120 states[0].ls_info.base = (Lex_state *) 0; 121 statep = &states[1]; 122 state_info.base = states; 123 state_info.end = &states[STATE_BSIZE]; 124 125 Xinit(ws, wp, 64, ATEMP); 126 127 backslash_skip = 0; 128 ignore_backslash_newline = 0; 129 130 if (cf&ONEWORD) 131 state = SWORD; 132 #ifdef KSH 133 else if (cf&LETEXPR) { 134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */ 135 state = SLETPAREN; 136 statep->ls_sletparen.nparen = 0; 137 } 138 #endif /* KSH */ 139 else { /* normal lexing */ 140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 141 while ((c = getsc()) == ' ' || c == '\t') 142 ; 143 if (c == '#') { 144 ignore_backslash_newline++; 145 while ((c = getsc()) != '\0' && c != '\n') 146 ; 147 ignore_backslash_newline--; 148 } 149 ungetsc(c); 150 } 151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ 152 source->flags &= ~SF_ALIAS; 153 /* In POSIX mode, a trailing space only counts if we are 154 * parsing a simple command 155 */ 156 if (!Flag(FPOSIX) || (cf & CMDWORD)) 157 cf |= ALIAS; 158 } 159 160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ 161 statep->ls_state = state; 162 163 /* collect non-special or quoted characters to form word */ 164 while (!((c = getsc()) == 0 165 || ((state == SBASE || state == SHEREDELIM) 166 && ctype(c, C_LEX1)))) 167 { 168 Xcheck(ws, wp); 169 switch (state) { 170 case SBASE: 171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 172 *wp = EOS; /* temporary */ 173 if (is_wdvarname(Xstring(ws, wp), FALSE)) 174 { 175 char *p, *tmp; 176 177 if (arraysub(&tmp)) { 178 *wp++ = CHAR; 179 *wp++ = c; 180 for (p = tmp; *p; ) { 181 Xcheck(ws, wp); 182 *wp++ = CHAR; 183 *wp++ = *p++; 184 } 185 afree(tmp, ATEMP); 186 break; 187 } else { 188 Source *s; 189 190 s = pushs(SREREAD, 191 source->areap); 192 s->start = s->str 193 = s->u.freeme = tmp; 194 s->next = source; 195 source = s; 196 } 197 } 198 *wp++ = CHAR; 199 *wp++ = c; 200 break; 201 } 202 /* fall through.. */ 203 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 204 #ifdef KSH 205 if (c == '*' || c == '@' || c == '+' || c == '?' 206 || c == '!') 207 { 208 c2 = getsc(); 209 if (c2 == '(' /*)*/ ) { 210 *wp++ = OPAT; 211 *wp++ = c; 212 PUSH_STATE(SPATTERN); 213 break; 214 } 215 ungetsc(c2); 216 } 217 #endif /* KSH */ 218 /* fall through.. */ 219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 220 switch (c) { 221 case '\\': 222 c = getsc(); 223 #ifdef OS2 224 if (isalnum(c)) { 225 *wp++ = CHAR, *wp++ = '\\'; 226 *wp++ = CHAR, *wp++ = c; 227 } else 228 #endif 229 if (c) /* trailing \ is lost */ 230 *wp++ = QCHAR, *wp++ = c; 231 break; 232 case '\'': 233 *wp++ = OQUOTE; 234 ignore_backslash_newline++; 235 PUSH_STATE(SSQUOTE); 236 break; 237 case '"': 238 *wp++ = OQUOTE; 239 PUSH_STATE(SDQUOTE); 240 break; 241 default: 242 goto Subst; 243 } 244 break; 245 246 Subst: 247 switch (c) { 248 case '\\': 249 c = getsc(); 250 switch (c) { 251 case '"': case '\\': 252 case '$': case '`': 253 *wp++ = QCHAR, *wp++ = c; 254 break; 255 default: 256 Xcheck(ws, wp); 257 if (c) { /* trailing \ is lost */ 258 *wp++ = CHAR, *wp++ = '\\'; 259 *wp++ = CHAR, *wp++ = c; 260 } 261 break; 262 } 263 break; 264 case '$': 265 c = getsc(); 266 if (c == '(') /*)*/ { 267 c = getsc(); 268 if (c == '(') /*)*/ { 269 PUSH_STATE(SASPAREN); 270 statep->ls_sasparen.nparen = 2; 271 statep->ls_sasparen.start = 272 Xsavepos(ws, wp); 273 *wp++ = EXPRSUB; 274 } else { 275 ungetsc(c); 276 PUSH_STATE(SCSPAREN); 277 statep->ls_scsparen.nparen = 1; 278 statep->ls_scsparen.csstate = 0; 279 *wp++ = COMSUB; 280 } 281 } else if (c == '{') /*}*/ { 282 *wp++ = OSUBST; 283 *wp++ = '{'; /*}*/ 284 wp = get_brace_var(&ws, wp); 285 c = getsc(); 286 /* allow :# and :% (ksh88 compat) */ 287 if (c == ':') { 288 *wp++ = CHAR, *wp++ = c; 289 c = getsc(); 290 } 291 /* If this is a trim operation, 292 * treat (,|,) specially in STBRACE. 293 */ 294 if (c == '#' || c == '%') { 295 ungetsc(c); 296 PUSH_STATE(STBRACE); 297 } else { 298 ungetsc(c); 299 PUSH_STATE(SBRACE); 300 } 301 } else if (ctype(c, C_ALPHA)) { 302 *wp++ = OSUBST; 303 *wp++ = 'X'; 304 do { 305 Xcheck(ws, wp); 306 *wp++ = c; 307 c = getsc(); 308 } while (ctype(c, C_ALPHA|C_DIGIT)); 309 *wp++ = '\0'; 310 *wp++ = CSUBST; 311 *wp++ = 'X'; 312 ungetsc(c); 313 } else if (ctype(c, C_DIGIT|C_VAR1)) { 314 Xcheck(ws, wp); 315 *wp++ = OSUBST; 316 *wp++ = 'X'; 317 *wp++ = c; 318 *wp++ = '\0'; 319 *wp++ = CSUBST; 320 *wp++ = 'X'; 321 } else { 322 *wp++ = CHAR, *wp++ = '$'; 323 ungetsc(c); 324 } 325 break; 326 case '`': 327 PUSH_STATE(SBQUOTE); 328 *wp++ = COMSUB; 329 /* Need to know if we are inside double quotes 330 * since sh/at&t-ksh translate the \" to " in 331 * "`..\"..`". 332 * This is not done in posix mode (section 333 * 3.2.3, Double Quotes: "The backquote shall 334 * retain its special meaning introducing the 335 * other form of command substitution (see 336 * 3.6.3). The portion of the quoted string 337 * from the initial backquote and the 338 * characters up to the next backquote that 339 * is not preceded by a backslash (having 340 * escape characters removed) defines that 341 * command whose output replaces `...` when 342 * the word is expanded." 343 * Section 3.6.3, Command Substitution: 344 * "Within the backquoted style of command 345 * substitution, backslash shall retain its 346 * literal meaning, except when followed by 347 * $ ` \."). 348 */ 349 statep->ls_sbquote.indquotes = 0; 350 if (!Flag(FPOSIX)) { 351 Lex_state *s = statep; 352 Lex_state *base = state_info.base; 353 while (1) { 354 for (; s != base; s--) { 355 if (s->ls_state == SDQUOTE) { 356 statep->ls_sbquote.indquotes = 1; 357 break; 358 } 359 } 360 if (s != base) 361 break; 362 if (!(s = s->ls_info.base)) 363 break; 364 base = s-- - STATE_BSIZE; 365 } 366 } 367 break; 368 default: 369 *wp++ = CHAR, *wp++ = c; 370 } 371 break; 372 373 case SSQUOTE: 374 if (c == '\'') { 375 POP_STATE(); 376 *wp++ = CQUOTE; 377 ignore_backslash_newline--; 378 } else 379 *wp++ = QCHAR, *wp++ = c; 380 break; 381 382 case SDQUOTE: 383 if (c == '"') { 384 POP_STATE(); 385 *wp++ = CQUOTE; 386 } else 387 goto Subst; 388 break; 389 390 case SCSPAREN: /* $( .. ) */ 391 /* todo: deal with $(...) quoting properly 392 * kludge to partly fake quoting inside $(..): doesn't 393 * really work because nested $(..) or ${..} inside 394 * double quotes aren't dealt with. 395 */ 396 switch (statep->ls_scsparen.csstate) { 397 case 0: /* normal */ 398 switch (c) { 399 case '(': 400 statep->ls_scsparen.nparen++; 401 break; 402 case ')': 403 statep->ls_scsparen.nparen--; 404 break; 405 case '\\': 406 statep->ls_scsparen.csstate = 1; 407 break; 408 case '"': 409 statep->ls_scsparen.csstate = 2; 410 break; 411 case '\'': 412 statep->ls_scsparen.csstate = 4; 413 ignore_backslash_newline++; 414 break; 415 } 416 break; 417 418 case 1: /* backslash in normal mode */ 419 case 3: /* backslash in double quotes */ 420 --statep->ls_scsparen.csstate; 421 break; 422 423 case 2: /* double quotes */ 424 if (c == '"') 425 statep->ls_scsparen.csstate = 0; 426 else if (c == '\\') 427 statep->ls_scsparen.csstate = 3; 428 break; 429 430 case 4: /* single quotes */ 431 if (c == '\'') { 432 statep->ls_scsparen.csstate = 0; 433 ignore_backslash_newline--; 434 } 435 break; 436 } 437 if (statep->ls_scsparen.nparen == 0) { 438 POP_STATE(); 439 *wp++ = 0; /* end of COMSUB */ 440 } else 441 *wp++ = c; 442 break; 443 444 case SASPAREN: /* $(( .. )) */ 445 /* todo: deal with $((...); (...)) properly */ 446 /* XXX should nest using existing state machine 447 * (embed "..", $(...), etc.) */ 448 if (c == '(') 449 statep->ls_sasparen.nparen++; 450 else if (c == ')') { 451 statep->ls_sasparen.nparen--; 452 if (statep->ls_sasparen.nparen == 1) { 453 /*(*/ 454 if ((c2 = getsc()) == ')') { 455 POP_STATE(); 456 *wp++ = 0; /* end of EXPRSUB */ 457 break; 458 } else { 459 char *s; 460 461 ungetsc(c2); 462 /* mismatched parenthesis - 463 * assume we were really 464 * parsing a $(..) expression 465 */ 466 s = Xrestpos(ws, wp, 467 statep->ls_sasparen.start); 468 memmove(s + 1, s, wp - s); 469 *s++ = COMSUB; 470 *s = '('; /*)*/ 471 wp++; 472 statep->ls_scsparen.nparen = 1; 473 statep->ls_scsparen.csstate = 0; 474 state = statep->ls_state 475 = SCSPAREN; 476 477 } 478 } 479 } 480 *wp++ = c; 481 break; 482 483 case SBRACE: 484 /*{*/ 485 if (c == '}') { 486 POP_STATE(); 487 *wp++ = CSUBST; 488 *wp++ = /*{*/ '}'; 489 } else 490 goto Sbase1; 491 break; 492 493 case STBRACE: 494 /* Same as SBRACE, except (,|,) treated specially */ 495 /*{*/ 496 if (c == '}') { 497 POP_STATE(); 498 *wp++ = CSUBST; 499 *wp++ = /*{*/ '}'; 500 } else if (c == '|') { 501 *wp++ = SPAT; 502 } else if (c == '(') { 503 *wp++ = OPAT; 504 *wp++ = ' '; /* simile for @ */ 505 PUSH_STATE(SPATTERN); 506 } else 507 goto Sbase1; 508 break; 509 510 case SBQUOTE: 511 if (c == '`') { 512 *wp++ = 0; 513 POP_STATE(); 514 } else if (c == '\\') { 515 switch (c = getsc()) { 516 case '\\': 517 case '$': case '`': 518 *wp++ = c; 519 break; 520 case '"': 521 if (statep->ls_sbquote.indquotes) { 522 *wp++ = c; 523 break; 524 } 525 /* fall through.. */ 526 default: 527 if (c) { /* trailing \ is lost */ 528 *wp++ = '\\'; 529 *wp++ = c; 530 } 531 break; 532 } 533 } else 534 *wp++ = c; 535 break; 536 537 case SWORD: /* ONEWORD */ 538 goto Subst; 539 540 #ifdef KSH 541 case SLETPAREN: /* LETEXPR: (( ... )) */ 542 /*(*/ 543 if (c == ')') { 544 if (statep->ls_sletparen.nparen > 0) 545 --statep->ls_sletparen.nparen; 546 /*(*/ 547 else if ((c2 = getsc()) == ')') { 548 c = 0; 549 *wp++ = CQUOTE; 550 goto Done; 551 } else 552 ungetsc(c2); 553 } else if (c == '(') 554 /* parenthesis inside quotes and backslashes 555 * are lost, but at&t ksh doesn't count them 556 * either 557 */ 558 ++statep->ls_sletparen.nparen; 559 goto Sbase2; 560 #endif /* KSH */ 561 562 case SHEREDELIM: /* <<,<<- delimiter */ 563 /* XXX chuck this state (and the next) - use 564 * the existing states ($ and \`..` should be 565 * stripped of their specialness after the 566 * fact). 567 */ 568 /* here delimiters need a special case since 569 * $ and `..` are not to be treated specially 570 */ 571 if (c == '\\') { 572 c = getsc(); 573 if (c) { /* trailing \ is lost */ 574 *wp++ = QCHAR; 575 *wp++ = c; 576 } 577 } else if (c == '\'') { 578 PUSH_STATE(SSQUOTE); 579 *wp++ = OQUOTE; 580 ignore_backslash_newline++; 581 } else if (c == '"') { 582 state = statep->ls_state = SHEREDQUOTE; 583 *wp++ = OQUOTE; 584 } else { 585 *wp++ = CHAR; 586 *wp++ = c; 587 } 588 break; 589 590 case SHEREDQUOTE: /* " in <<,<<- delimiter */ 591 if (c == '"') { 592 *wp++ = CQUOTE; 593 state = statep->ls_state = SHEREDELIM; 594 } else { 595 if (c == '\\') { 596 switch (c = getsc()) { 597 case '\\': case '"': 598 case '$': case '`': 599 break; 600 default: 601 if (c) { /* trailing \ lost */ 602 *wp++ = CHAR; 603 *wp++ = '\\'; 604 } 605 break; 606 } 607 } 608 *wp++ = CHAR; 609 *wp++ = c; 610 } 611 break; 612 613 case SPATTERN: /* in *(...|...) pattern (*+?@!) */ 614 if ( /*(*/ c == ')') { 615 *wp++ = CPAT; 616 POP_STATE(); 617 } else if (c == '|') { 618 *wp++ = SPAT; 619 } else if (c == '(') { 620 *wp++ = OPAT; 621 *wp++ = ' '; /* simile for @ */ 622 PUSH_STATE(SPATTERN); 623 } else 624 goto Sbase1; 625 break; 626 } 627 } 628 Done: 629 Xcheck(ws, wp); 630 if (statep != &states[1]) 631 /* XXX figure out what is missing */ 632 yyerror("no closing quote\n"); 633 634 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 635 if (state == SHEREDELIM) 636 state = SBASE; 637 638 dp = Xstring(ws, wp); 639 if ((c == '<' || c == '>') && state == SBASE 640 && ((c2 = Xlength(ws, wp)) == 0 641 || (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) 642 { 643 struct ioword *iop = 644 (struct ioword *) alloc(sizeof(*iop), ATEMP); 645 646 if (c2 == 2) 647 iop->unit = dp[1] - '0'; 648 else 649 iop->unit = c == '>'; /* 0 for <, 1 for > */ 650 651 c2 = getsc(); 652 /* <<, >>, <> are ok, >< is not */ 653 if (c == c2 || (c == '<' && c2 == '>')) { 654 iop->flag = c == c2 ? 655 (c == '>' ? IOCAT : IOHERE) : IORDWR; 656 if (iop->flag == IOHERE) { 657 if ((c2 = getsc()) == '-') { 658 iop->flag |= IOSKIP; 659 } else { 660 ungetsc(c2); 661 } 662 } 663 } else if (c2 == '&') 664 iop->flag = IODUP | (c == '<' ? IORDUP : 0); 665 else { 666 iop->flag = c == '>' ? IOWRITE : IOREAD; 667 if (c == '>' && c2 == '|') 668 iop->flag |= IOCLOB; 669 else 670 ungetsc(c2); 671 } 672 673 iop->name = (char *) 0; 674 iop->delim = (char *) 0; 675 iop->heredoc = (char *) 0; 676 Xfree(ws, wp); /* free word */ 677 yylval.iop = iop; 678 return REDIR; 679 } 680 681 if (wp == dp && state == SBASE) { 682 Xfree(ws, wp); /* free word */ 683 /* no word, process LEX1 character */ 684 switch (c) { 685 default: 686 return c; 687 688 case '|': 689 case '&': 690 case ';': 691 if ((c2 = getsc()) == c) 692 c = (c == ';') ? BREAK : 693 (c == '|') ? LOGOR : 694 (c == '&') ? LOGAND : 695 YYERRCODE; 696 #ifdef KSH 697 else if (c == '|' && c2 == '&') 698 c = COPROC; 699 #endif /* KSH */ 700 else 701 ungetsc(c2); 702 return c; 703 704 case '\n': 705 gethere(); 706 if (cf & CONTIN) 707 goto Again; 708 return c; 709 710 case '(': /*)*/ 711 #ifdef KSH 712 if ((c2 = getsc()) == '(') /*)*/ 713 /* XXX need to handle ((...); (...)) */ 714 c = MDPAREN; 715 else 716 ungetsc(c2); 717 #endif /* KSH */ 718 return c; 719 /*(*/ 720 case ')': 721 return c; 722 } 723 } 724 725 *wp++ = EOS; /* terminate word */ 726 yylval.cp = Xclose(ws, wp); 727 if (state == SWORD 728 #ifdef KSH 729 || state == SLETPAREN 730 #endif /* KSH */ 731 ) /* ONEWORD? */ 732 return LWORD; 733 ungetsc(c); /* unget terminator */ 734 735 /* copy word to unprefixed string ident */ 736 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; ) 737 *dp++ = *sp++; 738 /* Make sure the ident array stays '\0' padded */ 739 memset(dp, 0, (ident+IDENT) - dp + 1); 740 if (c != EOS) 741 *ident = '\0'; /* word is not unquoted */ 742 743 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { 744 struct tbl *p; 745 int h = hash(ident); 746 747 /* { */ 748 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h)) 749 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) 750 { 751 afree(yylval.cp, ATEMP); 752 return p->val.i; 753 } 754 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h)) 755 && (p->flag & ISSET)) 756 { 757 register Source *s; 758 759 for (s = source; s->type == SALIAS; s = s->next) 760 if (s->u.tblp == p) 761 return LWORD; 762 /* push alias expansion */ 763 s = pushs(SALIAS, source->areap); 764 s->start = s->str = p->val.s; 765 s->u.tblp = p; 766 s->next = source; 767 source = s; 768 afree(yylval.cp, ATEMP); 769 goto Again; 770 } 771 } 772 773 return LWORD; 774 } 775 776 static void 777 gethere() 778 { 779 register struct ioword **p; 780 781 for (p = heres; p < herep; p++) 782 readhere(*p); 783 herep = heres; 784 } 785 786 /* 787 * read "<<word" text into temp file 788 */ 789 790 static void 791 readhere(iop) 792 struct ioword *iop; 793 { 794 register int c; 795 char *volatile eof; 796 char *eofp; 797 int skiptabs; 798 XString xs; 799 char *xp; 800 int xpos; 801 802 eof = evalstr(iop->delim, 0); 803 804 if (!(iop->flag & IOEVAL)) 805 ignore_backslash_newline++; 806 807 Xinit(xs, xp, 256, ATEMP); 808 809 for (;;) { 810 eofp = eof; 811 skiptabs = iop->flag & IOSKIP; 812 xpos = Xsavepos(xs, xp); 813 while ((c = getsc()) != 0) { 814 if (skiptabs) { 815 if (c == '\t') 816 continue; 817 skiptabs = 0; 818 } 819 if (c != *eofp) 820 break; 821 Xcheck(xs, xp); 822 Xput(xs, xp, c); 823 eofp++; 824 } 825 /* Allow EOF here so commands with out trailing newlines 826 * will work (eg, ksh -c '...', $(...), etc). 827 */ 828 if (*eofp == '\0' && (c == 0 || c == '\n')) { 829 xp = Xrestpos(xs, xp, xpos); 830 break; 831 } 832 ungetsc(c); 833 while ((c = getsc()) != '\n') { 834 if (c == 0) 835 yyerror("here document `%s' unclosed\n", eof); 836 Xcheck(xs, xp); 837 Xput(xs, xp, c); 838 } 839 Xcheck(xs, xp); 840 Xput(xs, xp, c); 841 } 842 Xput(xs, xp, '\0'); 843 iop->heredoc = Xclose(xs, xp); 844 845 if (!(iop->flag & IOEVAL)) 846 ignore_backslash_newline--; 847 } 848 849 void 850 #ifdef HAVE_PROTOTYPES 851 yyerror(const char *fmt, ...) 852 #else 853 yyerror(fmt, va_alist) 854 const char *fmt; 855 va_dcl 856 #endif 857 { 858 va_list va; 859 860 /* pop aliases and re-reads */ 861 while (source->type == SALIAS || source->type == SREREAD) 862 source = source->next; 863 source->str = null; /* zap pending input */ 864 865 error_prefix(TRUE); 866 SH_VA_START(va, fmt); 867 shf_vfprintf(shl_out, fmt, va); 868 va_end(va); 869 errorf(null); 870 } 871 872 /* 873 * input for yylex with alias expansion 874 */ 875 876 Source * 877 pushs(type, areap) 878 int type; 879 Area *areap; 880 { 881 register Source *s; 882 883 s = (Source *) alloc(sizeof(Source), areap); 884 s->type = type; 885 s->str = null; 886 s->start = NULL; 887 s->line = 0; 888 s->errline = 0; 889 s->file = NULL; 890 s->flags = 0; 891 s->next = NULL; 892 s->areap = areap; 893 if (type == SFILE || type == SSTDIN) { 894 char *dummy; 895 Xinit(s->xs, dummy, 256, s->areap); 896 } else 897 memset(&s->xs, 0, sizeof(s->xs)); 898 return s; 899 } 900 901 static int 902 getsc__() 903 { 904 register Source *s = source; 905 register int c; 906 907 while ((c = *s->str++) == 0) { 908 s->str = NULL; /* return 0 for EOF by default */ 909 switch (s->type) { 910 case SEOF: 911 s->str = null; 912 return 0; 913 914 case SSTDIN: 915 case SFILE: 916 getsc_line(s); 917 break; 918 919 case SWSTR: 920 break; 921 922 case SSTRING: 923 break; 924 925 case SWORDS: 926 s->start = s->str = *s->u.strv++; 927 s->type = SWORDSEP; 928 break; 929 930 case SWORDSEP: 931 if (*s->u.strv == NULL) { 932 s->start = s->str = newline; 933 s->type = SEOF; 934 } else { 935 s->start = s->str = space; 936 s->type = SWORDS; 937 } 938 break; 939 940 case SALIAS: 941 if (s->flags & SF_ALIASEND) { 942 /* pass on an unused SF_ALIAS flag */ 943 source = s->next; 944 source->flags |= s->flags & SF_ALIAS; 945 s = source; 946 } else if (*s->u.tblp->val.s 947 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) 948 { 949 source = s = s->next; /* pop source stack */ 950 /* Note that this alias ended with a space, 951 * enabling alias expansion on the following 952 * word. 953 */ 954 s->flags |= SF_ALIAS; 955 } else { 956 /* At this point, we need to keep the current 957 * alias in the source list so recursive 958 * aliases can be detected and we also need 959 * to return the next character. Do this 960 * by temporarily popping the alias to get 961 * the next character and then put it back 962 * in the source list with the SF_ALIASEND 963 * flag set. 964 */ 965 source = s->next; /* pop source stack */ 966 source->flags |= s->flags & SF_ALIAS; 967 c = getsc__(); 968 if (c) { 969 s->flags |= SF_ALIASEND; 970 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 971 s->start = s->str = s->ugbuf; 972 s->next = source; 973 source = s; 974 } else { 975 s = source; 976 /* avoid reading eof twice */ 977 s->str = NULL; 978 break; 979 } 980 } 981 continue; 982 983 case SREREAD: 984 if (s->start != s->ugbuf) /* yuck */ 985 afree(s->u.freeme, ATEMP); 986 source = s = s->next; 987 continue; 988 } 989 if (s->str == NULL) { 990 s->type = SEOF; 991 s->start = s->str = null; 992 return '\0'; 993 } 994 if (s->flags & SF_ECHO) { 995 shf_puts(s->str, shl_out); 996 shf_flush(shl_out); 997 } 998 } 999 return c; 1000 } 1001 1002 static void 1003 getsc_line(s) 1004 Source *s; 1005 { 1006 char *xp = Xstring(s->xs, xp); 1007 int interactive = Flag(FTALKING) && s->type == SSTDIN; 1008 int have_tty = interactive && (s->flags & SF_TTY); 1009 1010 /* Done here to ensure nothing odd happens when a timeout occurs */ 1011 XcheckN(s->xs, xp, LINE); 1012 *xp = '\0'; 1013 s->start = s->str = xp; 1014 1015 #ifdef KSH 1016 if (have_tty && ksh_tmout) { 1017 ksh_tmout_state = TMOUT_READING; 1018 alarm(ksh_tmout); 1019 } 1020 #endif /* KSH */ 1021 #ifdef EDIT 1022 if (have_tty && (0 1023 # ifdef VI 1024 || Flag(FVI) 1025 # endif /* VI */ 1026 # ifdef EMACS 1027 || Flag(FEMACS) || Flag(FGMACS) 1028 # endif /* EMACS */ 1029 )) 1030 { 1031 int nread; 1032 1033 nread = x_read(xp, LINE); 1034 if (nread < 0) /* read error */ 1035 nread = 0; 1036 xp[nread] = '\0'; 1037 xp += nread; 1038 } 1039 else 1040 #endif /* EDIT */ 1041 { 1042 if (interactive) { 1043 pprompt(prompt, 0); 1044 } else 1045 s->line++; 1046 1047 while (1) { 1048 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1049 1050 if (!p && shf_error(s->u.shf) 1051 && shf_errno(s->u.shf) == EINTR) 1052 { 1053 shf_clearerr(s->u.shf); 1054 if (trap) 1055 runtraps(0); 1056 continue; 1057 } 1058 if (!p || (xp = p, xp[-1] == '\n')) 1059 break; 1060 /* double buffer size */ 1061 xp++; /* move past null so doubling works... */ 1062 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1063 xp--; /* ...and move back again */ 1064 } 1065 /* flush any unwanted input so other programs/builtins 1066 * can read it. Not very optimal, but less error prone 1067 * than flushing else where, dealing with redirections, 1068 * etc.. 1069 * todo: reduce size of shf buffer (~128?) if SSTDIN 1070 */ 1071 if (s->type == SSTDIN) 1072 shf_flush(s->u.shf); 1073 } 1074 /* XXX: temporary kludge to restore source after a 1075 * trap may have been executed. 1076 */ 1077 source = s; 1078 #ifdef KSH 1079 if (have_tty && ksh_tmout) 1080 { 1081 ksh_tmout_state = TMOUT_EXECUTING; 1082 alarm(0); 1083 } 1084 #endif /* KSH */ 1085 s->start = s->str = Xstring(s->xs, xp); 1086 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1087 /* Note: if input is all nulls, this is not eof */ 1088 if (Xlength(s->xs, xp) == 0) { /* EOF */ 1089 if (s->type == SFILE) 1090 shf_fdclose(s->u.shf); 1091 s->str = NULL; 1092 } else if (interactive) { 1093 #ifdef HISTORY 1094 char *p = Xstring(s->xs, xp); 1095 if (cur_prompt == PS1) 1096 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS)) 1097 p++; 1098 if (*p) { 1099 # ifdef EASY_HISTORY 1100 if (cur_prompt == PS2) 1101 histappend(Xstring(s->xs, xp), 1); 1102 else 1103 # endif /* EASY_HISTORY */ 1104 { 1105 s->line++; 1106 histsave(s->line, s->str, 1); 1107 } 1108 } 1109 #endif /* HISTORY */ 1110 } 1111 if (interactive) 1112 set_prompt(PS2, (Source *) 0); 1113 } 1114 1115 void 1116 set_prompt(to, s) 1117 int to; 1118 Source *s; 1119 { 1120 cur_prompt = to; 1121 1122 switch (to) { 1123 case PS1: /* command */ 1124 #ifdef KSH 1125 /* Substitute ! and !! here, before substitutions are done 1126 * so ! in expanded variables are not expanded. 1127 * NOTE: this is not what at&t ksh does (it does it after 1128 * substitutions, POSIX doesn't say which is to be done. 1129 */ 1130 { 1131 struct shf *shf; 1132 char * volatile ps1; 1133 Area *saved_atemp; 1134 1135 ps1 = str_val(global("PS1")); 1136 shf = shf_sopen((char *) 0, strlen(ps1) * 2, 1137 SHF_WR | SHF_DYNAMIC, (struct shf *) 0); 1138 while (*ps1) { 1139 if (*ps1 != '!' || *++ps1 == '!') 1140 shf_putchar(*ps1++, shf); 1141 else 1142 shf_fprintf(shf, "%d", 1143 s ? s->line + 1 : 0); 1144 } 1145 ps1 = shf_sclose(shf); 1146 saved_atemp = ATEMP; 1147 newenv(E_ERRH); 1148 if (ksh_sigsetjmp(e->jbuf, 0)) { 1149 prompt = safe_prompt; 1150 /* Don't print an error - assume it has already 1151 * been printed. Reason is we may have forked 1152 * to run a command and the child may be 1153 * unwinding its stack through this code as it 1154 * exits. 1155 */ 1156 } else 1157 prompt = str_save(substitute(ps1, 0), 1158 saved_atemp); 1159 quitenv(); 1160 } 1161 #else /* KSH */ 1162 prompt = str_val(global("PS1")); 1163 #endif /* KSH */ 1164 break; 1165 1166 case PS2: /* command continuation */ 1167 prompt = str_val(global("PS2")); 1168 break; 1169 } 1170 } 1171 1172 /* See also related routine, promptlen() in edit.c */ 1173 void 1174 pprompt(cp, ntruncate) 1175 const char *cp; 1176 int ntruncate; 1177 { 1178 #if 0 1179 char nbuf[32]; 1180 int c; 1181 1182 while (*cp != 0) { 1183 if (*cp != '!') 1184 c = *cp++; 1185 else if (*++cp == '!') 1186 c = *cp++; 1187 else { 1188 int len; 1189 char *p; 1190 1191 shf_snprintf(p = nbuf, sizeof(nbuf), "%d", 1192 source->line + 1); 1193 len = strlen(nbuf); 1194 if (ntruncate) { 1195 if (ntruncate >= len) { 1196 ntruncate -= len; 1197 continue; 1198 } 1199 p += ntruncate; 1200 len -= ntruncate; 1201 ntruncate = 0; 1202 } 1203 shf_write(p, len, shl_out); 1204 continue; 1205 } 1206 if (ntruncate) 1207 --ntruncate; 1208 else 1209 shf_putc(c, shl_out); 1210 } 1211 #endif /* 0 */ 1212 shf_puts(cp + ntruncate, shl_out); 1213 shf_flush(shl_out); 1214 } 1215 1216 /* Read the variable part of a ${...} expression (ie, up to but not including 1217 * the :[-+?=#%] or close-brace. 1218 */ 1219 static char * 1220 get_brace_var(wsp, wp) 1221 XString *wsp; 1222 char *wp; 1223 { 1224 enum parse_state { 1225 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1226 PS_NUMBER, PS_VAR1, PS_END 1227 } 1228 state; 1229 char c; 1230 1231 state = PS_INITIAL; 1232 while (1) { 1233 c = getsc(); 1234 /* State machine to figure out where the variable part ends. */ 1235 switch (state) { 1236 case PS_INITIAL: 1237 if (c == '#') { 1238 state = PS_SAW_HASH; 1239 break; 1240 } 1241 /* fall through.. */ 1242 case PS_SAW_HASH: 1243 if (letter(c)) 1244 state = PS_IDENT; 1245 else if (digit(c)) 1246 state = PS_NUMBER; 1247 else if (ctype(c, C_VAR1)) 1248 state = PS_VAR1; 1249 else 1250 state = PS_END; 1251 break; 1252 case PS_IDENT: 1253 if (!letnum(c)) { 1254 state = PS_END; 1255 if (c == '[') { 1256 char *tmp, *p; 1257 1258 if (!arraysub(&tmp)) 1259 yyerror("missing ]\n"); 1260 *wp++ = c; 1261 for (p = tmp; *p; ) { 1262 Xcheck(*wsp, wp); 1263 *wp++ = *p++; 1264 } 1265 afree(tmp, ATEMP); 1266 c = getsc(); /* the ] */ 1267 } 1268 } 1269 break; 1270 case PS_NUMBER: 1271 if (!digit(c)) 1272 state = PS_END; 1273 break; 1274 case PS_VAR1: 1275 state = PS_END; 1276 break; 1277 case PS_END: /* keep gcc happy */ 1278 break; 1279 } 1280 if (state == PS_END) { 1281 *wp++ = '\0'; /* end of variable part */ 1282 ungetsc(c); 1283 break; 1284 } 1285 Xcheck(*wsp, wp); 1286 *wp++ = c; 1287 } 1288 return wp; 1289 } 1290 1291 /* 1292 * Save an array subscript - returns true if matching bracket found, false 1293 * if eof or newline was found. 1294 * (Returned string double null terminated) 1295 */ 1296 static int 1297 arraysub(strp) 1298 char **strp; 1299 { 1300 XString ws; 1301 char *wp; 1302 char c; 1303 int depth = 1; /* we are just past the initial [ */ 1304 1305 Xinit(ws, wp, 32, ATEMP); 1306 1307 do { 1308 c = getsc(); 1309 Xcheck(ws, wp); 1310 *wp++ = c; 1311 if (c == '[') 1312 depth++; 1313 else if (c == ']') 1314 depth--; 1315 } while (depth > 0 && c && c != '\n'); 1316 1317 *wp++ = '\0'; 1318 *strp = Xclose(ws, wp); 1319 1320 return depth == 0 ? 1 : 0; 1321 } 1322 1323 /* Unget a char: handles case when we are already at the start of the buffer */ 1324 static const char * 1325 ungetsc(c) 1326 int c; 1327 { 1328 if (backslash_skip) 1329 backslash_skip--; 1330 /* Don't unget eof... */ 1331 if (source->str == null && c == '\0') 1332 return source->str; 1333 if (source->str > source->start) 1334 source->str--; 1335 else { 1336 Source *s; 1337 1338 s = pushs(SREREAD, source->areap); 1339 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1340 s->start = s->str = s->ugbuf; 1341 s->next = source; 1342 source = s; 1343 } 1344 return source->str; 1345 } 1346 1347 1348 /* Called to get a char that isn't a \newline sequence. */ 1349 static int 1350 getsc_bn ARGS((void)) 1351 { 1352 int c, c2; 1353 1354 if (ignore_backslash_newline) 1355 return getsc_(); 1356 1357 if (backslash_skip == 1) { 1358 backslash_skip = 2; 1359 return getsc_(); 1360 } 1361 1362 backslash_skip = 0; 1363 1364 while (1) { 1365 c = getsc_(); 1366 if (c == '\\') { 1367 if ((c2 = getsc_()) == '\n') 1368 /* ignore the \newline; get the next char... */ 1369 continue; 1370 ungetsc(c2); 1371 backslash_skip = 1; 1372 } 1373 return c; 1374 } 1375 } 1376 1377 static Lex_state * 1378 push_state_(si, old_end) 1379 State_info *si; 1380 Lex_state *old_end; 1381 { 1382 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP); 1383 1384 new[0].ls_info.base = old_end; 1385 si->base = &new[0]; 1386 si->end = &new[STATE_BSIZE]; 1387 return &new[1]; 1388 } 1389 1390 static Lex_state * 1391 pop_state_(si, old_end) 1392 State_info *si; 1393 Lex_state *old_end; 1394 { 1395 Lex_state *old_base = si->base; 1396 1397 si->base = old_end->ls_info.base - STATE_BSIZE; 1398 si->end = old_end->ls_info.base; 1399 1400 afree(old_base, ATEMP); 1401 1402 return si->base + STATE_BSIZE - 1; 1403 } 1404