1 /* $NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $ */ 2 3 /* 4 * lexical analysis and source input 5 */ 6 #include <sys/cdefs.h> 7 8 #ifndef lint 9 __RCSID("$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $"); 10 #endif 11 12 13 #include "sh.h" 14 #include <ctype.h> 15 16 17 /* Structure to keep track of the lexing state and the various pieces of info 18 * needed for each particular state. 19 */ 20 typedef struct lex_state Lex_state; 21 struct lex_state { 22 int ls_state; 23 union { 24 /* $(...) */ 25 struct scsparen_info { 26 int nparen; /* count open parenthesis */ 27 int csstate; /* XXX remove */ 28 #define ls_scsparen ls_info.u_scsparen 29 } u_scsparen; 30 31 /* $((...)) */ 32 struct sasparen_info { 33 int nparen; /* count open parenthesis */ 34 int start; /* marks start of $(( in output str */ 35 #define ls_sasparen ls_info.u_sasparen 36 } u_sasparen; 37 38 /* ((...)) */ 39 struct sletparen_info { 40 int nparen; /* count open parenthesis */ 41 #define ls_sletparen ls_info.u_sletparen 42 } u_sletparen; 43 44 /* `...` */ 45 struct sbquote_info { 46 int indquotes; /* true if in double quotes: "`...`" */ 47 #define ls_sbquote ls_info.u_sbquote 48 } u_sbquote; 49 50 Lex_state *base; /* used to point to next state block */ 51 } ls_info; 52 }; 53 54 typedef struct State_info State_info; 55 struct State_info { 56 Lex_state *base; 57 Lex_state *end; 58 }; 59 60 61 static void readhere ARGS((struct ioword *iop)); 62 static int getsc__ ARGS((void)); 63 static void getsc_line ARGS((Source *s)); 64 static int getsc_bn ARGS((void)); 65 static char *get_brace_var ARGS((XString *wsp, char *wp)); 66 static int arraysub ARGS((char **strp)); 67 static const char *ungetsc ARGS((int c)); 68 static void gethere ARGS((void)); 69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end)); 70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end)); 71 72 static int backslash_skip; 73 static int ignore_backslash_newline; 74 75 /* optimized getsc_bn() */ 76 #define getsc() (*source->str != '\0' && *source->str != '\\' \ 77 && !backslash_skip ? *source->str++ : getsc_bn()) 78 /* optimized getsc__() */ 79 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__()) 80 81 #define STATE_BSIZE 32 82 83 #define PUSH_STATE(s) do { \ 84 if (++statep == state_info.end) \ 85 statep = push_state_(&state_info, statep); \ 86 state = statep->ls_state = (s); \ 87 } while (0) 88 89 #define POP_STATE() do { \ 90 if (--statep == state_info.base) \ 91 statep = pop_state_(&state_info, statep); \ 92 state = statep->ls_state; \ 93 } while (0) 94 95 96 97 /* 98 * Lexical analyzer 99 * 100 * tokens are not regular expressions, they are LL(1). 101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 102 * hence the state stack. 103 */ 104 105 int 106 yylex(cf) 107 int cf; 108 { 109 Lex_state states[STATE_BSIZE], *statep; 110 State_info state_info; 111 register int c, state; 112 XString ws; /* expandable output word */ 113 register char *wp; /* output word pointer */ 114 char *sp, *dp; 115 int c2; 116 117 118 Again: 119 states[0].ls_state = -1; 120 states[0].ls_info.base = (Lex_state *) 0; 121 statep = &states[1]; 122 state_info.base = states; 123 state_info.end = &states[STATE_BSIZE]; 124 125 Xinit(ws, wp, 64, ATEMP); 126 127 backslash_skip = 0; 128 ignore_backslash_newline = 0; 129 130 if (cf&ONEWORD) 131 state = SWORD; 132 #ifdef KSH 133 else if (cf&LETEXPR) { 134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */ 135 state = SLETPAREN; 136 statep->ls_sletparen.nparen = 0; 137 } 138 #endif /* KSH */ 139 else { /* normal lexing */ 140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 141 while ((c = getsc()) == ' ' || c == '\t') 142 ; 143 if (c == '#') { 144 ignore_backslash_newline++; 145 while ((c = getsc()) != '\0' && c != '\n') 146 ; 147 ignore_backslash_newline--; 148 } 149 ungetsc(c); 150 } 151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ 152 source->flags &= ~SF_ALIAS; 153 /* In POSIX mode, a trailing space only counts if we are 154 * parsing a simple command 155 */ 156 if (!Flag(FPOSIX) || (cf & CMDWORD)) 157 cf |= ALIAS; 158 } 159 160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ 161 statep->ls_state = state; 162 163 /* collect non-special or quoted characters to form word */ 164 while (!((c = getsc()) == 0 165 || ((state == SBASE || state == SHEREDELIM) 166 && ctype(c, C_LEX1)))) 167 { 168 Xcheck(ws, wp); 169 switch (state) { 170 case SBASE: 171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 172 *wp = EOS; /* temporary */ 173 if (is_wdvarname(Xstring(ws, wp), FALSE)) 174 { 175 char *p, *tmp; 176 177 if (arraysub(&tmp)) { 178 *wp++ = CHAR; 179 *wp++ = c; 180 for (p = tmp; *p; ) { 181 Xcheck(ws, wp); 182 *wp++ = CHAR; 183 *wp++ = *p++; 184 } 185 afree(tmp, ATEMP); 186 break; 187 } else { 188 Source *s; 189 190 s = pushs(SREREAD, 191 source->areap); 192 s->start = s->str 193 = s->u.freeme = tmp; 194 s->next = source; 195 source = s; 196 } 197 } 198 *wp++ = CHAR; 199 *wp++ = c; 200 break; 201 } 202 /* fall through.. */ 203 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 204 #ifdef KSH 205 if (c == '*' || c == '@' || c == '+' || c == '?' 206 || c == '!') 207 { 208 c2 = getsc(); 209 if (c2 == '(' /*)*/ ) { 210 *wp++ = OPAT; 211 *wp++ = c; 212 PUSH_STATE(SPATTERN); 213 break; 214 } 215 ungetsc(c2); 216 } 217 #endif /* KSH */ 218 /* fall through.. */ 219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 220 switch (c) { 221 case '\\': 222 c = getsc(); 223 #ifdef OS2 224 if (isalnum((unsigned char)c)) { 225 *wp++ = CHAR, *wp++ = '\\'; 226 *wp++ = CHAR, *wp++ = c; 227 } else 228 #endif 229 if (c) /* trailing \ is lost */ 230 *wp++ = QCHAR, *wp++ = c; 231 break; 232 case '\'': 233 *wp++ = OQUOTE; 234 ignore_backslash_newline++; 235 PUSH_STATE(SSQUOTE); 236 break; 237 case '"': 238 *wp++ = OQUOTE; 239 PUSH_STATE(SDQUOTE); 240 break; 241 default: 242 goto Subst; 243 } 244 break; 245 246 Subst: 247 switch (c) { 248 case '\\': 249 c = getsc(); 250 switch (c) { 251 case '\\': 252 case '$': case '`': 253 *wp++ = QCHAR, *wp++ = c; 254 break; 255 case '"': 256 if ((cf & HEREDOC) == 0) { 257 *wp++ = QCHAR, *wp++ = c; 258 break; 259 } 260 /* FALLTROUGH */ 261 default: 262 Xcheck(ws, wp); 263 if (c) { /* trailing \ is lost */ 264 *wp++ = CHAR, *wp++ = '\\'; 265 *wp++ = CHAR, *wp++ = c; 266 } 267 break; 268 } 269 break; 270 case '$': 271 c = getsc(); 272 if (c == '(') /*)*/ { 273 c = getsc(); 274 if (c == '(') /*)*/ { 275 PUSH_STATE(SASPAREN); 276 statep->ls_sasparen.nparen = 2; 277 statep->ls_sasparen.start = 278 Xsavepos(ws, wp); 279 *wp++ = EXPRSUB; 280 } else { 281 ungetsc(c); 282 PUSH_STATE(SCSPAREN); 283 statep->ls_scsparen.nparen = 1; 284 statep->ls_scsparen.csstate = 0; 285 *wp++ = COMSUB; 286 } 287 } else if (c == '{') /*}*/ { 288 *wp++ = OSUBST; 289 *wp++ = '{'; /*}*/ 290 wp = get_brace_var(&ws, wp); 291 c = getsc(); 292 /* allow :# and :% (ksh88 compat) */ 293 if (c == ':') { 294 *wp++ = CHAR, *wp++ = c; 295 c = getsc(); 296 } 297 /* If this is a trim operation, 298 * treat (,|,) specially in STBRACE. 299 */ 300 if (c == '#' || c == '%') { 301 ungetsc(c); 302 PUSH_STATE(STBRACE); 303 } else { 304 ungetsc(c); 305 PUSH_STATE(SBRACE); 306 } 307 } else if (ctype(c, C_ALPHA)) { 308 *wp++ = OSUBST; 309 *wp++ = 'X'; 310 do { 311 Xcheck(ws, wp); 312 *wp++ = c; 313 c = getsc(); 314 } while (ctype(c, C_ALPHA|C_DIGIT)); 315 *wp++ = '\0'; 316 *wp++ = CSUBST; 317 *wp++ = 'X'; 318 ungetsc(c); 319 } else if (ctype(c, C_DIGIT|C_VAR1)) { 320 Xcheck(ws, wp); 321 *wp++ = OSUBST; 322 *wp++ = 'X'; 323 *wp++ = c; 324 *wp++ = '\0'; 325 *wp++ = CSUBST; 326 *wp++ = 'X'; 327 } else { 328 *wp++ = CHAR, *wp++ = '$'; 329 ungetsc(c); 330 } 331 break; 332 case '`': 333 PUSH_STATE(SBQUOTE); 334 *wp++ = COMSUB; 335 /* Need to know if we are inside double quotes 336 * since sh/at&t-ksh translate the \" to " in 337 * "`..\"..`". 338 * This is not done in posix mode (section 339 * 3.2.3, Double Quotes: "The backquote shall 340 * retain its special meaning introducing the 341 * other form of command substitution (see 342 * 3.6.3). The portion of the quoted string 343 * from the initial backquote and the 344 * characters up to the next backquote that 345 * is not preceded by a backslash (having 346 * escape characters removed) defines that 347 * command whose output replaces `...` when 348 * the word is expanded." 349 * Section 3.6.3, Command Substitution: 350 * "Within the backquoted style of command 351 * substitution, backslash shall retain its 352 * literal meaning, except when followed by 353 * $ ` \."). 354 */ 355 statep->ls_sbquote.indquotes = 0; 356 if (!Flag(FPOSIX)) { 357 Lex_state *s = statep; 358 Lex_state *base = state_info.base; 359 while (1) { 360 for (; s != base; s--) { 361 if (s->ls_state == SDQUOTE) { 362 statep->ls_sbquote.indquotes = 1; 363 break; 364 } 365 } 366 if (s != base) 367 break; 368 if (!(s = s->ls_info.base)) 369 break; 370 base = s-- - STATE_BSIZE; 371 } 372 } 373 break; 374 default: 375 *wp++ = CHAR, *wp++ = c; 376 } 377 break; 378 379 case SSQUOTE: 380 if (c == '\'') { 381 POP_STATE(); 382 *wp++ = CQUOTE; 383 ignore_backslash_newline--; 384 } else 385 *wp++ = QCHAR, *wp++ = c; 386 break; 387 388 case SDQUOTE: 389 if (c == '"') { 390 POP_STATE(); 391 *wp++ = CQUOTE; 392 } else 393 goto Subst; 394 break; 395 396 case SCSPAREN: /* $( .. ) */ 397 /* todo: deal with $(...) quoting properly 398 * kludge to partly fake quoting inside $(..): doesn't 399 * really work because nested $(..) or ${..} inside 400 * double quotes aren't dealt with. 401 */ 402 switch (statep->ls_scsparen.csstate) { 403 case 0: /* normal */ 404 switch (c) { 405 case '(': 406 statep->ls_scsparen.nparen++; 407 break; 408 case ')': 409 statep->ls_scsparen.nparen--; 410 break; 411 case '\\': 412 statep->ls_scsparen.csstate = 1; 413 break; 414 case '"': 415 statep->ls_scsparen.csstate = 2; 416 break; 417 case '\'': 418 statep->ls_scsparen.csstate = 4; 419 ignore_backslash_newline++; 420 break; 421 } 422 break; 423 424 case 1: /* backslash in normal mode */ 425 case 3: /* backslash in double quotes */ 426 --statep->ls_scsparen.csstate; 427 break; 428 429 case 2: /* double quotes */ 430 if (c == '"') 431 statep->ls_scsparen.csstate = 0; 432 else if (c == '\\') 433 statep->ls_scsparen.csstate = 3; 434 break; 435 436 case 4: /* single quotes */ 437 if (c == '\'') { 438 statep->ls_scsparen.csstate = 0; 439 ignore_backslash_newline--; 440 } 441 break; 442 } 443 if (statep->ls_scsparen.nparen == 0) { 444 POP_STATE(); 445 *wp++ = 0; /* end of COMSUB */ 446 } else 447 *wp++ = c; 448 break; 449 450 case SASPAREN: /* $(( .. )) */ 451 /* todo: deal with $((...); (...)) properly */ 452 /* XXX should nest using existing state machine 453 * (embed "..", $(...), etc.) */ 454 if (c == '(') 455 statep->ls_sasparen.nparen++; 456 else if (c == ')') { 457 statep->ls_sasparen.nparen--; 458 if (statep->ls_sasparen.nparen == 1) { 459 /*(*/ 460 if ((c2 = getsc()) == ')') { 461 POP_STATE(); 462 *wp++ = 0; /* end of EXPRSUB */ 463 break; 464 } else { 465 char *s; 466 467 ungetsc(c2); 468 /* mismatched parenthesis - 469 * assume we were really 470 * parsing a $(..) expression 471 */ 472 s = Xrestpos(ws, wp, 473 statep->ls_sasparen.start); 474 memmove(s + 1, s, wp - s); 475 *s++ = COMSUB; 476 *s = '('; /*)*/ 477 wp++; 478 statep->ls_scsparen.nparen = 1; 479 statep->ls_scsparen.csstate = 0; 480 state = statep->ls_state 481 = SCSPAREN; 482 483 } 484 } 485 } 486 *wp++ = c; 487 break; 488 489 case SBRACE: 490 /*{*/ 491 if (c == '}') { 492 POP_STATE(); 493 *wp++ = CSUBST; 494 *wp++ = /*{*/ '}'; 495 } else 496 goto Sbase1; 497 break; 498 499 case STBRACE: 500 /* Same as SBRACE, except (,|,) treated specially */ 501 /*{*/ 502 if (c == '}') { 503 POP_STATE(); 504 *wp++ = CSUBST; 505 *wp++ = /*{*/ '}'; 506 } else if (c == '|') { 507 *wp++ = SPAT; 508 } else if (c == '(') { 509 *wp++ = OPAT; 510 *wp++ = ' '; /* simile for @ */ 511 PUSH_STATE(SPATTERN); 512 } else 513 goto Sbase1; 514 break; 515 516 case SBQUOTE: 517 if (c == '`') { 518 *wp++ = 0; 519 POP_STATE(); 520 } else if (c == '\\') { 521 switch (c = getsc()) { 522 case '\\': 523 case '$': case '`': 524 *wp++ = c; 525 break; 526 case '"': 527 if (statep->ls_sbquote.indquotes) { 528 *wp++ = c; 529 break; 530 } 531 /* fall through.. */ 532 default: 533 if (c) { /* trailing \ is lost */ 534 *wp++ = '\\'; 535 *wp++ = c; 536 } 537 break; 538 } 539 } else 540 *wp++ = c; 541 break; 542 543 case SWORD: /* ONEWORD */ 544 goto Subst; 545 546 #ifdef KSH 547 case SLETPAREN: /* LETEXPR: (( ... )) */ 548 /*(*/ 549 if (c == ')') { 550 if (statep->ls_sletparen.nparen > 0) 551 --statep->ls_sletparen.nparen; 552 /*(*/ 553 else if ((c2 = getsc()) == ')') { 554 c = 0; 555 *wp++ = CQUOTE; 556 goto Done; 557 } else 558 ungetsc(c2); 559 } else if (c == '(') 560 /* parenthesis inside quotes and backslashes 561 * are lost, but at&t ksh doesn't count them 562 * either 563 */ 564 ++statep->ls_sletparen.nparen; 565 goto Sbase2; 566 #endif /* KSH */ 567 568 case SHEREDELIM: /* <<,<<- delimiter */ 569 /* XXX chuck this state (and the next) - use 570 * the existing states ($ and \`..` should be 571 * stripped of their specialness after the 572 * fact). 573 */ 574 /* here delimiters need a special case since 575 * $ and `..` are not to be treated specially 576 */ 577 if (c == '\\') { 578 c = getsc(); 579 if (c) { /* trailing \ is lost */ 580 *wp++ = QCHAR; 581 *wp++ = c; 582 } 583 } else if (c == '\'') { 584 PUSH_STATE(SSQUOTE); 585 *wp++ = OQUOTE; 586 ignore_backslash_newline++; 587 } else if (c == '"') { 588 state = statep->ls_state = SHEREDQUOTE; 589 *wp++ = OQUOTE; 590 } else { 591 *wp++ = CHAR; 592 *wp++ = c; 593 } 594 break; 595 596 case SHEREDQUOTE: /* " in <<,<<- delimiter */ 597 if (c == '"') { 598 *wp++ = CQUOTE; 599 state = statep->ls_state = SHEREDELIM; 600 } else { 601 if (c == '\\') { 602 switch (c = getsc()) { 603 case '\\': case '"': 604 case '$': case '`': 605 break; 606 default: 607 if (c) { /* trailing \ lost */ 608 *wp++ = CHAR; 609 *wp++ = '\\'; 610 } 611 break; 612 } 613 } 614 *wp++ = CHAR; 615 *wp++ = c; 616 } 617 break; 618 619 case SPATTERN: /* in *(...|...) pattern (*+?@!) */ 620 if ( /*(*/ c == ')') { 621 *wp++ = CPAT; 622 POP_STATE(); 623 } else if (c == '|') { 624 *wp++ = SPAT; 625 } else if (c == '(') { 626 *wp++ = OPAT; 627 *wp++ = ' '; /* simile for @ */ 628 PUSH_STATE(SPATTERN); 629 } else 630 goto Sbase1; 631 break; 632 } 633 } 634 Done: 635 Xcheck(ws, wp); 636 if (statep != &states[1]) 637 /* XXX figure out what is missing */ 638 yyerror("no closing quote\n"); 639 640 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 641 if (state == SHEREDELIM) 642 state = SBASE; 643 644 dp = Xstring(ws, wp); 645 if ((c == '<' || c == '>') && state == SBASE 646 && ((c2 = Xlength(ws, wp)) == 0 647 || (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) 648 { 649 struct ioword *iop = 650 (struct ioword *) alloc(sizeof(*iop), ATEMP); 651 652 if (c2 == 2) 653 iop->unit = dp[1] - '0'; 654 else 655 iop->unit = c == '>'; /* 0 for <, 1 for > */ 656 657 c2 = getsc(); 658 /* <<, >>, <> are ok, >< is not */ 659 if (c == c2 || (c == '<' && c2 == '>')) { 660 iop->flag = c == c2 ? 661 (c == '>' ? IOCAT : IOHERE) : IORDWR; 662 if (iop->flag == IOHERE) { 663 if ((c2 = getsc()) == '-') { 664 iop->flag |= IOSKIP; 665 } else { 666 ungetsc(c2); 667 } 668 } 669 } else if (c2 == '&') 670 iop->flag = IODUP | (c == '<' ? IORDUP : 0); 671 else { 672 iop->flag = c == '>' ? IOWRITE : IOREAD; 673 if (c == '>' && c2 == '|') 674 iop->flag |= IOCLOB; 675 else 676 ungetsc(c2); 677 } 678 679 iop->name = (char *) 0; 680 iop->delim = (char *) 0; 681 iop->heredoc = (char *) 0; 682 Xfree(ws, wp); /* free word */ 683 yylval.iop = iop; 684 return REDIR; 685 } 686 687 if (wp == dp && state == SBASE) { 688 Xfree(ws, wp); /* free word */ 689 /* no word, process LEX1 character */ 690 switch (c) { 691 default: 692 return c; 693 694 case '|': 695 case '&': 696 case ';': 697 if ((c2 = getsc()) == c) 698 c = (c == ';') ? BREAK : 699 (c == '|') ? LOGOR : 700 (c == '&') ? LOGAND : 701 YYERRCODE; 702 #ifdef KSH 703 else if (c == '|' && c2 == '&') 704 c = COPROC; 705 #endif /* KSH */ 706 else 707 ungetsc(c2); 708 return c; 709 710 case '\n': 711 gethere(); 712 if (cf & CONTIN) 713 goto Again; 714 return c; 715 716 case '(': /*)*/ 717 #ifdef KSH 718 if ((c2 = getsc()) == '(') /*)*/ 719 /* XXX need to handle ((...); (...)) */ 720 c = MDPAREN; 721 else 722 ungetsc(c2); 723 #endif /* KSH */ 724 return c; 725 /*(*/ 726 case ')': 727 return c; 728 } 729 } 730 731 *wp++ = EOS; /* terminate word */ 732 yylval.cp = Xclose(ws, wp); 733 if (state == SWORD 734 #ifdef KSH 735 || state == SLETPAREN 736 #endif /* KSH */ 737 ) /* ONEWORD? */ 738 return LWORD; 739 ungetsc(c); /* unget terminator */ 740 741 /* copy word to unprefixed string ident */ 742 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; ) 743 *dp++ = *sp++; 744 /* Make sure the ident array stays '\0' padded */ 745 memset(dp, 0, (ident+IDENT) - dp + 1); 746 if (c != EOS) 747 *ident = '\0'; /* word is not unquoted */ 748 749 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { 750 struct tbl *p; 751 int h = hash(ident); 752 753 /* { */ 754 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h)) 755 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) 756 { 757 afree(yylval.cp, ATEMP); 758 return p->val.i; 759 } 760 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h)) 761 && (p->flag & ISSET)) 762 { 763 register Source *s; 764 765 for (s = source; s->type == SALIAS; s = s->next) 766 if (s->u.tblp == p) 767 return LWORD; 768 /* push alias expansion */ 769 s = pushs(SALIAS, source->areap); 770 s->start = s->str = p->val.s; 771 s->u.tblp = p; 772 s->next = source; 773 source = s; 774 afree(yylval.cp, ATEMP); 775 goto Again; 776 } 777 } 778 779 return LWORD; 780 } 781 782 static void 783 gethere() 784 { 785 register struct ioword **p; 786 787 for (p = heres; p < herep; p++) 788 readhere(*p); 789 herep = heres; 790 } 791 792 /* 793 * read "<<word" text into temp file 794 */ 795 796 static void 797 readhere(iop) 798 struct ioword *iop; 799 { 800 register int c; 801 char *volatile eof; 802 char *eofp; 803 int skiptabs; 804 XString xs; 805 char *xp; 806 int xpos; 807 808 eof = evalstr(iop->delim, 0); 809 810 if (!(iop->flag & IOEVAL)) 811 ignore_backslash_newline++; 812 813 Xinit(xs, xp, 256, ATEMP); 814 815 for (;;) { 816 eofp = eof; 817 skiptabs = iop->flag & IOSKIP; 818 xpos = Xsavepos(xs, xp); 819 while ((c = getsc()) != 0) { 820 if (skiptabs) { 821 if (c == '\t') 822 continue; 823 skiptabs = 0; 824 } 825 if (c != *eofp) 826 break; 827 Xcheck(xs, xp); 828 Xput(xs, xp, c); 829 eofp++; 830 } 831 /* Allow EOF here so commands with out trailing newlines 832 * will work (eg, ksh -c '...', $(...), etc). 833 */ 834 if (*eofp == '\0' && (c == 0 || c == '\n')) { 835 xp = Xrestpos(xs, xp, xpos); 836 break; 837 } 838 ungetsc(c); 839 while ((c = getsc()) != '\n') { 840 if (c == 0) 841 yyerror("here document `%s' unclosed\n", eof); 842 Xcheck(xs, xp); 843 Xput(xs, xp, c); 844 } 845 Xcheck(xs, xp); 846 Xput(xs, xp, c); 847 } 848 Xput(xs, xp, '\0'); 849 iop->heredoc = Xclose(xs, xp); 850 851 if (!(iop->flag & IOEVAL)) 852 ignore_backslash_newline--; 853 } 854 855 void 856 #ifdef HAVE_PROTOTYPES 857 yyerror(const char *fmt, ...) 858 #else 859 yyerror(fmt, va_alist) 860 const char *fmt; 861 va_dcl 862 #endif 863 { 864 va_list va; 865 866 /* pop aliases and re-reads */ 867 while (source->type == SALIAS || source->type == SREREAD) 868 source = source->next; 869 source->str = null; /* zap pending input */ 870 871 error_prefix(TRUE); 872 SH_VA_START(va, fmt); 873 shf_vfprintf(shl_out, fmt, va); 874 va_end(va); 875 errorf(null); 876 } 877 878 /* 879 * input for yylex with alias expansion 880 */ 881 882 Source * 883 pushs(type, areap) 884 int type; 885 Area *areap; 886 { 887 register Source *s; 888 889 s = (Source *) alloc(sizeof(Source), areap); 890 s->type = type; 891 s->str = null; 892 s->start = NULL; 893 s->line = 0; 894 s->errline = 0; 895 s->file = NULL; 896 s->flags = 0; 897 s->next = NULL; 898 s->areap = areap; 899 if (type == SFILE || type == SSTDIN) { 900 char *dummy; 901 Xinit(s->xs, dummy, 256, s->areap); 902 } else 903 memset(&s->xs, 0, sizeof(s->xs)); 904 return s; 905 } 906 907 static int 908 getsc__() 909 { 910 register Source *s = source; 911 register int c; 912 913 while ((c = *s->str++) == 0) { 914 s->str = NULL; /* return 0 for EOF by default */ 915 switch (s->type) { 916 case SEOF: 917 s->str = null; 918 return 0; 919 920 case SSTDIN: 921 case SFILE: 922 getsc_line(s); 923 break; 924 925 case SWSTR: 926 break; 927 928 case SSTRING: 929 break; 930 931 case SWORDS: 932 s->start = s->str = *s->u.strv++; 933 s->type = SWORDSEP; 934 break; 935 936 case SWORDSEP: 937 if (*s->u.strv == NULL) { 938 s->start = s->str = newline; 939 s->type = SEOF; 940 } else { 941 s->start = s->str = space; 942 s->type = SWORDS; 943 } 944 break; 945 946 case SALIAS: 947 if (s->flags & SF_ALIASEND) { 948 /* pass on an unused SF_ALIAS flag */ 949 source = s->next; 950 source->flags |= s->flags & SF_ALIAS; 951 s = source; 952 } else if (*s->u.tblp->val.s 953 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) 954 { 955 source = s = s->next; /* pop source stack */ 956 /* Note that this alias ended with a space, 957 * enabling alias expansion on the following 958 * word. 959 */ 960 s->flags |= SF_ALIAS; 961 } else { 962 /* At this point, we need to keep the current 963 * alias in the source list so recursive 964 * aliases can be detected and we also need 965 * to return the next character. Do this 966 * by temporarily popping the alias to get 967 * the next character and then put it back 968 * in the source list with the SF_ALIASEND 969 * flag set. 970 */ 971 source = s->next; /* pop source stack */ 972 source->flags |= s->flags & SF_ALIAS; 973 c = getsc__(); 974 if (c) { 975 s->flags |= SF_ALIASEND; 976 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 977 s->start = s->str = s->ugbuf; 978 s->next = source; 979 source = s; 980 } else { 981 s = source; 982 /* avoid reading eof twice */ 983 s->str = NULL; 984 break; 985 } 986 } 987 continue; 988 989 case SREREAD: 990 if (s->start != s->ugbuf) /* yuck */ 991 afree(s->u.freeme, ATEMP); 992 source = s = s->next; 993 continue; 994 } 995 if (s->str == NULL) { 996 s->type = SEOF; 997 s->start = s->str = null; 998 return '\0'; 999 } 1000 if (s->flags & SF_ECHO) { 1001 shf_puts(s->str, shl_out); 1002 shf_flush(shl_out); 1003 } 1004 } 1005 return c; 1006 } 1007 1008 static void 1009 getsc_line(s) 1010 Source *s; 1011 { 1012 char *xp = Xstring(s->xs, xp); 1013 int interactive = Flag(FTALKING) && s->type == SSTDIN; 1014 int have_tty = interactive && (s->flags & SF_TTY); 1015 1016 /* Done here to ensure nothing odd happens when a timeout occurs */ 1017 XcheckN(s->xs, xp, LINE); 1018 *xp = '\0'; 1019 s->start = s->str = xp; 1020 1021 #ifdef KSH 1022 if (have_tty && ksh_tmout) { 1023 ksh_tmout_state = TMOUT_READING; 1024 alarm(ksh_tmout); 1025 } 1026 #endif /* KSH */ 1027 #ifdef EDIT 1028 if (have_tty && (0 1029 # ifdef VI 1030 || Flag(FVI) 1031 # endif /* VI */ 1032 # ifdef EMACS 1033 || Flag(FEMACS) || Flag(FGMACS) 1034 # endif /* EMACS */ 1035 )) 1036 { 1037 int nread; 1038 1039 nread = x_read(xp, LINE); 1040 if (nread < 0) /* read error */ 1041 nread = 0; 1042 xp[nread] = '\0'; 1043 xp += nread; 1044 } 1045 else 1046 #endif /* EDIT */ 1047 { 1048 if (interactive) { 1049 pprompt(prompt, 0); 1050 } else 1051 s->line++; 1052 1053 while (1) { 1054 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1055 1056 if (!p && shf_error(s->u.shf) 1057 && shf_errno(s->u.shf) == EINTR) 1058 { 1059 shf_clearerr(s->u.shf); 1060 if (trap) 1061 runtraps(0); 1062 continue; 1063 } 1064 if (!p || (xp = p, xp[-1] == '\n')) 1065 break; 1066 /* double buffer size */ 1067 xp++; /* move past null so doubling works... */ 1068 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1069 xp--; /* ...and move back again */ 1070 } 1071 /* flush any unwanted input so other programs/builtins 1072 * can read it. Not very optimal, but less error prone 1073 * than flushing else where, dealing with redirections, 1074 * etc.. 1075 * todo: reduce size of shf buffer (~128?) if SSTDIN 1076 */ 1077 if (s->type == SSTDIN) 1078 shf_flush(s->u.shf); 1079 } 1080 /* XXX: temporary kludge to restore source after a 1081 * trap may have been executed. 1082 */ 1083 source = s; 1084 #ifdef KSH 1085 if (have_tty && ksh_tmout) 1086 { 1087 ksh_tmout_state = TMOUT_EXECUTING; 1088 alarm(0); 1089 } 1090 #endif /* KSH */ 1091 s->start = s->str = Xstring(s->xs, xp); 1092 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1093 /* Note: if input is all nulls, this is not eof */ 1094 if (Xlength(s->xs, xp) == 0) { /* EOF */ 1095 if (s->type == SFILE) 1096 shf_fdclose(s->u.shf); 1097 s->str = NULL; 1098 } else if (interactive) { 1099 #ifdef HISTORY 1100 char *p = Xstring(s->xs, xp); 1101 if (cur_prompt == PS1) 1102 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS)) 1103 p++; 1104 if (*p) { 1105 # ifdef EASY_HISTORY 1106 if (cur_prompt == PS2) 1107 histappend(Xstring(s->xs, xp), 1); 1108 else 1109 # endif /* EASY_HISTORY */ 1110 { 1111 s->line++; 1112 histsave(s->line, s->str, 1); 1113 } 1114 } 1115 #endif /* HISTORY */ 1116 } 1117 if (interactive) 1118 set_prompt(PS2, (Source *) 0); 1119 } 1120 1121 void 1122 set_prompt(to, s) 1123 int to; 1124 Source *s; 1125 { 1126 cur_prompt = to; 1127 1128 switch (to) { 1129 case PS1: /* command */ 1130 #ifdef KSH 1131 /* Substitute ! and !! here, before substitutions are done 1132 * so ! in expanded variables are not expanded. 1133 * NOTE: this is not what at&t ksh does (it does it after 1134 * substitutions, POSIX doesn't say which is to be done. 1135 */ 1136 { 1137 struct shf *shf; 1138 char * volatile ps1; 1139 Area *saved_atemp; 1140 1141 ps1 = str_val(global("PS1")); 1142 shf = shf_sopen((char *) 0, strlen(ps1) * 2, 1143 SHF_WR | SHF_DYNAMIC, (struct shf *) 0); 1144 while (*ps1) { 1145 if (*ps1 != '!' || *++ps1 == '!') 1146 shf_putchar(*ps1++, shf); 1147 else 1148 shf_fprintf(shf, "%d", 1149 s ? s->line + 1 : 0); 1150 } 1151 ps1 = shf_sclose(shf); 1152 saved_atemp = ATEMP; 1153 newenv(E_ERRH); 1154 if (ksh_sigsetjmp(e->jbuf, 0)) { 1155 prompt = safe_prompt; 1156 /* Don't print an error - assume it has already 1157 * been printed. Reason is we may have forked 1158 * to run a command and the child may be 1159 * unwinding its stack through this code as it 1160 * exits. 1161 */ 1162 } else 1163 prompt = str_save(substitute(ps1, 0), 1164 saved_atemp); 1165 quitenv(); 1166 } 1167 #else /* KSH */ 1168 prompt = str_val(global("PS1")); 1169 #endif /* KSH */ 1170 break; 1171 1172 case PS2: /* command continuation */ 1173 prompt = str_val(global("PS2")); 1174 break; 1175 } 1176 } 1177 1178 /* See also related routine, promptlen() in edit.c */ 1179 void 1180 pprompt(cp, ntruncate) 1181 const char *cp; 1182 int ntruncate; 1183 { 1184 #if 0 1185 char nbuf[32]; 1186 int c; 1187 1188 while (*cp != 0) { 1189 if (*cp != '!') 1190 c = *cp++; 1191 else if (*++cp == '!') 1192 c = *cp++; 1193 else { 1194 int len; 1195 char *p; 1196 1197 shf_snprintf(p = nbuf, sizeof(nbuf), "%d", 1198 source->line + 1); 1199 len = strlen(nbuf); 1200 if (ntruncate) { 1201 if (ntruncate >= len) { 1202 ntruncate -= len; 1203 continue; 1204 } 1205 p += ntruncate; 1206 len -= ntruncate; 1207 ntruncate = 0; 1208 } 1209 shf_write(p, len, shl_out); 1210 continue; 1211 } 1212 if (ntruncate) 1213 --ntruncate; 1214 else 1215 shf_putc(c, shl_out); 1216 } 1217 #endif /* 0 */ 1218 shf_puts(cp + ntruncate, shl_out); 1219 shf_flush(shl_out); 1220 } 1221 1222 /* Read the variable part of a ${...} expression (ie, up to but not including 1223 * the :[-+?=#%] or close-brace. 1224 */ 1225 static char * 1226 get_brace_var(wsp, wp) 1227 XString *wsp; 1228 char *wp; 1229 { 1230 enum parse_state { 1231 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1232 PS_NUMBER, PS_VAR1, PS_END 1233 } 1234 state; 1235 char c; 1236 1237 state = PS_INITIAL; 1238 while (1) { 1239 c = getsc(); 1240 /* State machine to figure out where the variable part ends. */ 1241 switch (state) { 1242 case PS_INITIAL: 1243 if (c == '#') { 1244 state = PS_SAW_HASH; 1245 break; 1246 } 1247 /* fall through.. */ 1248 case PS_SAW_HASH: 1249 if (letter(c)) 1250 state = PS_IDENT; 1251 else if (digit(c)) 1252 state = PS_NUMBER; 1253 else if (ctype(c, C_VAR1)) 1254 state = PS_VAR1; 1255 else 1256 state = PS_END; 1257 break; 1258 case PS_IDENT: 1259 if (!letnum(c)) { 1260 state = PS_END; 1261 if (c == '[') { 1262 char *tmp, *p; 1263 1264 if (!arraysub(&tmp)) 1265 yyerror("missing ]\n"); 1266 *wp++ = c; 1267 for (p = tmp; *p; ) { 1268 Xcheck(*wsp, wp); 1269 *wp++ = *p++; 1270 } 1271 afree(tmp, ATEMP); 1272 c = getsc(); /* the ] */ 1273 } 1274 } 1275 break; 1276 case PS_NUMBER: 1277 if (!digit(c)) 1278 state = PS_END; 1279 break; 1280 case PS_VAR1: 1281 state = PS_END; 1282 break; 1283 case PS_END: /* keep gcc happy */ 1284 break; 1285 } 1286 if (state == PS_END) { 1287 *wp++ = '\0'; /* end of variable part */ 1288 ungetsc(c); 1289 break; 1290 } 1291 Xcheck(*wsp, wp); 1292 *wp++ = c; 1293 } 1294 return wp; 1295 } 1296 1297 /* 1298 * Save an array subscript - returns true if matching bracket found, false 1299 * if eof or newline was found. 1300 * (Returned string double null terminated) 1301 */ 1302 static int 1303 arraysub(strp) 1304 char **strp; 1305 { 1306 XString ws; 1307 char *wp; 1308 char c; 1309 int depth = 1; /* we are just past the initial [ */ 1310 1311 Xinit(ws, wp, 32, ATEMP); 1312 1313 do { 1314 c = getsc(); 1315 Xcheck(ws, wp); 1316 *wp++ = c; 1317 if (c == '[') 1318 depth++; 1319 else if (c == ']') 1320 depth--; 1321 } while (depth > 0 && c && c != '\n'); 1322 1323 *wp++ = '\0'; 1324 *strp = Xclose(ws, wp); 1325 1326 return depth == 0 ? 1 : 0; 1327 } 1328 1329 /* Unget a char: handles case when we are already at the start of the buffer */ 1330 static const char * 1331 ungetsc(c) 1332 int c; 1333 { 1334 if (backslash_skip) 1335 backslash_skip--; 1336 /* Don't unget eof... */ 1337 if (source->str == null && c == '\0') 1338 return source->str; 1339 if (source->str > source->start) 1340 source->str--; 1341 else { 1342 Source *s; 1343 1344 s = pushs(SREREAD, source->areap); 1345 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1346 s->start = s->str = s->ugbuf; 1347 s->next = source; 1348 source = s; 1349 } 1350 return source->str; 1351 } 1352 1353 1354 /* Called to get a char that isn't a \newline sequence. */ 1355 static int 1356 getsc_bn ARGS((void)) 1357 { 1358 int c, c2; 1359 1360 if (ignore_backslash_newline) 1361 return getsc_(); 1362 1363 if (backslash_skip == 1) { 1364 backslash_skip = 2; 1365 return getsc_(); 1366 } 1367 1368 backslash_skip = 0; 1369 1370 while (1) { 1371 c = getsc_(); 1372 if (c == '\\') { 1373 if ((c2 = getsc_()) == '\n') 1374 /* ignore the \newline; get the next char... */ 1375 continue; 1376 ungetsc(c2); 1377 backslash_skip = 1; 1378 } 1379 return c; 1380 } 1381 } 1382 1383 static Lex_state * 1384 push_state_(si, old_end) 1385 State_info *si; 1386 Lex_state *old_end; 1387 { 1388 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP); 1389 1390 new[0].ls_info.base = old_end; 1391 si->base = &new[0]; 1392 si->end = &new[STATE_BSIZE]; 1393 return &new[1]; 1394 } 1395 1396 static Lex_state * 1397 pop_state_(si, old_end) 1398 State_info *si; 1399 Lex_state *old_end; 1400 { 1401 Lex_state *old_base = si->base; 1402 1403 si->base = old_end->ls_info.base - STATE_BSIZE; 1404 si->end = old_end->ls_info.base; 1405 1406 afree(old_base, ATEMP); 1407 1408 return si->base + STATE_BSIZE - 1; 1409 } 1410