1 /* $NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $ */ 2 3 /* 4 * lexical analysis and source input 5 */ 6 #include <sys/cdefs.h> 7 8 #ifndef lint 9 __RCSID("$NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $"); 10 #endif 11 12 13 #include "sh.h" 14 #include <ctype.h> 15 16 17 /* Structure to keep track of the lexing state and the various pieces of info 18 * needed for each particular state. 19 */ 20 typedef struct lex_state Lex_state; 21 struct lex_state { 22 int ls_state; 23 union { 24 /* $(...) */ 25 struct scsparen_info { 26 int nparen; /* count open parenthesis */ 27 int csstate; /* XXX remove */ 28 #define ls_scsparen ls_info.u_scsparen 29 } u_scsparen; 30 31 /* $((...)) */ 32 struct sasparen_info { 33 int nparen; /* count open parenthesis */ 34 int start; /* marks start of $(( in output str */ 35 #define ls_sasparen ls_info.u_sasparen 36 } u_sasparen; 37 38 /* ((...)) */ 39 struct sletparen_info { 40 int nparen; /* count open parenthesis */ 41 #define ls_sletparen ls_info.u_sletparen 42 } u_sletparen; 43 44 /* `...` */ 45 struct sbquote_info { 46 int indquotes; /* true if in double quotes: "`...`" */ 47 #define ls_sbquote ls_info.u_sbquote 48 } u_sbquote; 49 50 Lex_state *base; /* used to point to next state block */ 51 } ls_info; 52 }; 53 54 typedef struct State_info State_info; 55 struct State_info { 56 Lex_state *base; 57 Lex_state *end; 58 }; 59 60 61 static void readhere ARGS((struct ioword *iop)); 62 static int getsc__ ARGS((void)); 63 static void getsc_line ARGS((Source *s)); 64 static int getsc_bn ARGS((void)); 65 static char *get_brace_var ARGS((XString *wsp, char *wp)); 66 static int arraysub ARGS((char **strp)); 67 static const char *ungetsc ARGS((int c)); 68 static void gethere ARGS((void)); 69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end)); 70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end)); 71 72 static int backslash_skip; 73 static int ignore_backslash_newline; 74 75 /* optimized 
getsc_bn() */
/* Fast path: hand back the next source character directly unless it is
 * the end of the buffer, a backslash, or a backslash is being processed;
 * those cases are left to getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__() */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* Number of Lex_state slots in one block of the state stack. */
#define STATE_BSIZE	32

/* Enter lexer state (s).  Relies on the locals statep, state_info and
 * state of yylex(); a fresh block of slots is obtained from push_state_()
 * when the current block is full.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Leave the current lexer state and resume the enclosing one.  When the
 * bottom slot of the current block is reached, pop_state_() switches back
 * to the previous block.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)



/*
 * Lexical analyzer
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack.
 *
 * cf is a set of flag bits that steer the scan; the ones tested here are
 * ONEWORD, LETEXPR, HEREDELIM, HEREDOC, CMDWORD, VARASN, ARRAYVAR,
 * KEYWORD, ALIAS, ESACONLY and CONTIN.
 *
 * Returns one of:
 *	REDIR		a redirection operator; yylval.iop is filled in
 *	LWORD		a word; yylval.cp is the encoded word
 *	a keyword token	(p->val.i from the keywords table) when cf has
 *			KEYWORD and the word is an unquoted keyword
 *	BREAK, LOGOR, LOGAND, COPROC, MDPAREN or a single character
 *			when no word was collected
 *
 * The word is built as a byte string of markers: CHAR c and QCHAR c for
 * plain and quoted characters, OQUOTE/CQUOTE around quoted parts,
 * COMSUB ... 0 and EXPRSUB ... 0 for command and arithmetic substitution,
 * OSUBST ... CSUBST for parameter substitution, OPAT/SPAT/CPAT for
 * extended patterns; EOS ends the word.
 */

int
yylex(cf)
	int cf;
{
	Lex_state states[STATE_BSIZE], *statep;
	State_info state_info;
	register int c, state;
	XString ws;		/* expandable output word */
	register char *wp;	/* output word pointer */
	char *sp, *dp;
	int c2;


  Again:
	/* (Re)start with an empty state stack; slot 0 is the sentinel. */
	states[0].ls_state = -1;
	states[0].ls_info.base = (Lex_state *) 0;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &states[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf&ONEWORD)
		state = SWORD;
#ifdef KSH
	else if (cf&LETEXPR) {
		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
		state = SLETPAREN;
		statep->ls_sletparen.nparen = 0;
	}
#endif /* KSH */
	else {		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		while ((c = getsc()) == ' ' || c == '\t')
			;
		/* skip a comment up to, but not including, the newline */
		if (c == '#') {
			ignore_backslash_newline++;
			while ((c = getsc()) != '\0' && c != '\n')
				;
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* In POSIX mode, a trailing space only counts if we are
		 * parsing a simple command
		 */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
	statep->ls_state = state;

	/* collect non-special or quoted characters to form word */
	/* The loop ends at end of input, or at a C_LEX1 character while in
	 * one of the two outermost states (SBASE, SHEREDELIM).
	 */
	while (!((c = getsc()) == 0
		 || ((state == SBASE || state == SHEREDELIM)
		     && ctype(c, C_LEX1))))
	{
		Xcheck(ws, wp);
		switch (state) {
		  case SBASE:
			/* possible array subscript: name[...] */
			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
				*wp = EOS; /* temporary */
				if (is_wdvarname(Xstring(ws, wp), FALSE))
				{
					char *p, *tmp;

					if (arraysub(&tmp)) {
						/* matching ] found: copy the
						 * subscript as plain chars
						 */
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					} else {
						/* no matching ]: push the
						 * text read so far back as
						 * input to be lexed again
						 */
						Source *s;

						s = pushs(SREREAD,
							  source->areap);
						s->start = s->str
							= s->u.freeme = tmp;
						s->next = source;
						source = s;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* fall through.. */
		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
#ifdef KSH
			if (c == '*' || c == '@' || c == '+' || c == '?'
			    || c == '!')
			{
				c2 = getsc();
				if (c2 == '(' /*)*/ ) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
#endif /* KSH */
			/* fall through.. */
		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			  case '\\':
				c = getsc();
#ifdef OS2
				if (isalnum((unsigned char)c)) {
					*wp++ = CHAR, *wp++ = '\\';
					*wp++ = CHAR, *wp++ = c;
				} else
#endif
				if (c) /* trailing \ is lost */
					*wp++ = QCHAR, *wp++ = c;
				break;
			  case '\'':
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			  case '"':
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			  default:
				goto Subst;
			}
			break;

		  /* Substitution handling; entered by goto from the
		   * Sbase2 default above and from SDQUOTE and SWORD.
		   */
		  Subst:
			switch (c) {
			  Lex_state *s;
			  Lex_state *base;

			  case '\\':
				c = getsc();
				switch (c) {
				  case '\\':
				  case '$': case '`':
					*wp++ = QCHAR, *wp++ = c;
					break;
				  case '"':
					/* \" is only an escape outside of a
					 * here document
					 */
					if ((cf & HEREDOC) == 0) {
						*wp++ = QCHAR, *wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				  default:
					/* backslash kept as a plain char */
					Xcheck(ws, wp);
					if (c) { /* trailing \ is lost */
						*wp++ = CHAR, *wp++ = '\\';
						*wp++ = CHAR, *wp++ = c;
					}
					break;
				}
				break;
			  case '$':
				c = getsc();
				if (c == '(') /*)*/ {
					c = getsc();
					if (c == '(') /*)*/ {
						/* $(( : arithmetic; remember
						 * where it started in case it
						 * turns out to be a $( ( .. ) )
						 */
						PUSH_STATE(SASPAREN);
						statep->ls_sasparen.nparen = 2;
						statep->ls_sasparen.start =
							Xsavepos(ws, wp);
						*wp++ = EXPRSUB;
					} else {
						/* $( : command substitution */
						ungetsc(c);
						PUSH_STATE(SCSPAREN);
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						*wp++ = COMSUB;
					}
				} else if (c == '{') /*}*/ {
					*wp++ = OSUBST;
					*wp++ = '{'; /*}*/
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if (c == ':') {
						*wp++ = CHAR, *wp++ = c;
						c = getsc();
					}
					/* If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (c == '#' || c == '%') {
						ungetsc(c);
						PUSH_STATE(STBRACE);
					} else {
						ungetsc(c);
						PUSH_STATE(SBRACE);
					}
				} else if (ctype(c, C_ALPHA)) {
					/* $name: emitted in the same form as
					 * a braced substitution, with 'X' in
					 * place of the braces
					 */
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALPHA|C_DIGIT));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_DIGIT|C_VAR1)) {
					/* single character parameter */
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* a lone $ is an ordinary char */
					*wp++ = CHAR, *wp++ = '$';
					ungetsc(c);
				}
				break;
			  case '`':
				PUSH_STATE(SBQUOTE);
				*wp++ = COMSUB;
				/* Need to know if we are inside double quotes
				 * since sh/at&t-ksh translate the \" to " in
				 * "`..\"..`".  POSIX also requires this.
				 * An earlier version of ksh misinterpreted
				 * the POSIX specification and performed
				 * removal of backslash escapes only if
				 * posix mode was not in effect.
				 */
				statep->ls_sbquote.indquotes = 0;
				/* Walk down the state stack, block by block,
				 * looking for an enclosing SDQUOTE state.
				 */
				s = statep;
				base = state_info.base;
				while (1) {
					for (; s != base; s--) {
						if (s->ls_state == SDQUOTE) {
							statep->ls_sbquote.indquotes = 1;
							break;
						}
					}
					if (s != base)
						break;
					/* slot 0 links to the previous block
					 * (null in the first block)
					 */
					if (!(s = s->ls_info.base))
						break;
					base = s-- - STATE_BSIZE;
				}
				break;
			  default:
				*wp++ = CHAR, *wp++ = c;
			}
			break;

		  case SSQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else
				*wp++ = QCHAR, *wp++ = c;
			break;

		  case SDQUOTE:
			if (c == '"') {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		  case SCSPAREN: /* $( .. ) */
			/* todo: deal with $(...) quoting properly
			 * kludge to partly fake quoting inside $(..): doesn't
			 * really work because nested $(..) or ${..} inside
			 * double quotes aren't dealt with.
			 */
			switch (statep->ls_scsparen.csstate) {
			  case 0: /* normal */
				switch (c) {
				  case '(':
					statep->ls_scsparen.nparen++;
					break;
				  case ')':
					statep->ls_scsparen.nparen--;
					break;
				  case '\\':
					statep->ls_scsparen.csstate = 1;
					break;
				  case '"':
					statep->ls_scsparen.csstate = 2;
					break;
				  case '\'':
					statep->ls_scsparen.csstate = 4;
					ignore_backslash_newline++;
					break;
				}
				break;

			  case 1: /* backslash in normal mode */
			  case 3: /* backslash in double quotes */
				/* the escaped char is skipped: 1 -> 0, 3 -> 2 */
				--statep->ls_scsparen.csstate;
				break;

			  case 2: /* double quotes */
				if (c == '"')
					statep->ls_scsparen.csstate = 0;
				else if (c == '\\')
					statep->ls_scsparen.csstate = 3;
				break;

			  case 4: /* single quotes */
				if (c == '\'') {
					statep->ls_scsparen.csstate = 0;
					ignore_backslash_newline--;
				}
				break;
			}
			/* The text is copied raw (no CHAR markers); the
			 * closing parenthesis is replaced by a 0 byte.
			 */
			if (statep->ls_scsparen.nparen == 0) {
				POP_STATE();
				*wp++ = 0; /* end of COMSUB */
			} else
				*wp++ = c;
			break;

		  case SASPAREN: /* $(( .. )) */
			/* todo: deal with $((...); (...)) properly */
			/* XXX should nest using existing state machine
			 *     (embed "..", $(...), etc.) */
			if (c == '(')
				statep->ls_sasparen.nparen++;
			else if (c == ')') {
				statep->ls_sasparen.nparen--;
				if (statep->ls_sasparen.nparen == 1) {
					/*(*/
					if ((c2 = getsc()) == ')') {
						POP_STATE();
						*wp++ = 0; /* end of EXPRSUB */
						break;
					} else {
						char *s;

						ungetsc(c2);
						/* mismatched parenthesis -
						 * assume we were really
						 * parsing a $(..) expression
						 */
						/* Shift the collected text up
						 * one byte and rewrite its
						 * start as COMSUB followed by
						 * the second '('.
						 */
						s = Xrestpos(ws, wp,
						     statep->ls_sasparen.start);
						memmove(s + 1, s, wp - s);
						*s++ = COMSUB;
						*s = '('; /*)*/
						wp++;
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						state = statep->ls_state
							= SCSPAREN;

					}
				}
			}
			*wp++ = c;
			break;

		  case SBRACE:
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		  case STBRACE:
			/* Same as SBRACE, except (,|,) treated specially */
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;

		  case SBQUOTE:
			/* `...`: copied raw, ended by a 0 byte; backslash is
			 * removed before \, $, ` (and before " when the
			 * backquotes sit inside double quotes)
			 */
			if (c == '`') {
				*wp++ = 0;
				POP_STATE();
			} else if (c == '\\') {
				switch (c = getsc()) {
				  case '\\':
				  case '$': case '`':
					*wp++ = c;
					break;
				  case '"':
					if (statep->ls_sbquote.indquotes) {
						*wp++ = c;
						break;
					}
					/* fall through.. */
				  default:
					if (c) { /* trailing \ is lost */
						*wp++ = '\\';
						*wp++ = c;
					}
					break;
				}
			} else
				*wp++ = c;
			break;

		  case SWORD:	/* ONEWORD */
			goto Subst;

#ifdef KSH
		  case SLETPAREN:	/* LETEXPR: (( ... )) */
			/*(*/
			if (c == ')') {
				if (statep->ls_sletparen.nparen > 0)
				    --statep->ls_sletparen.nparen;
				/*(*/
				else if ((c2 = getsc()) == ')') {
					/* closing )): finish the word; c is
					 * cleared so that no terminator is
					 * acted upon after Done
					 */
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else
					ungetsc(c2);
			} else if (c == '(')
				/* parenthesis inside quotes and backslashes
				 * are lost, but at&t ksh doesn't count them
				 * either
				 */
				++statep->ls_sletparen.nparen;
			goto Sbase2;
#endif /* KSH */

		  case SHEREDELIM:	/* <<,<<- delimiter */
			/* XXX chuck this state (and the next) - use
			 * the existing states ($ and \`..` should be
			 * stripped of their specialness after the
			 * fact).
			 */
			/* here delimiters need a special case since
			 * $ and `..` are not to be treated specially
			 */
			if (c == '\\') {
				c = getsc();
				if (c) { /* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
			} else if (c == '\'') {
				PUSH_STATE(SSQUOTE);
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
			} else if (c == '"') {
				/* switches state in place; nothing is pushed */
				state = statep->ls_state = SHEREDQUOTE;
				*wp++ = OQUOTE;
			} else {
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
			if (c == '"') {
				*wp++ = CQUOTE;
				state = statep->ls_state = SHEREDELIM;
			} else {
				if (c == '\\') {
					switch (c = getsc()) {
					  case '\\': case '"':
					  case '$': case '`':
						/* backslash dropped */
						break;
					  default:
						if (c) { /* trailing \ lost */
							*wp++ = CHAR;
							*wp++ = '\\';
						}
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
			if ( /*(*/ c == ')') {
				*wp++ = CPAT;
				POP_STATE();
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
Done:
	Xcheck(ws, wp);
	/* Anything still on the state stack means an unterminated construct */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote\n");

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	/* Redirection: the terminator is < or > and the word collected so
	 * far is either empty or a single unquoted digit (the unit number).
	 */
	dp = Xstring(ws, wp);
	if ((c == '<' || c == '>') && state == SBASE
	    && ((c2 = Xlength(ws, wp)) == 0
	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
	{
		struct ioword *iop =
				(struct ioword *) alloc(sizeof(*iop), ATEMP);

		if (c2 == 2)
			iop->unit = dp[1] - '0';
		else
			iop->unit = c == '>'; /* 0 for <, 1 for > */

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || (c == '<' && c2 == '>')) {
			iop->flag = c == c2 ?
				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
			if (iop->flag == IOHERE) {
				/* <<- sets IOSKIP */
				if ((c2 = getsc()) == '-') {
					iop->flag |= IOSKIP;
				} else {
					ungetsc(c2);
				}
			}
		} else if (c2 == '&')
			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
		else {
			iop->flag = c == '>' ? IOWRITE : IOREAD;
			if (c == '>' && c2 == '|')
				iop->flag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->name = (char *) 0;
		iop->delim = (char *) 0;
		iop->heredoc = (char *) 0;
		Xfree(ws, wp);	/* free word */
		yylval.iop = iop;
		return REDIR;
	}

	if (wp == dp && state == SBASE) {
		Xfree(ws, wp);	/* free word */
		/* no word, process LEX1 character */
		switch (c) {
		  default:
			return c;

		  case '|':
		  case '&':
		  case ';':
			/* doubled operator: ;; || && */
			if ((c2 = getsc()) == c)
				c = (c == ';') ? BREAK :
				    (c == '|') ? LOGOR :
				    (c == '&') ? LOGAND :
				    YYERRCODE;
#ifdef KSH
			else if (c == '|' && c2 == '&')
				c = COPROC;
#endif /* KSH */
			else
				ungetsc(c2);
			return c;

		  case '\n':
			/* read the bodies of any pending here documents */
			gethere();
			if (cf & CONTIN)
				goto Again;
			return c;

		  case '(':  /*)*/
#ifdef KSH
			if ((c2 = getsc()) == '(') /*)*/
				/* XXX need to handle ((...); (...)) */
				c = MDPAREN;
			else
				ungetsc(c2);
#endif /* KSH */
			return c;
		  /*(*/
		  case ')':
			return c;
		}
	}

	*wp++ = EOS;		/* terminate word */
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD
#ifdef KSH
		|| state == SLETPAREN
#endif /* KSH */
		)	/* ONEWORD? */
		return LWORD;
	ungetsc(c);		/* unget terminator */

	/* copy word to unprefixed string ident */
	/* The copy stops at the first marker that is not CHAR, or once
	 * IDENT characters have been copied; c keeps the last marker read.
	 */
	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
		*dp++ = *sp++;
	/* Make sure the ident array stays '\0' padded */
	memset(dp, 0, (ident+IDENT) - dp + 1);
	if (c != EOS)
		*ident = '\0';	/* word is not unquoted */

	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
		struct tbl *p;
		int h = hash(ident);

		/* { */
		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
		{
			afree(yylval.cp, ATEMP);
			return p->val.i;
		}
		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
		    && (p->flag & ISSET))
		{
			register Source *s;

			/* an alias that is already being expanded is
			 * returned as an ordinary word (no recursion)
			 */
			for (s = source; s->type == SALIAS; s = s->next)
				if (s->u.tblp == p)
					return LWORD;
			/* push alias expansion */
			s = pushs(SALIAS, source->areap);
			s->start = s->str = p->val.s;
			s->u.tblp = p;
			s->next = source;
			source = s;
			afree(yylval.cp, ATEMP);
			goto Again;
		}
	}

	return LWORD;
}

/* Read the body of every here document recorded between heres and herep,
 * then mark the list empty again.
 */
static void
gethere()
{
	register struct ioword **p;

	for (p = heres; p < herep; p++)
		readhere(*p);
	herep = heres;
}

/*
 * read "<<word" text into temp file
783 */ 784 785 static void 786 readhere(iop) 787 struct ioword *iop; 788 { 789 register int c; 790 char *volatile eof; 791 char *eofp; 792 int skiptabs; 793 XString xs; 794 char *xp; 795 int xpos; 796 797 eof = evalstr(iop->delim, 0); 798 799 if (!(iop->flag & IOEVAL)) 800 ignore_backslash_newline++; 801 802 Xinit(xs, xp, 256, ATEMP); 803 804 for (;;) { 805 eofp = eof; 806 skiptabs = iop->flag & IOSKIP; 807 xpos = Xsavepos(xs, xp); 808 while ((c = getsc()) != 0) { 809 if (skiptabs) { 810 if (c == '\t') 811 continue; 812 skiptabs = 0; 813 } 814 if (c != *eofp) 815 break; 816 Xcheck(xs, xp); 817 Xput(xs, xp, c); 818 eofp++; 819 } 820 /* Allow EOF here so commands with out trailing newlines 821 * will work (eg, ksh -c '...', $(...), etc). 822 */ 823 if (*eofp == '\0' && (c == 0 || c == '\n')) { 824 xp = Xrestpos(xs, xp, xpos); 825 break; 826 } 827 ungetsc(c); 828 while ((c = getsc()) != '\n') { 829 if (c == 0) 830 yyerror("here document `%s' unclosed\n", eof); 831 Xcheck(xs, xp); 832 Xput(xs, xp, c); 833 } 834 Xcheck(xs, xp); 835 Xput(xs, xp, c); 836 } 837 Xput(xs, xp, '\0'); 838 iop->heredoc = Xclose(xs, xp); 839 840 if (!(iop->flag & IOEVAL)) 841 ignore_backslash_newline--; 842 } 843 844 void 845 #ifdef HAVE_PROTOTYPES 846 yyerror(const char *fmt, ...) 
847 #else 848 yyerror(fmt, va_alist) 849 const char *fmt; 850 va_dcl 851 #endif 852 { 853 va_list va; 854 855 /* pop aliases and re-reads */ 856 while (source->type == SALIAS || source->type == SREREAD) 857 source = source->next; 858 source->str = null; /* zap pending input */ 859 860 error_prefix(TRUE); 861 SH_VA_START(va, fmt); 862 shf_vfprintf(shl_out, fmt, va); 863 va_end(va); 864 errorf("%s", null); 865 } 866 867 /* 868 * input for yylex with alias expansion 869 */ 870 871 Source * 872 pushs(type, areap) 873 int type; 874 Area *areap; 875 { 876 register Source *s; 877 878 s = (Source *) alloc(sizeof(Source), areap); 879 s->type = type; 880 s->str = null; 881 s->start = NULL; 882 s->line = 0; 883 s->errline = 0; 884 s->file = NULL; 885 s->flags = 0; 886 s->next = NULL; 887 s->areap = areap; 888 if (type == SFILE || type == SSTDIN) { 889 char *dummy; 890 Xinit(s->xs, dummy, 256, s->areap); 891 } else 892 memset(&s->xs, 0, sizeof(s->xs)); 893 return s; 894 } 895 896 static int 897 getsc__() 898 { 899 register Source *s = source; 900 register int c; 901 902 while ((c = *s->str++) == 0) { 903 s->str = NULL; /* return 0 for EOF by default */ 904 switch (s->type) { 905 case SEOF: 906 s->str = null; 907 return 0; 908 909 case SSTDIN: 910 case SFILE: 911 getsc_line(s); 912 break; 913 914 case SWSTR: 915 break; 916 917 case SSTRING: 918 break; 919 920 case SWORDS: 921 s->start = s->str = *s->u.strv++; 922 s->type = SWORDSEP; 923 break; 924 925 case SWORDSEP: 926 if (*s->u.strv == NULL) { 927 s->start = s->str = newline; 928 s->type = SEOF; 929 } else { 930 s->start = s->str = space; 931 s->type = SWORDS; 932 } 933 break; 934 935 case SALIAS: 936 if (s->flags & SF_ALIASEND) { 937 /* pass on an unused SF_ALIAS flag */ 938 source = s->next; 939 source->flags |= s->flags & SF_ALIAS; 940 s = source; 941 } else if (*s->u.tblp->val.s 942 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) 943 { 944 source = s = s->next; /* pop source stack */ 945 /* Note that this 
alias ended with a space, 946 * enabling alias expansion on the following 947 * word. 948 */ 949 s->flags |= SF_ALIAS; 950 } else { 951 /* At this point, we need to keep the current 952 * alias in the source list so recursive 953 * aliases can be detected and we also need 954 * to return the next character. Do this 955 * by temporarily popping the alias to get 956 * the next character and then put it back 957 * in the source list with the SF_ALIASEND 958 * flag set. 959 */ 960 source = s->next; /* pop source stack */ 961 source->flags |= s->flags & SF_ALIAS; 962 c = getsc__(); 963 if (c) { 964 s->flags |= SF_ALIASEND; 965 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 966 s->start = s->str = s->ugbuf; 967 s->next = source; 968 source = s; 969 } else { 970 s = source; 971 /* avoid reading eof twice */ 972 s->str = NULL; 973 break; 974 } 975 } 976 continue; 977 978 case SREREAD: 979 if (s->start != s->ugbuf) /* yuck */ 980 afree(s->u.freeme, ATEMP); 981 source = s = s->next; 982 continue; 983 } 984 if (s->str == NULL) { 985 s->type = SEOF; 986 s->start = s->str = null; 987 return '\0'; 988 } 989 if (s->flags & SF_ECHO) { 990 shf_puts(s->str, shl_out); 991 shf_flush(shl_out); 992 } 993 } 994 return c; 995 } 996 997 static void 998 getsc_line(s) 999 Source *s; 1000 { 1001 char *xp = Xstring(s->xs, xp); 1002 int interactive = Flag(FTALKING) && s->type == SSTDIN; 1003 int have_tty = interactive && (s->flags & SF_TTY); 1004 1005 /* Done here to ensure nothing odd happens when a timeout occurs */ 1006 XcheckN(s->xs, xp, LINE); 1007 *xp = '\0'; 1008 s->start = s->str = xp; 1009 1010 #ifdef KSH 1011 if (have_tty && ksh_tmout) { 1012 ksh_tmout_state = TMOUT_READING; 1013 alarm(ksh_tmout); 1014 } 1015 #endif /* KSH */ 1016 #ifdef EDIT 1017 if (have_tty && (0 1018 # ifdef VI 1019 || Flag(FVI) 1020 # endif /* VI */ 1021 # ifdef EMACS 1022 || Flag(FEMACS) || Flag(FGMACS) 1023 # endif /* EMACS */ 1024 )) 1025 { 1026 int nread; 1027 1028 nread = x_read(xp, LINE); 1029 if (nread < 0) /* read 
error */ 1030 nread = 0; 1031 xp[nread] = '\0'; 1032 xp += nread; 1033 } 1034 else 1035 #endif /* EDIT */ 1036 { 1037 if (interactive) { 1038 pprompt(prompt, 0); 1039 } else 1040 s->line++; 1041 1042 while (1) { 1043 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1044 1045 if (!p && shf_error(s->u.shf) 1046 && shf_errno(s->u.shf) == EINTR) 1047 { 1048 shf_clearerr(s->u.shf); 1049 if (trap) 1050 runtraps(0); 1051 continue; 1052 } 1053 if (!p || (xp = p, xp[-1] == '\n')) 1054 break; 1055 /* double buffer size */ 1056 xp++; /* move past null so doubling works... */ 1057 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1058 xp--; /* ...and move back again */ 1059 } 1060 /* flush any unwanted input so other programs/builtins 1061 * can read it. Not very optimal, but less error prone 1062 * than flushing else where, dealing with redirections, 1063 * etc.. 1064 * todo: reduce size of shf buffer (~128?) if SSTDIN 1065 */ 1066 if (s->type == SSTDIN) 1067 shf_flush(s->u.shf); 1068 } 1069 /* XXX: temporary kludge to restore source after a 1070 * trap may have been executed. 
1071 */ 1072 source = s; 1073 #ifdef KSH 1074 if (have_tty && ksh_tmout) 1075 { 1076 ksh_tmout_state = TMOUT_EXECUTING; 1077 alarm(0); 1078 } 1079 #endif /* KSH */ 1080 s->start = s->str = Xstring(s->xs, xp); 1081 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1082 /* Note: if input is all nulls, this is not eof */ 1083 if (Xlength(s->xs, xp) == 0) { /* EOF */ 1084 if (s->type == SFILE) 1085 shf_fdclose(s->u.shf); 1086 s->str = NULL; 1087 } else if (interactive) { 1088 #ifdef HISTORY 1089 char *p = Xstring(s->xs, xp); 1090 if (cur_prompt == PS1) 1091 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS)) 1092 p++; 1093 if (*p) { 1094 # ifdef EASY_HISTORY 1095 if (cur_prompt == PS2) 1096 histappend(Xstring(s->xs, xp), 1); 1097 else 1098 # endif /* EASY_HISTORY */ 1099 { 1100 s->line++; 1101 histsave(s->line, s->str, 1); 1102 } 1103 } 1104 #endif /* HISTORY */ 1105 } 1106 if (interactive) 1107 set_prompt(PS2, (Source *) 0); 1108 } 1109 1110 void 1111 set_prompt(to, s) 1112 int to; 1113 Source *s; 1114 { 1115 cur_prompt = to; 1116 1117 switch (to) { 1118 case PS1: /* command */ 1119 #ifdef KSH 1120 /* Substitute ! and !! here, before substitutions are done 1121 * so ! in expanded variables are not expanded. 1122 * NOTE: this is not what at&t ksh does (it does it after 1123 * substitutions, POSIX doesn't say which is to be done. 1124 */ 1125 { 1126 struct shf *shf; 1127 char * volatile ps1; 1128 Area *saved_atemp; 1129 1130 ps1 = str_val(global("PS1")); 1131 shf = shf_sopen((char *) 0, strlen(ps1) * 2, 1132 SHF_WR | SHF_DYNAMIC, (struct shf *) 0); 1133 while (*ps1) { 1134 if (*ps1 != '!' || *++ps1 == '!') 1135 shf_putchar(*ps1++, shf); 1136 else 1137 shf_fprintf(shf, "%d", 1138 s ? s->line + 1 : 0); 1139 } 1140 ps1 = shf_sclose(shf); 1141 saved_atemp = ATEMP; 1142 newenv(E_ERRH); 1143 if (ksh_sigsetjmp(e->jbuf, 0)) { 1144 prompt = safe_prompt; 1145 /* Don't print an error - assume it has already 1146 * been printed. 
Reason is we may have forked 1147 * to run a command and the child may be 1148 * unwinding its stack through this code as it 1149 * exits. 1150 */ 1151 } else 1152 prompt = str_save(substitute(ps1, 0), 1153 saved_atemp); 1154 quitenv(); 1155 } 1156 #else /* KSH */ 1157 prompt = str_val(global("PS1")); 1158 #endif /* KSH */ 1159 break; 1160 1161 case PS2: /* command continuation */ 1162 prompt = str_val(global("PS2")); 1163 break; 1164 } 1165 } 1166 1167 /* See also related routine, promptlen() in edit.c */ 1168 void 1169 pprompt(cp, ntruncate) 1170 const char *cp; 1171 int ntruncate; 1172 { 1173 #if 0 1174 char nbuf[32]; 1175 int c; 1176 1177 while (*cp != 0) { 1178 if (*cp != '!') 1179 c = *cp++; 1180 else if (*++cp == '!') 1181 c = *cp++; 1182 else { 1183 int len; 1184 char *p; 1185 1186 shf_snprintf(p = nbuf, sizeof(nbuf), "%d", 1187 source->line + 1); 1188 len = strlen(nbuf); 1189 if (ntruncate) { 1190 if (ntruncate >= len) { 1191 ntruncate -= len; 1192 continue; 1193 } 1194 p += ntruncate; 1195 len -= ntruncate; 1196 ntruncate = 0; 1197 } 1198 shf_write(p, len, shl_out); 1199 continue; 1200 } 1201 if (ntruncate) 1202 --ntruncate; 1203 else 1204 shf_putc(c, shl_out); 1205 } 1206 #endif /* 0 */ 1207 shf_puts(cp + ntruncate, shl_out); 1208 shf_flush(shl_out); 1209 } 1210 1211 /* Read the variable part of a ${...} expression (ie, up to but not including 1212 * the :[-+?=#%] or close-brace. 1213 */ 1214 static char * 1215 get_brace_var(wsp, wp) 1216 XString *wsp; 1217 char *wp; 1218 { 1219 enum parse_state { 1220 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1221 PS_NUMBER, PS_VAR1, PS_END 1222 } 1223 state; 1224 char c; 1225 1226 state = PS_INITIAL; 1227 while (1) { 1228 c = getsc(); 1229 /* State machine to figure out where the variable part ends. */ 1230 switch (state) { 1231 case PS_INITIAL: 1232 if (c == '#') { 1233 state = PS_SAW_HASH; 1234 break; 1235 } 1236 /* fall through.. 
*/ 1237 case PS_SAW_HASH: 1238 if (letter(c)) 1239 state = PS_IDENT; 1240 else if (digit(c)) 1241 state = PS_NUMBER; 1242 else if (ctype(c, C_VAR1)) 1243 state = PS_VAR1; 1244 else 1245 state = PS_END; 1246 break; 1247 case PS_IDENT: 1248 if (!letnum(c)) { 1249 state = PS_END; 1250 if (c == '[') { 1251 char *tmp, *p; 1252 1253 if (!arraysub(&tmp)) 1254 yyerror("missing ]\n"); 1255 *wp++ = c; 1256 for (p = tmp; *p; ) { 1257 Xcheck(*wsp, wp); 1258 *wp++ = *p++; 1259 } 1260 afree(tmp, ATEMP); 1261 c = getsc(); /* the ] */ 1262 } 1263 } 1264 break; 1265 case PS_NUMBER: 1266 if (!digit(c)) 1267 state = PS_END; 1268 break; 1269 case PS_VAR1: 1270 state = PS_END; 1271 break; 1272 case PS_END: /* keep gcc happy */ 1273 break; 1274 } 1275 if (state == PS_END) { 1276 *wp++ = '\0'; /* end of variable part */ 1277 ungetsc(c); 1278 break; 1279 } 1280 Xcheck(*wsp, wp); 1281 *wp++ = c; 1282 } 1283 return wp; 1284 } 1285 1286 /* 1287 * Save an array subscript - returns true if matching bracket found, false 1288 * if eof or newline was found. 1289 * (Returned string double null terminated) 1290 */ 1291 static int 1292 arraysub(strp) 1293 char **strp; 1294 { 1295 XString ws; 1296 char *wp; 1297 char c; 1298 int depth = 1; /* we are just past the initial [ */ 1299 1300 Xinit(ws, wp, 32, ATEMP); 1301 1302 do { 1303 c = getsc(); 1304 Xcheck(ws, wp); 1305 *wp++ = c; 1306 if (c == '[') 1307 depth++; 1308 else if (c == ']') 1309 depth--; 1310 } while (depth > 0 && c && c != '\n'); 1311 1312 *wp++ = '\0'; 1313 *strp = Xclose(ws, wp); 1314 1315 return depth == 0 ? 1 : 0; 1316 } 1317 1318 /* Unget a char: handles case when we are already at the start of the buffer */ 1319 static const char * 1320 ungetsc(c) 1321 int c; 1322 { 1323 if (backslash_skip) 1324 backslash_skip--; 1325 /* Don't unget eof... 
*/ 1326 if (source->str == null && c == '\0') 1327 return source->str; 1328 if (source->str > source->start) 1329 source->str--; 1330 else { 1331 Source *s; 1332 1333 s = pushs(SREREAD, source->areap); 1334 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1335 s->start = s->str = s->ugbuf; 1336 s->next = source; 1337 source = s; 1338 } 1339 return source->str; 1340 } 1341 1342 1343 /* Called to get a char that isn't a \newline sequence. */ 1344 static int 1345 getsc_bn ARGS((void)) 1346 { 1347 int c, c2; 1348 1349 if (ignore_backslash_newline) 1350 return getsc_(); 1351 1352 if (backslash_skip == 1) { 1353 backslash_skip = 2; 1354 return getsc_(); 1355 } 1356 1357 backslash_skip = 0; 1358 1359 while (1) { 1360 c = getsc_(); 1361 if (c == '\\') { 1362 if ((c2 = getsc_()) == '\n') 1363 /* ignore the \newline; get the next char... */ 1364 continue; 1365 ungetsc(c2); 1366 backslash_skip = 1; 1367 } 1368 return c; 1369 } 1370 } 1371 1372 static Lex_state * 1373 push_state_(si, old_end) 1374 State_info *si; 1375 Lex_state *old_end; 1376 { 1377 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP); 1378 1379 new[0].ls_info.base = old_end; 1380 si->base = &new[0]; 1381 si->end = &new[STATE_BSIZE]; 1382 return &new[1]; 1383 } 1384 1385 static Lex_state * 1386 pop_state_(si, old_end) 1387 State_info *si; 1388 Lex_state *old_end; 1389 { 1390 Lex_state *old_base = si->base; 1391 1392 si->base = old_end->ls_info.base - STATE_BSIZE; 1393 si->end = old_end->ls_info.base; 1394 1395 afree(old_base, ATEMP); 1396 1397 return si->base + STATE_BSIZE - 1; 1398 } 1399