1 /* $NetBSD: parser.c,v 1.147 2018/07/13 22:43:44 kre Exp $ */ 2 3 /*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 #if 0 38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; 39 #else 40 __RCSID("$NetBSD: parser.c,v 1.147 2018/07/13 22:43:44 kre Exp $"); 41 #endif 42 #endif /* not lint */ 43 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <limits.h> 47 48 #include "shell.h" 49 #include "parser.h" 50 #include "nodes.h" 51 #include "expand.h" /* defines rmescapes() */ 52 #include "eval.h" /* defines commandname */ 53 #include "syntax.h" 54 #include "options.h" 55 #include "input.h" 56 #include "output.h" 57 #include "var.h" 58 #include "error.h" 59 #include "memalloc.h" 60 #include "mystring.h" 61 #include "alias.h" 62 #include "show.h" 63 #ifndef SMALL 64 #include "myhistedit.h" 65 #endif 66 67 /* 68 * Shell command parser. 69 */ 70 71 /* values returned by readtoken */ 72 #include "token.h" 73 74 #define OPENBRACE '{' 75 #define CLOSEBRACE '}' 76 77 struct HereDoc { 78 struct HereDoc *next; /* next here document in list */ 79 union node *here; /* redirection node */ 80 char *eofmark; /* string indicating end of input */ 81 int striptabs; /* if set, strip leading tabs */ 82 int startline; /* line number where << seen */ 83 }; 84 85 MKINIT struct parse_state parse_state; 86 union parse_state_p psp = { .c_current_parser = &parse_state }; 87 88 static const struct parse_state init_parse_state = { /* all 0's ... 
*/ 89 .ps_noalias = 0, 90 .ps_heredoclist = NULL, 91 .ps_parsebackquote = 0, 92 .ps_doprompt = 0, 93 .ps_needprompt = 0, 94 .ps_lasttoken = 0, 95 .ps_tokpushback = 0, 96 .ps_wordtext = NULL, 97 .ps_checkkwd = 0, 98 .ps_redirnode = NULL, 99 .ps_heredoc = NULL, 100 .ps_quoteflag = 0, 101 .ps_startlinno = 0, 102 .ps_funclinno = 0, 103 .ps_elided_nl = 0, 104 }; 105 106 STATIC union node *list(int); 107 STATIC union node *andor(void); 108 STATIC union node *pipeline(void); 109 STATIC union node *command(void); 110 STATIC union node *simplecmd(union node **, union node *); 111 STATIC union node *makename(int); 112 STATIC void parsefname(void); 113 STATIC int slurp_heredoc(char *const, const int, const int); 114 STATIC void readheredocs(void); 115 STATIC int peektoken(void); 116 STATIC int readtoken(void); 117 STATIC int xxreadtoken(void); 118 STATIC int readtoken1(int, char const *, int); 119 STATIC int noexpand(char *); 120 STATIC void linebreak(void); 121 STATIC void consumetoken(int); 122 STATIC void synexpect(int, const char *) __dead; 123 STATIC void synerror(const char *) __dead; 124 STATIC void setprompt(int); 125 STATIC int pgetc_linecont(void); 126 127 static const char EOFhere[] = "EOF reading here (<<) document"; 128 129 #ifdef DEBUG 130 int parsing = 0; 131 #endif 132 133 /* 134 * Read and parse a command. Returns NEOF on end of file. (NULL is a 135 * valid parse tree indicating a blank line.) 
136 */ 137 138 union node * 139 parsecmd(int interact) 140 { 141 int t; 142 union node *n; 143 144 #ifdef DEBUG 145 parsing++; 146 #endif 147 tokpushback = 0; 148 checkkwd = 0; 149 doprompt = interact; 150 if (doprompt) 151 setprompt(1); 152 else 153 setprompt(0); 154 needprompt = 0; 155 t = readtoken(); 156 #ifdef DEBUG 157 parsing--; 158 #endif 159 if (t == TEOF) 160 return NEOF; 161 if (t == TNL) 162 return NULL; 163 164 #ifdef DEBUG 165 parsing++; 166 #endif 167 tokpushback++; 168 n = list(1); 169 #ifdef DEBUG 170 parsing--; 171 #endif 172 if (heredoclist) 173 error("%d: Here document (<<%s) expected but not present", 174 heredoclist->startline, heredoclist->eofmark); 175 return n; 176 } 177 178 179 STATIC union node * 180 list(int nlflag) 181 { 182 union node *n1, *n2, *n3; 183 int tok; 184 185 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno)); 186 187 checkkwd = 2; 188 if (nlflag == 0 && tokendlist[peektoken()]) 189 return NULL; 190 n1 = NULL; 191 for (;;) { 192 n2 = andor(); 193 tok = readtoken(); 194 if (tok == TBACKGND) { 195 if (n2->type == NCMD || n2->type == NPIPE) 196 n2->ncmd.backgnd = 1; 197 else if (n2->type == NREDIR) 198 n2->type = NBACKGND; 199 else { 200 n3 = stalloc(sizeof(struct nredir)); 201 n3->type = NBACKGND; 202 n3->nredir.n = n2; 203 n3->nredir.redirect = NULL; 204 n2 = n3; 205 } 206 } 207 208 if (n1 != NULL) { 209 n3 = stalloc(sizeof(struct nbinary)); 210 n3->type = NSEMI; 211 n3->nbinary.ch1 = n1; 212 n3->nbinary.ch2 = n2; 213 n1 = n3; 214 } else 215 n1 = n2; 216 217 switch (tok) { 218 case TBACKGND: 219 case TSEMI: 220 tok = readtoken(); 221 /* FALLTHROUGH */ 222 case TNL: 223 if (tok == TNL) { 224 readheredocs(); 225 if (nlflag) 226 return n1; 227 } else if (tok == TEOF && nlflag) 228 return n1; 229 else 230 tokpushback++; 231 232 checkkwd = 2; 233 if (!nlflag && tokendlist[peektoken()]) 234 return n1; 235 break; 236 case TEOF: 237 pungetc(); /* push back EOF on input */ 238 return n1; 239 default: 240 if (nlflag) 241 
synexpect(-1, 0); 242 tokpushback++; 243 return n1; 244 } 245 } 246 } 247 248 STATIC union node * 249 andor(void) 250 { 251 union node *n1, *n2, *n3; 252 int t; 253 254 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno)); 255 256 n1 = pipeline(); 257 for (;;) { 258 if ((t = readtoken()) == TAND) { 259 t = NAND; 260 } else if (t == TOR) { 261 t = NOR; 262 } else { 263 tokpushback++; 264 return n1; 265 } 266 n2 = pipeline(); 267 n3 = stalloc(sizeof(struct nbinary)); 268 n3->type = t; 269 n3->nbinary.ch1 = n1; 270 n3->nbinary.ch2 = n2; 271 n1 = n3; 272 } 273 } 274 275 STATIC union node * 276 pipeline(void) 277 { 278 union node *n1, *n2, *pipenode; 279 struct nodelist *lp, *prev; 280 int negate; 281 282 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno)); 283 284 negate = 0; 285 checkkwd = 2; 286 while (readtoken() == TNOT) { 287 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n")); 288 #ifndef BOGUS_NOT_COMMAND 289 if (posix && negate) 290 synerror("2nd \"!\" unexpected"); 291 #endif 292 negate++; 293 } 294 tokpushback++; 295 n1 = command(); 296 if (readtoken() == TPIPE) { 297 pipenode = stalloc(sizeof(struct npipe)); 298 pipenode->type = NPIPE; 299 pipenode->npipe.backgnd = 0; 300 lp = stalloc(sizeof(struct nodelist)); 301 pipenode->npipe.cmdlist = lp; 302 lp->n = n1; 303 do { 304 prev = lp; 305 lp = stalloc(sizeof(struct nodelist)); 306 lp->n = command(); 307 prev->next = lp; 308 } while (readtoken() == TPIPE); 309 lp->next = NULL; 310 n1 = pipenode; 311 } 312 tokpushback++; 313 if (negate) { 314 CTRACE(DBG_PARSE, ("%snegate pipeline\n", 315 (negate&1) ? "" : "double ")); 316 n2 = stalloc(sizeof(struct nnot)); 317 n2->type = (negate & 1) ? 
NNOT : NDNOT; 318 n2->nnot.com = n1; 319 return n2; 320 } else 321 return n1; 322 } 323 324 325 326 STATIC union node * 327 command(void) 328 { 329 union node *n1, *n2; 330 union node *ap, **app; 331 union node *cp, **cpp; 332 union node *redir, **rpp; 333 int t; 334 #ifdef BOGUS_NOT_COMMAND 335 int negate = 0; 336 #endif 337 338 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno)); 339 340 checkkwd = 2; 341 redir = NULL; 342 n1 = NULL; 343 rpp = &redir; 344 345 /* Check for redirection which may precede command */ 346 while (readtoken() == TREDIR) { 347 *rpp = n2 = redirnode; 348 rpp = &n2->nfile.next; 349 parsefname(); 350 } 351 tokpushback++; 352 353 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */ 354 while (readtoken() == TNOT) { 355 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n")); 356 negate++; 357 } 358 tokpushback++; 359 #endif 360 361 switch (readtoken()) { 362 case TIF: 363 n1 = stalloc(sizeof(struct nif)); 364 n1->type = NIF; 365 n1->nif.test = list(0); 366 consumetoken(TTHEN); 367 n1->nif.ifpart = list(0); 368 n2 = n1; 369 while (readtoken() == TELIF) { 370 n2->nif.elsepart = stalloc(sizeof(struct nif)); 371 n2 = n2->nif.elsepart; 372 n2->type = NIF; 373 n2->nif.test = list(0); 374 consumetoken(TTHEN); 375 n2->nif.ifpart = list(0); 376 } 377 if (lasttoken == TELSE) 378 n2->nif.elsepart = list(0); 379 else { 380 n2->nif.elsepart = NULL; 381 tokpushback++; 382 } 383 consumetoken(TFI); 384 checkkwd = 1; 385 break; 386 case TWHILE: 387 case TUNTIL: 388 n1 = stalloc(sizeof(struct nbinary)); 389 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL; 390 n1->nbinary.ch1 = list(0); 391 consumetoken(TDO); 392 n1->nbinary.ch2 = list(0); 393 consumetoken(TDONE); 394 checkkwd = 1; 395 break; 396 case TFOR: 397 if (readtoken() != TWORD || quoteflag || ! 
goodname(wordtext)) 398 synerror("Bad for loop variable"); 399 n1 = stalloc(sizeof(struct nfor)); 400 n1->type = NFOR; 401 n1->nfor.var = wordtext; 402 linebreak(); 403 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) { 404 app = ≈ 405 while (readtoken() == TWORD) { 406 n2 = makename(startlinno); 407 *app = n2; 408 app = &n2->narg.next; 409 } 410 *app = NULL; 411 n1->nfor.args = ap; 412 if (lasttoken != TNL && lasttoken != TSEMI) 413 synexpect(TSEMI, 0); 414 } else { 415 static char argvars[5] = { 416 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' 417 }; 418 419 n2 = stalloc(sizeof(struct narg)); 420 n2->type = NARG; 421 n2->narg.text = argvars; 422 n2->narg.backquote = NULL; 423 n2->narg.next = NULL; 424 n2->narg.lineno = startlinno; 425 n1->nfor.args = n2; 426 /* 427 * Newline or semicolon here is optional (but note 428 * that the original Bourne shell only allowed NL). 429 */ 430 if (lasttoken != TNL && lasttoken != TSEMI) 431 tokpushback++; 432 } 433 checkkwd = 2; 434 if ((t = readtoken()) == TDO) 435 t = TDONE; 436 else if (t == TBEGIN) 437 t = TEND; 438 else 439 synexpect(TDO, 0); 440 n1->nfor.body = list(0); 441 consumetoken(t); 442 checkkwd = 1; 443 break; 444 case TCASE: 445 n1 = stalloc(sizeof(struct ncase)); 446 n1->type = NCASE; 447 n1->ncase.lineno = startlinno - elided_nl; 448 consumetoken(TWORD); 449 n1->ncase.expr = makename(startlinno); 450 linebreak(); 451 if (lasttoken != TWORD || !equal(wordtext, "in")) 452 synexpect(-1, "in"); 453 cpp = &n1->ncase.cases; 454 noalias = 1; 455 checkkwd = 2; 456 readtoken(); 457 /* 458 * Both ksh and bash accept 'case x in esac' 459 * so configure scripts started taking advantage of this. 460 * The page: http://pubs.opengroup.org/onlinepubs/\ 461 * 009695399/utilities/xcu_chap02.html contradicts itself, 462 * as to if this is legal; the "Case Conditional Format" 463 * paragraph shows one case is required, but the "Grammar" 464 * section shows a grammar that explicitly allows the no 465 * case option. 
466 * 467 * The standard also says (section 2.10): 468 * This formal syntax shall take precedence over the 469 * preceding text syntax description. 470 * ie: the "Grammar" section wins. The text is just 471 * a rough guide (introduction to the common case.) 472 */ 473 while (lasttoken != TESAC) { 474 *cpp = cp = stalloc(sizeof(struct nclist)); 475 cp->type = NCLIST; 476 app = &cp->nclist.pattern; 477 if (lasttoken == TLP) 478 readtoken(); 479 for (;;) { 480 if (lasttoken < TWORD) 481 synexpect(TWORD, 0); 482 *app = ap = makename(startlinno); 483 checkkwd = 2; 484 if (readtoken() != TPIPE) 485 break; 486 app = &ap->narg.next; 487 readtoken(); 488 } 489 noalias = 0; 490 if (lasttoken != TRP) 491 synexpect(TRP, 0); 492 cp->nclist.lineno = startlinno; 493 cp->nclist.body = list(0); 494 495 checkkwd = 2; 496 if ((t = readtoken()) != TESAC) { 497 if (t != TENDCASE && t != TCASEFALL) { 498 noalias = 0; 499 synexpect(TENDCASE, 0); 500 } else { 501 if (t == TCASEFALL) 502 cp->type = NCLISTCONT; 503 noalias = 1; 504 checkkwd = 2; 505 readtoken(); 506 } 507 } 508 cpp = &cp->nclist.next; 509 } 510 noalias = 0; 511 *cpp = NULL; 512 checkkwd = 1; 513 break; 514 case TLP: 515 n1 = stalloc(sizeof(struct nredir)); 516 n1->type = NSUBSHELL; 517 n1->nredir.n = list(0); 518 n1->nredir.redirect = NULL; 519 if (n1->nredir.n == NULL) 520 synexpect(-1, 0); 521 consumetoken(TRP); 522 checkkwd = 1; 523 break; 524 case TBEGIN: 525 n1 = list(0); 526 if (posix && n1 == NULL) 527 synexpect(-1, 0); 528 consumetoken(TEND); 529 checkkwd = 1; 530 break; 531 532 case TBACKGND: 533 case TSEMI: 534 case TAND: 535 case TOR: 536 case TPIPE: 537 case TNL: 538 case TEOF: 539 case TRP: 540 case TENDCASE: 541 case TCASEFALL: 542 /* 543 * simple commands must have something in them, 544 * either a word (which at this point includes a=b) 545 * or a redirection. 
If we reached the end of the 546 * command (which one of these tokens indicates) 547 * when we are just starting, and have not had a 548 * redirect, then ... 549 * 550 * nb: it is still possible to end up with empty 551 * simple commands, if the "command" is a var 552 * expansion that produces nothing: 553 * X= ; $X && $X 554 * --> && 555 * That is OK and is handled after word expansions. 556 */ 557 if (!redir) 558 synexpect(-1, 0); 559 /* 560 * continue to build a node containing the redirect. 561 * the tokpushback means that our ending token will be 562 * read again in simplecmd, causing it to terminate, 563 * so only the redirect(s) will be contained in the 564 * returned n1 565 */ 566 /* FALLTHROUGH */ 567 case TWORD: 568 tokpushback++; 569 n1 = simplecmd(rpp, redir); 570 goto checkneg; 571 default: 572 synexpect(-1, 0); 573 /* NOTREACHED */ 574 } 575 576 /* Now check for redirection which may follow command */ 577 while (readtoken() == TREDIR) { 578 *rpp = n2 = redirnode; 579 rpp = &n2->nfile.next; 580 parsefname(); 581 } 582 tokpushback++; 583 *rpp = NULL; 584 if (redir) { 585 if (n1->type != NSUBSHELL) { 586 n2 = stalloc(sizeof(struct nredir)); 587 n2->type = NREDIR; 588 n2->nredir.n = n1; 589 n1 = n2; 590 } 591 n1->nredir.redirect = redir; 592 } 593 594 checkneg: 595 #ifdef BOGUS_NOT_COMMAND 596 if (negate) { 597 VTRACE(DBG_PARSE, ("bogus %snegate command\n", 598 (negate&1) ? "" : "double ")); 599 n2 = stalloc(sizeof(struct nnot)); 600 n2->type = (negate & 1) ? NNOT : NDNOT; 601 n2->nnot.com = n1; 602 return n2; 603 } 604 else 605 #endif 606 return n1; 607 } 608 609 610 STATIC union node * 611 simplecmd(union node **rpp, union node *redir) 612 { 613 union node *args, **app; 614 union node *n = NULL; 615 int line = 0; 616 #ifdef BOGUS_NOT_COMMAND 617 union node *n2; 618 int negate = 0; 619 #endif 620 621 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n", 622 redir ? 
"" : "out", plinno)); 623 624 /* If we don't have any redirections already, then we must reset */ 625 /* rpp to be the address of the local redir variable. */ 626 if (redir == 0) 627 rpp = &redir; 628 629 args = NULL; 630 app = &args; 631 632 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */ 633 while (readtoken() == TNOT) { 634 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n")); 635 negate++; 636 } 637 tokpushback++; 638 #endif 639 640 for (;;) { 641 if (readtoken() == TWORD) { 642 if (line == 0) 643 line = startlinno; 644 n = makename(startlinno); 645 *app = n; 646 app = &n->narg.next; 647 } else if (lasttoken == TREDIR) { 648 if (line == 0) 649 line = startlinno; 650 *rpp = n = redirnode; 651 rpp = &n->nfile.next; 652 parsefname(); /* read name of redirection file */ 653 } else if (lasttoken == TLP && app == &args->narg.next 654 && redir == 0) { 655 /* We have a function */ 656 consumetoken(TRP); 657 funclinno = plinno; 658 rmescapes(n->narg.text); 659 if (strchr(n->narg.text, '/')) 660 synerror("Bad function name"); 661 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n", 662 n->narg.text, plinno)); 663 n->type = NDEFUN; 664 n->narg.lineno = plinno - elided_nl; 665 n->narg.next = command(); 666 funclinno = 0; 667 goto checkneg; 668 } else { 669 tokpushback++; 670 break; 671 } 672 } 673 674 if (args == NULL && redir == NULL) 675 synexpect(-1, 0); 676 *app = NULL; 677 *rpp = NULL; 678 n = stalloc(sizeof(struct ncmd)); 679 n->type = NCMD; 680 n->ncmd.lineno = line - elided_nl; 681 n->ncmd.backgnd = 0; 682 n->ncmd.args = args; 683 n->ncmd.redirect = redir; 684 n->ncmd.lineno = startlinno; 685 686 checkneg: 687 #ifdef BOGUS_NOT_COMMAND 688 if (negate) { 689 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n", 690 (negate&1) ? "" : "double ")); 691 n2 = stalloc(sizeof(struct nnot)); 692 n2->type = (negate & 1) ? 
NNOT : NDNOT; 693 n2->nnot.com = n; 694 return n2; 695 } 696 else 697 #endif 698 return n; 699 } 700 701 STATIC union node * 702 makename(int lno) 703 { 704 union node *n; 705 706 n = stalloc(sizeof(struct narg)); 707 n->type = NARG; 708 n->narg.next = NULL; 709 n->narg.text = wordtext; 710 n->narg.backquote = backquotelist; 711 n->narg.lineno = lno; 712 return n; 713 } 714 715 void 716 fixredir(union node *n, const char *text, int err) 717 { 718 719 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err)); 720 if (!err) 721 n->ndup.vname = NULL; 722 723 if (is_number(text)) 724 n->ndup.dupfd = number(text); 725 else if (text[0] == '-' && text[1] == '\0') 726 n->ndup.dupfd = -1; 727 else { 728 729 if (err) 730 synerror("Bad fd number"); 731 else 732 n->ndup.vname = makename(startlinno - elided_nl); 733 } 734 } 735 736 737 STATIC void 738 parsefname(void) 739 { 740 union node *n = redirnode; 741 742 if (readtoken() != TWORD) 743 synexpect(-1, 0); 744 if (n->type == NHERE) { 745 struct HereDoc *here = heredoc; 746 struct HereDoc *p; 747 748 if (quoteflag == 0) 749 n->type = NXHERE; 750 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno)); 751 if (here->striptabs) { 752 while (*wordtext == '\t') 753 wordtext++; 754 } 755 756 /* 757 * this test is not really necessary, we are not 758 * required to expand wordtext, but there's no reason 759 * it cannot be $$ or something like that - that would 760 * not mean the pid, but literally two '$' characters. 761 * There is no need for limits on what the word can be. 762 * However, it needs to stay literal as entered, not 763 * have $ converted to CTLVAR or something, which as 764 * the parser is, at the minute, is impossible to prevent. 765 * So, leave it like this until the rest of the parser is fixed. 
766 */ 767 if (!noexpand(wordtext)) 768 synerror("Illegal eof marker for << redirection"); 769 770 rmescapes(wordtext); 771 here->eofmark = wordtext; 772 here->next = NULL; 773 if (heredoclist == NULL) 774 heredoclist = here; 775 else { 776 for (p = heredoclist ; p->next ; p = p->next) 777 continue; 778 p->next = here; 779 } 780 } else if (n->type == NTOFD || n->type == NFROMFD) { 781 fixredir(n, wordtext, 0); 782 } else { 783 n->nfile.fname = makename(startlinno - elided_nl); 784 } 785 } 786 787 /* 788 * Check to see whether we are at the end of the here document. When this 789 * is called, c is set to the first character of the next input line. If 790 * we are at the end of the here document, this routine sets the c to PEOF. 791 * The new value of c is returned. 792 */ 793 794 static int 795 checkend(int c, char * const eofmark, const int striptabs) 796 { 797 798 if (striptabs) { 799 while (c == '\t') 800 c = pgetc(); 801 } 802 if (c == PEOF) { 803 if (*eofmark == '\0') 804 return (c); 805 synerror(EOFhere); 806 } 807 if (c == *eofmark) { 808 int c2; 809 char *q; 810 811 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++) 812 if (c2 == '\n') { 813 plinno++; 814 needprompt = doprompt; 815 } 816 if ((c2 == PEOF || c2 == '\n') && *q == '\0') { 817 c = PEOF; 818 if (c2 == '\n') { 819 plinno++; 820 needprompt = doprompt; 821 } 822 } else { 823 pungetc(); 824 pushstring(eofmark + 1, q - (eofmark + 1), NULL); 825 } 826 } else if (c == '\n' && *eofmark == '\0') { 827 c = PEOF; 828 plinno++; 829 needprompt = doprompt; 830 } 831 return (c); 832 } 833 834 835 /* 836 * Input any here documents. 837 */ 838 839 STATIC int 840 slurp_heredoc(char *const eofmark, const int striptabs, const int sq) 841 { 842 int c; 843 char *out; 844 int lines = plinno; 845 846 c = pgetc(); 847 848 /* 849 * If we hit EOF on the input, and the eofmark is a null string ('') 850 * we consider this empty line to be the eofmark, and exit without err. 
851 */ 852 if (c == PEOF && *eofmark != '\0') 853 synerror(EOFhere); 854 855 STARTSTACKSTR(out); 856 857 while ((c = checkend(c, eofmark, striptabs)) != PEOF) { 858 do { 859 if (sq) { 860 /* 861 * in single quoted mode (eofmark quoted) 862 * all we look for is \n so we can check 863 * for the epfmark - everything saved literally. 864 */ 865 STPUTC(c, out); 866 if (c == '\n') { 867 plinno++; 868 break; 869 } 870 continue; 871 } 872 /* 873 * In double quoted (non-quoted eofmark) 874 * we must handle \ followed by \n here 875 * otherwise we can mismatch the end mark. 876 * All other uses of \ will be handled later 877 * when the here doc is expanded. 878 * 879 * This also makes sure \\ followed by \n does 880 * not suppress the newline (the \ quotes itself) 881 */ 882 if (c == '\\') { /* A backslash */ 883 STPUTC(c, out); 884 c = pgetc(); /* followed by */ 885 if (c == '\n') { /* a newline? */ 886 STPUTC(c, out); 887 plinno++; 888 continue; /* don't break */ 889 } 890 } 891 STPUTC(c, out); /* keep the char */ 892 if (c == '\n') { /* at end of line */ 893 plinno++; 894 break; /* look for eofmark */ 895 } 896 } while ((c = pgetc()) != PEOF); 897 898 /* 899 * If we have read a line, and reached EOF, without 900 * finding the eofmark, whether the EOF comes before 901 * or immediately after the \n, that is an error. 902 */ 903 if (c == PEOF || (c = pgetc()) == PEOF) 904 synerror(EOFhere); 905 } 906 STPUTC('\0', out); 907 908 c = out - stackblock(); 909 out = stackblock(); 910 grabstackblock(c); 911 wordtext = out; 912 913 VTRACE(DBG_PARSE, 914 ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n", 915 plinno - lines, sq ? "quoted " : "", eofmark, 916 striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c), 917 wordtext, (c > 16 ? "..." 
: ""), plinno)); 918 919 return (plinno - lines); 920 } 921 922 static char * 923 insert_elided_nl(char *str) 924 { 925 while (elided_nl > 0) { 926 STPUTC(CTLNONL, str); 927 elided_nl--; 928 } 929 return str; 930 } 931 932 STATIC void 933 readheredocs(void) 934 { 935 struct HereDoc *here; 936 union node *n; 937 int line, l; 938 939 line = 0; /*XXX - gcc! obviously unneeded */ 940 if (heredoclist) 941 line = heredoclist->startline + 1; 942 l = 0; 943 while (heredoclist) { 944 line += l; 945 here = heredoclist; 946 heredoclist = here->next; 947 if (needprompt) { 948 setprompt(2); 949 needprompt = 0; 950 } 951 952 l = slurp_heredoc(here->eofmark, here->striptabs, 953 here->here->nhere.type == NHERE); 954 955 n = stalloc(sizeof(struct narg)); 956 n->narg.type = NARG; 957 n->narg.next = NULL; 958 n->narg.text = wordtext; 959 n->narg.lineno = line; 960 n->narg.backquote = backquotelist; 961 here->here->nhere.doc = n; 962 963 if (here->here->nhere.type == NHERE) 964 continue; 965 966 /* 967 * Now "parse" here docs that have unquoted eofmarkers. 
968 */ 969 setinputstring(wordtext, 1, line); 970 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n", 971 l, line)); 972 readtoken1(pgetc(), DQSYNTAX, 1); 973 n->narg.text = wordtext; 974 n->narg.backquote = backquotelist; 975 popfile(); 976 } 977 } 978 979 STATIC int 980 peektoken(void) 981 { 982 int t; 983 984 t = readtoken(); 985 tokpushback++; 986 return (t); 987 } 988 989 STATIC int 990 readtoken(void) 991 { 992 int t; 993 int savecheckkwd = checkkwd; 994 #ifdef DEBUG 995 int alreadyseen = tokpushback; 996 #endif 997 struct alias *ap; 998 999 top: 1000 t = xxreadtoken(); 1001 1002 if (checkkwd) { 1003 /* 1004 * eat newlines 1005 */ 1006 if (checkkwd == 2) { 1007 checkkwd = 0; 1008 while (t == TNL) { 1009 readheredocs(); 1010 t = xxreadtoken(); 1011 } 1012 } else 1013 checkkwd = 0; 1014 /* 1015 * check for keywords and aliases 1016 */ 1017 if (t == TWORD && !quoteflag) { 1018 const char *const *pp; 1019 1020 for (pp = parsekwd; *pp; pp++) { 1021 if (**pp == *wordtext && equal(*pp, wordtext)) { 1022 lasttoken = t = pp - 1023 parsekwd + KWDOFFSET; 1024 VTRACE(DBG_PARSE, 1025 ("keyword %s recognized @%d\n", 1026 tokname[t], plinno)); 1027 goto out; 1028 } 1029 } 1030 if (!noalias && 1031 (ap = lookupalias(wordtext, 1)) != NULL) { 1032 VTRACE(DBG_PARSE, 1033 ("alias '%s' recognized -> <:%s:>\n", 1034 wordtext, ap->val)); 1035 pushstring(ap->val, strlen(ap->val), ap); 1036 checkkwd = savecheckkwd; 1037 goto top; 1038 } 1039 } 1040 out: 1041 checkkwd = (t == TNOT) ? savecheckkwd : 0; 1042 } 1043 VTRACE(DBG_PARSE, ("%stoken %s %s @%d\n", alreadyseen ? "reread " : "", 1044 tokname[t], t == TWORD ? wordtext : "", plinno)); 1045 return (t); 1046 } 1047 1048 1049 /* 1050 * Read the next input token. 1051 * If the token is a word, we set backquotelist to the list of cmds in 1052 * backquotes. We set quoteflag to true if any part of the word was 1053 * quoted. 1054 * If the token is TREDIR, then we set redirnode to a structure containing 1055 * the redirection. 
1056 * In all cases, the variable startlinno is set to the number of the line 1057 * on which the token starts. 1058 * 1059 * [Change comment: here documents and internal procedures] 1060 * [Readtoken shouldn't have any arguments. Perhaps we should make the 1061 * word parsing code into a separate routine. In this case, readtoken 1062 * doesn't need to have any internal procedures, but parseword does. 1063 * We could also make parseoperator in essence the main routine, and 1064 * have parseword (readtoken1?) handle both words and redirection.] 1065 */ 1066 1067 #define RETURN(token) return lasttoken = token 1068 1069 STATIC int 1070 xxreadtoken(void) 1071 { 1072 int c; 1073 1074 if (tokpushback) { 1075 tokpushback = 0; 1076 return lasttoken; 1077 } 1078 if (needprompt) { 1079 setprompt(2); 1080 needprompt = 0; 1081 } 1082 elided_nl = 0; 1083 startlinno = plinno; 1084 for (;;) { /* until token or start of word found */ 1085 c = pgetc_macro(); 1086 switch (c) { 1087 case ' ': case '\t': 1088 continue; 1089 case '#': 1090 while ((c = pgetc()) != '\n' && c != PEOF) 1091 continue; 1092 pungetc(); 1093 continue; 1094 1095 case '\n': 1096 plinno++; 1097 needprompt = doprompt; 1098 RETURN(TNL); 1099 case PEOF: 1100 RETURN(TEOF); 1101 1102 case '&': 1103 if (pgetc_linecont() == '&') 1104 RETURN(TAND); 1105 pungetc(); 1106 RETURN(TBACKGND); 1107 case '|': 1108 if (pgetc_linecont() == '|') 1109 RETURN(TOR); 1110 pungetc(); 1111 RETURN(TPIPE); 1112 case ';': 1113 switch (pgetc_linecont()) { 1114 case ';': 1115 RETURN(TENDCASE); 1116 case '&': 1117 RETURN(TCASEFALL); 1118 default: 1119 pungetc(); 1120 RETURN(TSEMI); 1121 } 1122 case '(': 1123 RETURN(TLP); 1124 case ')': 1125 RETURN(TRP); 1126 1127 case '\\': 1128 switch (pgetc()) { 1129 case '\n': 1130 startlinno = ++plinno; 1131 if (doprompt) 1132 setprompt(2); 1133 else 1134 setprompt(0); 1135 continue; 1136 case PEOF: 1137 RETURN(TEOF); 1138 default: 1139 pungetc(); 1140 break; 1141 } 1142 /* FALLTHROUGH */ 1143 default: 
1144 return readtoken1(c, BASESYNTAX, 0); 1145 } 1146 } 1147 #undef RETURN 1148 } 1149 1150 1151 1152 /* 1153 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 1154 * is not NULL, read a here document. In the latter case, eofmark is the 1155 * word which marks the end of the document and striptabs is true if 1156 * leading tabs should be stripped from the document. The argument firstc 1157 * is the first character of the input token or document. 1158 * 1159 * Because C does not have internal subroutines, I have simulated them 1160 * using goto's to implement the subroutine linkage. The following macros 1161 * will run code that appears at the end of readtoken1. 1162 */ 1163 1164 /* 1165 * We used to remember only the current syntax, variable nesting level, 1166 * double quote state for each var nesting level, and arith nesting 1167 * level (unrelated to var nesting) and one prev syntax when in arith 1168 * syntax. This worked for simple cases, but can't handle arith inside 1169 * var expansion inside arith inside var with some quoted and some not. 1170 * 1171 * Inspired by FreeBSD's implementation (though it was the obvious way) 1172 * though implemented differently, we now have a stack that keeps track 1173 * of what we are doing now, and what we were doing previously. 1174 * Every time something changes, which will eventually end and should 1175 * revert to the previous state, we push this stack, and then pop it 1176 * again later (that is every ${} with an operator (to parse the word 1177 * or pattern that follows) ${x} and $x are too simple to need it) 1178 * $(( )) $( ) and "...". Always. Really, always! 1179 * 1180 * The stack is implemented as one static (on the C stack) base block 1181 * containing LEVELS_PER_BLOCK (8) stack entries, which should be 1182 * enough for the vast majority of cases. For torture tests, we 1183 * malloc more blocks as needed. All accesses through the inline 1184 * functions below. 
1185 */ 1186 1187 /* 1188 * varnest & arinest will typically be 0 or 1 1189 * (varnest can increment in usages like ${x=${y}} but probably 1190 * does not really need to) 1191 * parenlevel allows balancing parens inside a $(( )), it is reset 1192 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work. 1193 * quoted is special - we need to know 2 things ... are we inside "..." 1194 * (even if inherited from some previous nesting level) and was there 1195 * an opening '"' at this level (so the next will be closing). 1196 * "..." can span nesting levels, but cannot be opened in one and 1197 * closed in a different one. 1198 * To handle this, "quoted" has two fields, the bottom 4 (really 2) 1199 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted 1200 * is really so special that this setting is not very important) 1201 * and 0x10 that indicates that an opening quote has been seen. 1202 * The bottom 4 bits are inherited, the 0x10 bit is not. 1203 */ 1204 struct tokenstate { 1205 const char *ts_syntax; 1206 unsigned short ts_parenlevel; /* counters */ 1207 unsigned short ts_varnest; /* 64000 levels should be enough! 
*/
	unsigned short ts_arinest;
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
};

/*
 * Values kept in ts_quoted.  The low bits (QF) hold the kind of quoting,
 * QS is or'd in by SETDBLQUOTE() to mark the level where quoting began.
 */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	CQ	0x03	/* C style Single Quotes */
#define	QF	0x0F	/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

#define	LEVELS_PER_BLOCK	8
#define	VSS			struct statestack

/*
 * The token state stack is a linked list of fixed size blocks, each
 * holding LEVELS_PER_BLOCK states.  "cur" indexes the state in use
 * in the newest block.
 */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};

/*
 * Return a pointer to the state currently in use in block "stack".
 */
static inline struct tokenstate *
currentstate(VSS *stack)
{
	return &stack->tokenstate[stack->cur];
}

/*
 * Return a pointer to the state one level below the current one.
 * That is the previous slot of this block, or the last slot of the
 * previous block when the current state is slot 0.  At the very bottom
 * (slot 0 of a block with no predecessor) the base state itself is
 * returned.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	if (stack->cur != 0)
		return &stack->tokenstate[stack->cur - 1];
	if (stack->prev == NULL)	/* cannot drop below base */
		return &stack->tokenstate[0];
	return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
}

/*
 * Push a new state.  When the current block has no free slot a new block
 * is obtained from ckmalloc() and linked in front of it (the old block's
 * "cur" is left at LEVELS_PER_BLOCK, drop_state_level() decrements it
 * again when the new block is released).
 *
 * The new state starts with ts_parenlevel 0, and copies ts_varnest,
 * ts_arinest, ts_syntax and the QF bits of ts_quoted from the old state.
 *
 * Returns the block which holds the new current state, which is not
 * necessarily the block passed in.
 */
static inline VSS *
bump_state_level(VSS *stack)
{
	struct tokenstate *os, *ts;

	os = currentstate(stack);

	if (++stack->cur >= LEVELS_PER_BLOCK) {
		VSS *ss;

		ss = (VSS *)ckmalloc(sizeof (struct statestack));
		ss->cur = 0;
		ss->prev = stack;
		stack = ss;
	}

	ts = currentstate(stack);

	ts->ts_parenlevel = 0;		/* parens inside never match outside */

	ts->ts_quoted  = os->ts_quoted & QF;	/* these are default settings */
	ts->ts_varnest = os->ts_varnest;
	ts->ts_arinest = os->ts_arinest;	/* when appropriate */
	ts->ts_syntax  = os->ts_syntax;		/*    they will be altered  */

	return stack;
}

/*
 * Pop one state.  If the current state is slot 0 of its block, that block
 * is released with ckfree() and the previous block becomes current; if
 * there is no previous block nothing changes (the base state is never
 * popped).
 *
 * Returns the block which holds the (new) current state.
 */
static inline VSS *
drop_state_level(VSS *stack)
{
	if (stack->cur == 0) {
		VSS *ss;

		ss = stack;
		stack = ss->prev;
		if (stack == NULL)
			return ss;
		ckfree(ss);
	}
	--stack->cur;
	return stack;
}

/*
 * Release every block that has a predecessor, leaving only the block
 * whose prev is NULL.  The caller's own "stack" pointer is not updated
 * (it is passed by value) so it must not be used afterwards if any block
 * was released here.
 */
static inline void
cleanup_state_stack(VSS *stack)
{
	while (stack->prev != NULL) {
		stack->cur = 0;		/* force drop_state_level to free it */
		stack = drop_state_level(stack);
	}
}

/*
 * Jump to the parsesub / parsearith code and provide the label which
 * that code jumps back to when it is finished.
 */
#define	PARSESUB()	{goto parsesub; parsesub_return:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}

/*
 * The following macros all assume the existence of a local var "stack"
 * which contains a pointer to the current struct statestack
 */

/*
 * These are macros rather than inline funcs to avoid code churn as much
 * as possible - they replace macros of the same name used previously.
 *
 * ISDBLQUOTE() tests the QS bit, which only SETDBLQUOTE() sets.
 * CLRDBLQUOTE() restores the QF bits of the state below (0 at the base).
 */
#define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
#define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = QS | DQ)
#define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
			    stack->cur != 0 || stack->prev ?		\
				prevstate(stack)->ts_quoted & QF : 0)

/*
 * This set are just to avoid excess typing and line lengths...
 * The ones that "look like" var names must be implemented to be lvalues
 */
#define	syntax		(currentstate(stack)->ts_syntax)
#define	parenlevel	(currentstate(stack)->ts_parenlevel)
#define	varnest		(currentstate(stack)->ts_varnest)
#define	arinest		(currentstate(stack)->ts_arinest)
#define	quoted		(currentstate(stack)->ts_quoted)
#define	TS_PUSH()	(stack = bump_state_level(stack))
#define	TS_POP()	(stack = drop_state_level(stack))

/*
 * Called to parse command substitutions.  oldstyle is true if the command
 * is enclosed inside `` (otherwise it was enclosed in "$( )")
 *
 * Internally nlpp is a pointer to the head of the linked
 * list of commands (passed by reference), and savelen is the number of
 * characters on the top of the stack which must be preserved.
 */
/*
 * Parameters:
 *	stack	the caller's token state stack; consulted through the
 *		ISDBLQUOTE() and arinest macros, and released with
 *		cleanup_state_stack() if an error unwinds through here
 *	in	current end of the string being built on the stack; the
 *		bytes between stackblock() and "in" are copied to malloc'd
 *		memory while the command is parsed, and copied back after
 *	pbqlist	list to which a node for the parsed command is appended
 *	oldstyle nonzero for `...`, zero for $(...)
 *	magicq	not referenced in this function
 *
 * Returns the new end of the string on the stack, after the restored
 * text and the CTLBACKQ marker have been added.
 */
static char *
parsebackq(VSS *const stack, char * const in,
    struct nodelist **const pbqlist, const int oldstyle, const int magicq)
{
	struct nodelist **nlpp;
	const int savepbq = parsebackquote;
	union node *n;
	char *out;
	char *str = NULL;
	char *volatile sstr = str;	/* copy of str usable after longjmp */
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	const int savelen = in - stackblock();
	int saveprompt;
	int lno;

	/*
	 * Error exit: release the saved text and any extra state stack
	 * blocks, restore the previous handler, and continue unwinding.
	 */
	if (setjmp(jmploc.loc)) {
		if (sstr)
			ckfree(__UNVOLATILE(sstr));
		cleanup_state_stack(stack);
		parsebackquote = 0;
		handler = savehandler;
		longjmp(handler->loc, 1);
	}
	/* save what has been built so far, the stack is about to be reused */
	INTOFF;
	sstr = str = NULL;
	if (savelen > 0) {
		sstr = str = ckmalloc(savelen);
		memcpy(str, stackblock(), savelen);
	}
	handler = &jmploc;
	INTON;
	if (oldstyle) {
		/*
		 * We must read until the closing backquote, giving special
		 * treatment to some slashes, and then push the string and
		 * reread it as input, interpreting it normally.
		 */
		int pc;
		int psavelen;
		char *pstr;
		int line1 = plinno;

		VTRACE(DBG_PARSE, ("parsebackq: repackaging `` as $( )"));
		/*
		 * Because the entire `...` is read here, we don't
		 * need to bother the state stack.  That will be used
		 * (as appropriate) when the processed string is re-read.
		 */
		STARTSTACKSTR(out);
#ifdef DEBUG
		for (psavelen = 0;;psavelen++) {
#else
		for (;;) {
#endif
			if (needprompt) {
				setprompt(2);
				needprompt = 0;
			}
			pc = pgetc();
			if (pc == '`')
				break;
			switch (pc) {
			case '\\':
				pc = pgetc();
#ifdef DEBUG
				psavelen++;
#endif
				if (pc == '\n') {   /* keep \ \n for later */
					plinno++;
					needprompt = doprompt;
				}
				/*
				 * the \ is dropped only before \ ` $
				 * (and before " when in double quotes)
				 */
				if (pc != '\\' && pc != '`' && pc != '$'
				    && (!ISDBLQUOTE() || pc != '"'))
					STPUTC('\\', out);
				break;

			case '\n':
				plinno++;
				needprompt = doprompt;
				break;

			case PEOF:
				startlinno = line1;
				synerror("EOF in backquote substitution");
				break;

			default:
				break;
			}
			STPUTC(pc, out);
		}
		STPUTC('\0', out);
		VTRACE(DBG_PARSE, (" read %d", psavelen));
		psavelen = out - stackblock();
		VTRACE(DBG_PARSE, (" produced %d\n", psavelen));
		if (psavelen > 0) {
			pstr = grabstackstr(out);
			setinputstring(pstr, 1, line1);
		}
	}
	/* append a new (empty) entry at the end of the caller's list */
	nlpp = pbqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = stalloc(sizeof(struct nodelist));
	(*nlpp)->next = NULL;
	parsebackquote = oldstyle;

	if (oldstyle) {
		saveprompt = doprompt;
		doprompt = 0;
	} else
		saveprompt = 0;

	/* lno becomes the number of lines consumed parsing the command */
	lno = -plinno;
	n = list(0);
	lno += plinno;

	if (oldstyle) {
		if (peektoken() != TEOF)
			synexpect(-1, 0);
		doprompt = saveprompt;
	} else
		consumetoken(TRP);

	(*nlpp)->n = n;
	if (oldstyle) {
		/*
		 * Start reading from old file again, ignoring any pushed back
		 * tokens left from the backquote parsing
		 */
		popfile();
		tokpushback = 0;
	}

	/* put back the text that was saved on entry */
	while (stackblocksize() <= savelen)
		growstackblock();
	STARTSTACKSTR(out);
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
		INTOFF;
		ckfree(str);
		sstr = str = NULL;
		INTON;
	}
	parsebackquote = savepbq;
	handler = savehandler;
	if (arinest || ISDBLQUOTE()) {
		STPUTC(CTLBACKQ | CTLQUOTE, out);
		/* one CTLNONL for each line the command occupied */
		while (--lno >= 0)
			STPUTC(CTLNONL, out);
	} else
		STPUTC(CTLBACKQ, out);

	return out;
}

/*
 * Parse a redirection operator.  The parameter "out" points to a string
 * specifying the fd to be redirected.  It is guaranteed to be either ""
 * or a numeric string (for now anyway).  The parameter "c" contains the
 * first character of the redirection operator.
 *
 * Note the string "out" is on the stack, which we are about to clobber,
 * so process it first...
 *
 * When no fd is given, the default is 1 for '>' forms and 0 for '<' forms.
 * The node built is left in "redirnode"; for "<<" and "<<-" a HereDoc
 * record is also created and left in "heredoc".
 */

static void
parseredir(const char *out, int c)
{
	union node *np;
	int fd;

	fd = (*out == '\0') ? -1 : number(out);

	np = stalloc(sizeof(struct nfile));
	if (c == '>') {
		if (fd < 0)
			fd = 1;
		c = pgetc_linecont();
		if (c == '>')
			np->type = NAPPEND;
		else if (c == '|')
			np->type = NCLOBBER;
		else if (c == '&')
			np->type = NTOFD;
		else {
			np->type = NTO;
			pungetc();
		}
	} else {	/* c == '<' */
		if (fd < 0)
			fd = 0;
		switch (c = pgetc_linecont()) {
		case '<':
			/* a here doc node is needed, replace if sizes differ */
			if (sizeof (struct nfile) != sizeof (struct nhere)) {
				np = stalloc(sizeof(struct nhere));
				np->nfile.fd = 0;
			}
			np->type = NHERE;
			heredoc = stalloc(sizeof(struct HereDoc));
			heredoc->here = np;
			heredoc->startline = plinno;
			if ((c = pgetc_linecont()) == '-') {
				heredoc->striptabs = 1;
			} else {
				heredoc->striptabs = 0;
				pungetc();
			}
			break;

		case '&':
			np->type = NFROMFD;
			break;

		case '>':
			np->type = NFROMTO;
			break;

		default:
			np->type = NFROM;
			pungetc();
			break;
		}
	}
	np->nfile.fd = fd;

	redirnode = np;	/* this is the "value" of TREDIR */
}
1565 1566 /* 1567 * Called to parse a backslash escape sequence inside $'...'. 1568 * The backslash has already been read. 1569 */ 1570 static char * 1571 readcstyleesc(char *out) 1572 { 1573 int c, vc, i, n; 1574 unsigned int v; 1575 1576 c = pgetc(); 1577 switch (c) { 1578 case '\0': 1579 case PEOF: 1580 synerror("Unterminated quoted string"); 1581 case '\n': 1582 plinno++; 1583 if (doprompt) 1584 setprompt(2); 1585 else 1586 setprompt(0); 1587 return out; 1588 1589 case '\\': 1590 case '\'': 1591 case '"': 1592 v = c; 1593 break; 1594 1595 case 'a': v = '\a'; break; 1596 case 'b': v = '\b'; break; 1597 case 'e': v = '\033'; break; 1598 case 'f': v = '\f'; break; 1599 case 'n': v = '\n'; break; 1600 case 'r': v = '\r'; break; 1601 case 't': v = '\t'; break; 1602 case 'v': v = '\v'; break; 1603 1604 case '0': case '1': case '2': case '3': 1605 case '4': case '5': case '6': case '7': 1606 v = c - '0'; 1607 c = pgetc(); 1608 if (c >= '0' && c <= '7') { 1609 v <<= 3; 1610 v += c - '0'; 1611 c = pgetc(); 1612 if (c >= '0' && c <= '7') { 1613 v <<= 3; 1614 v += c - '0'; 1615 } else 1616 pungetc(); 1617 } else 1618 pungetc(); 1619 break; 1620 1621 case 'c': 1622 c = pgetc(); 1623 if (c < 0x3f || c > 0x7a || c == 0x60) 1624 synerror("Bad \\c escape sequence"); 1625 if (c == '\\' && pgetc() != '\\') 1626 synerror("Bad \\c\\ escape sequence"); 1627 if (c == '?') 1628 v = 127; 1629 else 1630 v = c & 0x1f; 1631 break; 1632 1633 case 'x': 1634 n = 2; 1635 goto hexval; 1636 case 'u': 1637 n = 4; 1638 goto hexval; 1639 case 'U': 1640 n = 8; 1641 hexval: 1642 v = 0; 1643 for (i = 0; i < n; i++) { 1644 c = pgetc(); 1645 if (c >= '0' && c <= '9') 1646 v = (v << 4) + c - '0'; 1647 else if (c >= 'A' && c <= 'F') 1648 v = (v << 4) + c - 'A' + 10; 1649 else if (c >= 'a' && c <= 'f') 1650 v = (v << 4) + c - 'a' + 10; 1651 else { 1652 pungetc(); 1653 break; 1654 } 1655 } 1656 if (n > 2 && v > 127) { 1657 if (v >= 0xd800 && v <= 0xdfff) 1658 synerror("Invalid \\u escape sequence"); 1659 
1660 /* XXX should we use iconv here. What locale? */ 1661 CHECKSTRSPACE(4, out); 1662 1663 if (v <= 0x7ff) { 1664 USTPUTC(0xc0 | v >> 6, out); 1665 USTPUTC(0x80 | (v & 0x3f), out); 1666 return out; 1667 } else if (v <= 0xffff) { 1668 USTPUTC(0xe0 | v >> 12, out); 1669 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1670 USTPUTC(0x80 | (v & 0x3f), out); 1671 return out; 1672 } else if (v <= 0x10ffff) { 1673 USTPUTC(0xf0 | v >> 18, out); 1674 USTPUTC(0x80 | ((v >> 12) & 0x3f), out); 1675 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1676 USTPUTC(0x80 | (v & 0x3f), out); 1677 return out; 1678 } 1679 if (v > 127) 1680 v = '?'; 1681 } 1682 break; 1683 default: 1684 synerror("Unknown $'' escape sequence"); 1685 } 1686 vc = (char)v; 1687 1688 /* 1689 * If we managed to create a \n from a \ sequence (no matter how) 1690 * then we replace it with the magic CRTCNL control char, which 1691 * will turn into a \n again later, but in the meantime, never 1692 * causes LINENO increments. 1693 */ 1694 if (vc == '\n') { 1695 USTPUTC(CTLCNL, out); 1696 return out; 1697 } 1698 1699 /* 1700 * We can't handle NUL bytes. 1701 * POSIX says we should skip till the closing quote. 1702 */ 1703 if (vc == '\0') { 1704 while ((c = pgetc()) != '\'') { 1705 if (c == '\\') 1706 c = pgetc(); 1707 if (c == PEOF) 1708 synerror("Unterminated quoted string"); 1709 if (c == '\n') { 1710 plinno++; 1711 if (doprompt) 1712 setprompt(2); 1713 else 1714 setprompt(0); 1715 } 1716 } 1717 pungetc(); 1718 return out; 1719 } 1720 if (SQSYNTAX[vc] == CCTL) 1721 USTPUTC(CTLESC, out); 1722 USTPUTC(vc, out); 1723 return out; 1724 } 1725 1726 /* 1727 * The lowest level basic tokenizer. 
1728 * 1729 * The next input byte (character) is in firstc, syn says which 1730 * syntax tables we are to use (basic, single or double quoted, or arith) 1731 * and magicq (used with sqsyntax and dqsyntax only) indicates that the 1732 * quote character itself is not special (used parsing here docs and similar) 1733 * 1734 * The result is the type of the next token (its value, when there is one, 1735 * is saved in the relevant global var - must fix that someday!) which is 1736 * also saved for re-reading ("lasttoken"). 1737 * 1738 * Overall, this routine does far more parsing than it is supposed to. 1739 * That will also need fixing, someday... 1740 */ 1741 STATIC int 1742 readtoken1(int firstc, char const *syn, int magicq) 1743 { 1744 int c; 1745 char * out; 1746 int len; 1747 struct nodelist *bqlist; 1748 int quotef; 1749 VSS static_stack; 1750 VSS *stack = &static_stack; 1751 1752 stack->prev = NULL; 1753 stack->cur = 0; 1754 1755 syntax = syn; 1756 1757 startlinno = plinno; 1758 varnest = 0; 1759 quoted = 0; 1760 if (syntax == DQSYNTAX) 1761 SETDBLQUOTE(); 1762 quotef = 0; 1763 bqlist = NULL; 1764 arinest = 0; 1765 parenlevel = 0; 1766 elided_nl = 0; 1767 1768 STARTSTACKSTR(out); 1769 1770 for (c = firstc ;; c = pgetc_macro()) { /* until of token */ 1771 if (syntax == ARISYNTAX) 1772 out = insert_elided_nl(out); 1773 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ 1774 switch (syntax[c]) { 1775 case CNL: /* '\n' */ 1776 if (syntax == BASESYNTAX && varnest == 0) 1777 break; /* exit loop */ 1778 USTPUTC(c, out); 1779 plinno++; 1780 if (doprompt) 1781 setprompt(2); 1782 else 1783 setprompt(0); 1784 continue; 1785 1786 case CSBACK: /* single quoted backslash */ 1787 if ((quoted & QF) == CQ) { 1788 out = readcstyleesc(out); 1789 continue; 1790 } 1791 /* FALLTHROUGH */ 1792 case CWORD: 1793 USTPUTC(c, out); 1794 continue; 1795 1796 case CCTL: 1797 if (!magicq || ISDBLQUOTE()) 1798 USTPUTC(CTLESC, out); 1799 USTPUTC(c, out); 1800 continue; 1801 case CBACK: /* 
backslash */ 1802 c = pgetc(); 1803 if (c == PEOF) { 1804 USTPUTC('\\', out); 1805 pungetc(); 1806 continue; 1807 } 1808 if (c == '\n') { 1809 plinno++; 1810 elided_nl++; 1811 if (doprompt) 1812 setprompt(2); 1813 else 1814 setprompt(0); 1815 continue; 1816 } 1817 quotef = 1; /* current token is quoted */ 1818 if (ISDBLQUOTE() && c != '\\' && c != '`' && 1819 c != '$' && (c != '"' || magicq)) 1820 USTPUTC('\\', out); 1821 if (SQSYNTAX[c] == CCTL) 1822 USTPUTC(CTLESC, out); 1823 else if (!magicq) { 1824 USTPUTC(CTLQUOTEMARK, out); 1825 USTPUTC(c, out); 1826 if (varnest != 0) 1827 USTPUTC(CTLQUOTEEND, out); 1828 continue; 1829 } 1830 USTPUTC(c, out); 1831 continue; 1832 case CSQUOTE: 1833 if (syntax != SQSYNTAX) { 1834 if (!magicq) 1835 USTPUTC(CTLQUOTEMARK, out); 1836 quotef = 1; 1837 TS_PUSH(); 1838 syntax = SQSYNTAX; 1839 quoted = SQ; 1840 continue; 1841 } 1842 if (magicq && arinest == 0 && varnest == 0) { 1843 /* Ignore inside quoted here document */ 1844 USTPUTC(c, out); 1845 continue; 1846 } 1847 /* End of single quotes... 
*/ 1848 TS_POP(); 1849 if (syntax == BASESYNTAX && varnest != 0) 1850 USTPUTC(CTLQUOTEEND, out); 1851 continue; 1852 case CDQUOTE: 1853 if (magicq && arinest == 0 && varnest == 0) { 1854 /* Ignore inside here document */ 1855 USTPUTC(c, out); 1856 continue; 1857 } 1858 quotef = 1; 1859 if (arinest) { 1860 if (ISDBLQUOTE()) { 1861 TS_POP(); 1862 } else { 1863 TS_PUSH(); 1864 syntax = DQSYNTAX; 1865 SETDBLQUOTE(); 1866 USTPUTC(CTLQUOTEMARK, out); 1867 } 1868 continue; 1869 } 1870 if (magicq) 1871 continue; 1872 if (ISDBLQUOTE()) { 1873 TS_POP(); 1874 if (varnest != 0) 1875 USTPUTC(CTLQUOTEEND, out); 1876 } else { 1877 TS_PUSH(); 1878 syntax = DQSYNTAX; 1879 SETDBLQUOTE(); 1880 USTPUTC(CTLQUOTEMARK, out); 1881 } 1882 continue; 1883 case CVAR: /* '$' */ 1884 out = insert_elided_nl(out); 1885 PARSESUB(); /* parse substitution */ 1886 continue; 1887 case CENDVAR: /* CLOSEBRACE */ 1888 if (varnest > 0 && !ISDBLQUOTE()) { 1889 TS_POP(); 1890 USTPUTC(CTLENDVAR, out); 1891 } else { 1892 USTPUTC(c, out); 1893 } 1894 out = insert_elided_nl(out); 1895 continue; 1896 case CLP: /* '(' in arithmetic */ 1897 parenlevel++; 1898 USTPUTC(c, out); 1899 continue;; 1900 case CRP: /* ')' in arithmetic */ 1901 if (parenlevel > 0) { 1902 USTPUTC(c, out); 1903 --parenlevel; 1904 } else { 1905 if (pgetc_linecont() == /*(*/ ')') { 1906 out = insert_elided_nl(out); 1907 if (--arinest == 0) { 1908 TS_POP(); 1909 USTPUTC(CTLENDARI, out); 1910 } else 1911 USTPUTC(/*(*/ ')', out); 1912 } else { 1913 break; /* to synerror() just below */ 1914 #if 0 /* the old way, causes weird errors on bad input */ 1915 /* 1916 * unbalanced parens 1917 * (don't 2nd guess - no error) 1918 */ 1919 pungetc(); 1920 USTPUTC(/*(*/ ')', out); 1921 #endif 1922 } 1923 } 1924 continue; 1925 case CBQUOTE: /* '`' */ 1926 out = parsebackq(stack, out, &bqlist, 1, magicq); 1927 continue; 1928 case CEOF: /* --> c == PEOF */ 1929 break; /* will exit loop */ 1930 default: 1931 if (varnest == 0 && !ISDBLQUOTE()) 1932 break; /* exit 
loop */ 1933 USTPUTC(c, out); 1934 continue; 1935 } 1936 break; /* break from switch -> break from for loop too */ 1937 } 1938 1939 if (syntax == ARISYNTAX) { 1940 cleanup_state_stack(stack); 1941 synerror(/*((*/ "Missing '))'"); 1942 } 1943 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) { 1944 cleanup_state_stack(stack); 1945 synerror("Unterminated quoted string"); 1946 } 1947 if (varnest != 0) { 1948 cleanup_state_stack(stack); 1949 startlinno = plinno; 1950 /* { */ 1951 synerror("Missing '}'"); 1952 } 1953 1954 STPUTC('\0', out); 1955 len = out - stackblock(); 1956 out = stackblock(); 1957 1958 if (!magicq) { 1959 if ((c == '<' || c == '>') 1960 && quotef == 0 && (*out == '\0' || is_number(out))) { 1961 parseredir(out, c); 1962 cleanup_state_stack(stack); 1963 return lasttoken = TREDIR; 1964 } else { 1965 pungetc(); 1966 } 1967 } 1968 1969 VTRACE(DBG_PARSE, 1970 ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n", 1971 (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""), 1972 len, elided_nl)); 1973 1974 quoteflag = quotef; 1975 backquotelist = bqlist; 1976 grabstackblock(len); 1977 wordtext = out; 1978 cleanup_state_stack(stack); 1979 return lasttoken = TWORD; 1980 /* end of readtoken routine */ 1981 1982 1983 /* 1984 * Parse a substitution. At this point, we have read the dollar sign 1985 * and nothing else. 
1986 */ 1987 1988 parsesub: { 1989 int subtype; 1990 int typeloc; 1991 int flags; 1992 char *p; 1993 static const char types[] = "}-+?="; 1994 1995 c = pgetc_linecont(); 1996 if (c == '(' /*)*/) { /* $(command) or $((arith)) */ 1997 if (pgetc_linecont() == '(' /*')'*/ ) { 1998 out = insert_elided_nl(out); 1999 PARSEARITH(); 2000 } else { 2001 out = insert_elided_nl(out); 2002 pungetc(); 2003 out = parsebackq(stack, out, &bqlist, 0, magicq); 2004 } 2005 } else if (c == OPENBRACE || is_name(c) || is_special(c)) { 2006 USTPUTC(CTLVAR, out); 2007 typeloc = out - stackblock(); 2008 USTPUTC(VSNORMAL, out); 2009 subtype = VSNORMAL; 2010 flags = 0; 2011 if (c == OPENBRACE) { 2012 c = pgetc_linecont(); 2013 if (c == '#') { 2014 if ((c = pgetc_linecont()) == CLOSEBRACE) 2015 c = '#'; 2016 else if (is_name(c) || isdigit(c)) 2017 subtype = VSLENGTH; 2018 else if (is_special(c)) { 2019 /* 2020 * ${#} is $# - the number of sh params 2021 * ${##} is the length of ${#} 2022 * ${###} is ${#} with as much nothing 2023 * as possible removed from start 2024 * ${##1} is ${#} with leading 1 gone 2025 * ${##\#} is ${#} with leading # gone 2026 * 2027 * this stuff is UGLY! 2028 */ 2029 if (pgetc_linecont() == CLOSEBRACE) { 2030 pungetc(); 2031 subtype = VSLENGTH; 2032 } else { 2033 static char cbuf[2]; 2034 2035 pungetc(); /* would like 2 */ 2036 cbuf[0] = c; /* so ... */ 2037 cbuf[1] = '\0'; 2038 pushstring(cbuf, 1, NULL); 2039 c = '#'; /* ${#:...} */ 2040 subtype = 0; /* .. or similar */ 2041 } 2042 } else { 2043 pungetc(); 2044 c = '#'; 2045 subtype = 0; 2046 } 2047 } 2048 else 2049 subtype = 0; 2050 } 2051 if (is_name(c)) { 2052 p = out; 2053 do { 2054 STPUTC(c, out); 2055 c = pgetc_linecont(); 2056 } while (is_in_name(c)); 2057 #if 0 2058 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) { 2059 int i; 2060 int linno; 2061 char buf[10]; 2062 2063 /* 2064 * The "LINENO hack" 2065 * 2066 * Replace the variable name with the 2067 * current line number. 
2068 */ 2069 linno = plinno; 2070 if (funclinno != 0) 2071 linno -= funclinno - 1; 2072 snprintf(buf, sizeof(buf), "%d", linno); 2073 STADJUST(-6, out); 2074 for (i = 0; buf[i] != '\0'; i++) 2075 STPUTC(buf[i], out); 2076 flags |= VSLINENO; 2077 } 2078 #endif 2079 } else if (is_digit(c)) { 2080 do { 2081 STPUTC(c, out); 2082 c = pgetc_linecont(); 2083 } while (subtype != VSNORMAL && is_digit(c)); 2084 } 2085 else if (is_special(c)) { 2086 USTPUTC(c, out); 2087 c = pgetc_linecont(); 2088 } 2089 else { 2090 badsub: 2091 cleanup_state_stack(stack); 2092 synerror("Bad substitution"); 2093 } 2094 2095 STPUTC('=', out); 2096 if (subtype == 0) { 2097 switch (c) { 2098 case ':': 2099 flags |= VSNUL; 2100 c = pgetc_linecont(); 2101 /*FALLTHROUGH*/ 2102 default: 2103 p = strchr(types, c); 2104 if (p == NULL) 2105 goto badsub; 2106 subtype = p - types + VSNORMAL; 2107 break; 2108 case '%': 2109 case '#': 2110 { 2111 int cc = c; 2112 subtype = c == '#' ? VSTRIMLEFT : 2113 VSTRIMRIGHT; 2114 c = pgetc_linecont(); 2115 if (c == cc) 2116 subtype++; 2117 else 2118 pungetc(); 2119 break; 2120 } 2121 } 2122 } else { 2123 if (subtype == VSLENGTH && c != /*{*/ '}') 2124 synerror("no modifiers allowed with ${#var}"); 2125 pungetc(); 2126 } 2127 if (ISDBLQUOTE() || arinest) 2128 flags |= VSQUOTE; 2129 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX) 2130 flags |= VSPATQ; 2131 *(stackblock() + typeloc) = subtype | flags; 2132 if (subtype != VSNORMAL) { 2133 TS_PUSH(); 2134 varnest++; 2135 arinest = 0; 2136 if (subtype > VSASSIGN) { /* # ## % %% */ 2137 syntax = BASESYNTAX; 2138 CLRDBLQUOTE(); 2139 } 2140 } 2141 } else if (c == '\'' && syntax == BASESYNTAX) { 2142 USTPUTC(CTLQUOTEMARK, out); 2143 quotef = 1; 2144 TS_PUSH(); 2145 syntax = SQSYNTAX; 2146 quoted = CQ; 2147 } else { 2148 USTPUTC('$', out); 2149 pungetc(); 2150 } 2151 goto parsesub_return; 2152 } 2153 2154 2155 /* 2156 * Parse an arithmetic expansion (indicate start of one and set state) 2157 */ 2158 parsearith: { 2159 
2160 #if 0 2161 if (syntax == ARISYNTAX) { 2162 /* 2163 * we collapse embedded arithmetic expansion to 2164 * parentheses, which should be equivalent 2165 * 2166 * XXX It isn't, must fix, soonish... 2167 */ 2168 USTPUTC('(' /*)*/, out); 2169 USTPUTC('(' /*)*/, out); 2170 /* 2171 * Need 2 of them because there will (should be) 2172 * two closing ))'s to follow later. 2173 */ 2174 parenlevel += 2; 2175 } else 2176 #endif 2177 { 2178 USTPUTC(CTLARI, out); 2179 if (ISDBLQUOTE()) 2180 USTPUTC('"',out); 2181 else 2182 USTPUTC(' ',out); 2183 2184 TS_PUSH(); 2185 syntax = ARISYNTAX; 2186 arinest = 1; 2187 varnest = 0; 2188 } 2189 goto parsearith_return; 2190 } 2191 2192 } /* end of readtoken */ 2193 2194 2195 2196 2197 #ifdef mkinit 2198 INCLUDE "parser.h" 2199 2200 RESET { 2201 psp.v_current_parser = &parse_state; 2202 2203 parse_state.ps_tokpushback = 0; 2204 parse_state.ps_checkkwd = 0; 2205 parse_state.ps_heredoclist = NULL; 2206 } 2207 #endif 2208 2209 /* 2210 * Returns true if the text contains nothing to expand (no dollar signs 2211 * or backquotes). 2212 */ 2213 2214 STATIC int 2215 noexpand(char *text) 2216 { 2217 char *p; 2218 char c; 2219 2220 p = text; 2221 while ((c = *p++) != '\0') { 2222 if (c == CTLQUOTEMARK) 2223 continue; 2224 if (c == CTLESC) 2225 p++; 2226 else if (BASESYNTAX[(int)c] == CCTL) 2227 return 0; 2228 } 2229 return 1; 2230 } 2231 2232 2233 /* 2234 * Return true if the argument is a legal variable name (a letter or 2235 * underscore followed by zero or more letters, underscores, and digits). 2236 */ 2237 2238 int 2239 goodname(char *name) 2240 { 2241 char *p; 2242 2243 p = name; 2244 if (! is_name(*p)) 2245 return 0; 2246 while (*++p) { 2247 if (! 
is_in_name(*p)) 2248 return 0; 2249 } 2250 return 1; 2251 } 2252 2253 /* 2254 * skip past any \n's, and leave lasttoken set to whatever follows 2255 */ 2256 STATIC void 2257 linebreak(void) 2258 { 2259 while (readtoken() == TNL) 2260 ; 2261 } 2262 2263 /* 2264 * The next token must be "token" -- check, then move past it 2265 */ 2266 STATIC void 2267 consumetoken(int token) 2268 { 2269 if (readtoken() != token) { 2270 VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s", 2271 token, tokname[token], tokname[lasttoken])); 2272 CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext)); 2273 VTRACE(DBG_PARSE, ("\n")); 2274 synexpect(token, NULL); 2275 } 2276 } 2277 2278 /* 2279 * Called when an unexpected token is read during the parse. The argument 2280 * is the token that is expected, or -1 if more than one type of token can 2281 * occur at this point. 2282 */ 2283 2284 STATIC void 2285 synexpect(int token, const char *text) 2286 { 2287 char msg[64]; 2288 char *p; 2289 2290 if (lasttoken == TWORD) { 2291 size_t len = strlen(wordtext); 2292 2293 if (len <= 13) 2294 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext); 2295 else 2296 fmtstr(msg, 34, 2297 "Word \"%.10s...\" unexpected", wordtext); 2298 } else 2299 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]); 2300 2301 p = strchr(msg, '\0'); 2302 if (text) 2303 fmtstr(p, 30, " (expecting \"%.10s\")", text); 2304 else if (token >= 0) 2305 fmtstr(p, 30, " (expecting %s)", tokname[token]); 2306 2307 synerror(msg); 2308 /* NOTREACHED */ 2309 } 2310 2311 2312 STATIC void 2313 synerror(const char *msg) 2314 { 2315 error("%d: Syntax error: %s", startlinno, msg); 2316 /* NOTREACHED */ 2317 } 2318 2319 STATIC void 2320 setprompt(int which) 2321 { 2322 whichprompt = which; 2323 2324 #ifndef SMALL 2325 if (!el) 2326 #endif 2327 out2str(getprompt(NULL)); 2328 } 2329 2330 /* 2331 * handle getting the next character, while ignoring \ \n 2332 * (which is a little tricky as we only have one char of pushback 2333 * 
and we need that one elsewhere). 2334 */ 2335 STATIC int 2336 pgetc_linecont(void) 2337 { 2338 int c; 2339 2340 while ((c = pgetc_macro()) == '\\') { 2341 c = pgetc(); 2342 if (c == '\n') { 2343 plinno++; 2344 elided_nl++; 2345 if (doprompt) 2346 setprompt(2); 2347 else 2348 setprompt(0); 2349 } else { 2350 pungetc(); 2351 /* Allow the backslash to be pushed back. */ 2352 pushstring("\\", 1, NULL); 2353 return (pgetc()); 2354 } 2355 } 2356 return (c); 2357 } 2358 2359 /* 2360 * called by editline -- any expansions to the prompt 2361 * should be added here. 2362 */ 2363 const char * 2364 getprompt(void *unused) 2365 { 2366 char *p; 2367 const char *cp; 2368 int wp; 2369 2370 if (!doprompt) 2371 return ""; 2372 2373 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt)); 2374 2375 switch (wp = whichprompt) { 2376 case 0: 2377 return ""; 2378 case 1: 2379 p = ps1val(); 2380 break; 2381 case 2: 2382 p = ps2val(); 2383 break; 2384 default: 2385 return "<internal prompt error>"; 2386 } 2387 if (p == NULL) 2388 return ""; 2389 2390 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p)); 2391 2392 cp = expandstr(p, plinno); 2393 whichprompt = wp; /* history depends on it not changing */ 2394 2395 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp)); 2396 2397 return cp; 2398 } 2399 2400 /* 2401 * Expand a string ... used for expanding prompts (PS1...) 2402 * 2403 * Never return NULL, always some string (return input string if invalid) 2404 * 2405 * The internal routine does the work, leaving the result on the 2406 * stack (or in a static string, or even the input string) and 2407 * handles parser recursion, and cleanup after an error while parsing. 
 *
 * The visible interface copies the result off the stack (if it is there),
 * and handles stack management, leaving the stack in the exact same
 * state it was when expandstr() was called (so it can be used part way
 * through building a stack data structure - as in when PS2 is being
 * expanded half way through reading a "command line")
 *
 * on error, expandonstack() cleans up the parser state, but then
 * simply jumps out through expandstr() without doing any stack cleanup,
 * which is OK, as the error handler must deal with that anyway.
 *
 * The split into two funcs is to avoid problems with setjmp/longjmp
 * and local variables which could otherwise be optimised into bizarre
 * behaviour.
 */
static const char *
expandonstack(char *ps, int lineno)
{
	union node n;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	struct parsefile *const savetopfile = getcurrentfile();
	const int save_x = xflag;
	struct parse_state new_state = init_parse_state;
	struct parse_state *const saveparser = psp.v_current_parser;
	const char *result = NULL;

	if (!setjmp(jmploc.loc)) {
		handler = &jmploc;

		/* parse "ps" using a fresh parser state */
		psp.v_current_parser = &new_state;
		setinputstring(ps, 1, lineno);

		readtoken1(pgetc(), DQSYNTAX, 1);
		if (backquotelist != NULL && !promptcmds)
			result = "-o promptcmds not set: ";
		else {
			n.narg.type = NARG;
			n.narg.next = NULL;
			n.narg.text = wordtext;
			n.narg.lineno = lineno;
			n.narg.backquote = backquotelist;

			xflag = 0;	/* we might be expanding PS4 ... */
			expandarg(&n, NULL, 0);
			result = stackblock();
		}
		INTOFF;
	}
	/* reached both normally, and after an error (result still NULL) */
	psp.v_current_parser = saveparser;
	xflag = save_x;
	popfilesupto(savetopfile);
	handler = savehandler;

	if (result != NULL) {
		INTON;		/* matches the INTOFF above */
	} else {
		if (exception == EXINT)
			exraise(SIGINT);
		result = ps;	/* failed: use the string unexpanded */
	}

	return result;
}

/*
 * Expand "ps" (see above), "lineno" is the line number given to the
 * parser for the string.
 *
 * A result produced on the stack is copied to a static buffer (which
 * grows as needed, and is reused by the next call) before the stack
 * is restored, so the value returned is only good until the next call.
 */
const char *
expandstr(char *ps, int lineno)
{
	const char *result = NULL;
	struct stackmark smark;
	static char *buffer = NULL;	/* storage for prompt, never freed */
	static size_t bufferlen = 0;

	setstackmark(&smark);
	/*
	 * At this point we anticipate that there may be a string
	 * growing on the stack, but we have no idea how big it is.
	 * However we know that it cannot be bigger than the current
	 * allocated stack block, so simply reserve the whole thing,
	 * then we can use the stack without barfing all over what
	 * is there already...   (the stack mark undoes this later.)
	 */
	(void) stalloc(stackblocksize());

	result = expandonstack(ps, lineno);

	if (__predict_true(result == stackblock())) {
		size_t len = strlen(result) + 1;

		/*
		 * the result (usual case) is on the stack, which we
		 * are just about to discard (popstackmark()) so we
		 * need to move it somewhere safe first.
		 */

		if (__predict_false(len > bufferlen)) {
			char *new;
			size_t newlen = bufferlen;

			/* this limit also keeps the doubling below safe */
			if (__predict_false(len > (SIZE_MAX >> 4))) {
				result = "huge prompt: ";
				goto getout;
			}

			/* start at 32, and double until it is big enough */
			if (newlen == 0)
				newlen = 32;
			while (newlen <= len)
				newlen <<= 1;

			new = (char *)realloc(buffer, newlen);

			if (__predict_false(new == NULL)) {
				/*
				 * this should rarely (if ever) happen
				 * but we must do something when it does...
				 */
				result = "No mem for prompt: ";
				goto getout;
			} else {
				buffer = new;
				bufferlen = newlen;
			}
		}
		(void)memcpy(buffer, result, len);
		result = buffer;
	}

  getout:;
	popstackmark(&smark);

	return result;
}