1 /* $NetBSD: parser.c,v 1.157 2018/12/03 06:41:30 kre Exp $ */ 2 3 /*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 #if 0 38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; 39 #else 40 __RCSID("$NetBSD: parser.c,v 1.157 2018/12/03 06:41:30 kre Exp $"); 41 #endif 42 #endif /* not lint */ 43 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <limits.h> 47 48 #include "shell.h" 49 #include "parser.h" 50 #include "nodes.h" 51 #include "expand.h" /* defines rmescapes() */ 52 #include "eval.h" /* defines commandname */ 53 #include "syntax.h" 54 #include "options.h" 55 #include "input.h" 56 #include "output.h" 57 #include "var.h" 58 #include "error.h" 59 #include "memalloc.h" 60 #include "mystring.h" 61 #include "alias.h" 62 #include "show.h" 63 #ifndef SMALL 64 #include "myhistedit.h" 65 #endif 66 67 /* 68 * Shell command parser. 69 */ 70 71 /* values returned by readtoken */ 72 #include "token.h" 73 74 #define OPENBRACE '{' 75 #define CLOSEBRACE '}' 76 77 struct HereDoc { 78 struct HereDoc *next; /* next here document in list */ 79 union node *here; /* redirection node */ 80 char *eofmark; /* string indicating end of input */ 81 int striptabs; /* if set, strip leading tabs */ 82 int startline; /* line number where << seen */ 83 }; 84 85 MKINIT struct parse_state parse_state; 86 union parse_state_p psp = { .c_current_parser = &parse_state }; 87 88 static const struct parse_state init_parse_state = { /* all 0's ... 
*/ 89 .ps_heredoclist = NULL, 90 .ps_parsebackquote = 0, 91 .ps_doprompt = 0, 92 .ps_needprompt = 0, 93 .ps_lasttoken = 0, 94 .ps_tokpushback = 0, 95 .ps_wordtext = NULL, 96 .ps_checkkwd = 0, 97 .ps_redirnode = NULL, 98 .ps_heredoc = NULL, 99 .ps_quoteflag = 0, 100 .ps_startlinno = 0, 101 .ps_funclinno = 0, 102 .ps_elided_nl = 0, 103 }; 104 105 STATIC union node *list(int); 106 STATIC union node *andor(void); 107 STATIC union node *pipeline(void); 108 STATIC union node *command(void); 109 STATIC union node *simplecmd(union node **, union node *); 110 STATIC union node *makeword(int); 111 STATIC void parsefname(void); 112 STATIC int slurp_heredoc(char *const, const int, const int); 113 STATIC void readheredocs(void); 114 STATIC int peektoken(void); 115 STATIC int readtoken(void); 116 STATIC int xxreadtoken(void); 117 STATIC int readtoken1(int, char const *, int); 118 STATIC int noexpand(char *); 119 STATIC void linebreak(void); 120 STATIC void consumetoken(int); 121 STATIC void synexpect(int, const char *) __dead; 122 STATIC void synerror(const char *) __dead; 123 STATIC void setprompt(int); 124 STATIC int pgetc_linecont(void); 125 126 static const char EOFhere[] = "EOF reading here (<<) document"; 127 128 #ifdef DEBUG 129 int parsing = 0; 130 #endif 131 132 /* 133 * Read and parse a command. Returns NEOF on end of file. (NULL is a 134 * valid parse tree indicating a blank line.) 
135 */ 136 137 union node * 138 parsecmd(int interact) 139 { 140 int t; 141 union node *n; 142 143 #ifdef DEBUG 144 parsing++; 145 #endif 146 tokpushback = 0; 147 checkkwd = 0; 148 doprompt = interact; 149 if (doprompt) 150 setprompt(1); 151 else 152 setprompt(0); 153 needprompt = 0; 154 t = readtoken(); 155 #ifdef DEBUG 156 parsing--; 157 #endif 158 if (t == TEOF) 159 return NEOF; 160 if (t == TNL) 161 return NULL; 162 163 #ifdef DEBUG 164 parsing++; 165 #endif 166 tokpushback++; 167 n = list(1); 168 #ifdef DEBUG 169 parsing--; 170 #endif 171 if (heredoclist) 172 error("%d: Here document (<<%s) expected but not present", 173 heredoclist->startline, heredoclist->eofmark); 174 return n; 175 } 176 177 178 STATIC union node * 179 list(int nlflag) 180 { 181 union node *ntop, *n1, *n2, *n3; 182 int tok; 183 184 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno)); 185 186 checkkwd = CHKNL | CHKKWD | CHKALIAS; 187 if (nlflag == 0 && tokendlist[peektoken()]) 188 return NULL; 189 ntop = n1 = NULL; 190 for (;;) { 191 n2 = andor(); 192 tok = readtoken(); 193 if (tok == TBACKGND) { 194 if (n2->type == NCMD || n2->type == NPIPE) 195 n2->ncmd.backgnd = 1; 196 else if (n2->type == NREDIR) 197 n2->type = NBACKGND; 198 else { 199 n3 = stalloc(sizeof(struct nredir)); 200 n3->type = NBACKGND; 201 n3->nredir.n = n2; 202 n3->nredir.redirect = NULL; 203 n2 = n3; 204 } 205 } 206 207 if (ntop == NULL) 208 ntop = n2; 209 else if (n1 == NULL) { 210 n1 = stalloc(sizeof(struct nbinary)); 211 n1->type = NSEMI; 212 n1->nbinary.ch1 = ntop; 213 n1->nbinary.ch2 = n2; 214 ntop = n1; 215 } else { 216 n3 = stalloc(sizeof(struct nbinary)); 217 n3->type = NSEMI; 218 n3->nbinary.ch1 = n1->nbinary.ch2; 219 n3->nbinary.ch2 = n2; 220 n1->nbinary.ch2 = n3; 221 n1 = n3; 222 } 223 224 switch (tok) { 225 case TBACKGND: 226 case TSEMI: 227 tok = readtoken(); 228 /* FALLTHROUGH */ 229 case TNL: 230 if (tok == TNL) { 231 readheredocs(); 232 if (nlflag) 233 return ntop; 234 } else if (tok == TEOF && 
nlflag) 235 return ntop; 236 else 237 tokpushback++; 238 239 checkkwd = CHKNL | CHKKWD | CHKALIAS; 240 if (!nlflag && tokendlist[peektoken()]) 241 return ntop; 242 break; 243 case TEOF: 244 pungetc(); /* push back EOF on input */ 245 return ntop; 246 default: 247 if (nlflag) 248 synexpect(-1, 0); 249 tokpushback++; 250 return ntop; 251 } 252 } 253 } 254 255 STATIC union node * 256 andor(void) 257 { 258 union node *n1, *n2, *n3; 259 int t; 260 261 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno)); 262 263 n1 = pipeline(); 264 for (;;) { 265 if ((t = readtoken()) == TAND) { 266 t = NAND; 267 } else if (t == TOR) { 268 t = NOR; 269 } else { 270 tokpushback++; 271 return n1; 272 } 273 n2 = pipeline(); 274 n3 = stalloc(sizeof(struct nbinary)); 275 n3->type = t; 276 n3->nbinary.ch1 = n1; 277 n3->nbinary.ch2 = n2; 278 n1 = n3; 279 } 280 } 281 282 STATIC union node * 283 pipeline(void) 284 { 285 union node *n1, *n2, *pipenode; 286 struct nodelist *lp, *prev; 287 int negate; 288 289 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno)); 290 291 negate = 0; 292 checkkwd = CHKNL | CHKKWD | CHKALIAS; 293 while (readtoken() == TNOT) { 294 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n")); 295 #ifndef BOGUS_NOT_COMMAND 296 if (posix && negate) 297 synerror("2nd \"!\" unexpected"); 298 #endif 299 negate++; 300 } 301 tokpushback++; 302 n1 = command(); 303 if (readtoken() == TPIPE) { 304 pipenode = stalloc(sizeof(struct npipe)); 305 pipenode->type = NPIPE; 306 pipenode->npipe.backgnd = 0; 307 lp = stalloc(sizeof(struct nodelist)); 308 pipenode->npipe.cmdlist = lp; 309 lp->n = n1; 310 do { 311 prev = lp; 312 lp = stalloc(sizeof(struct nodelist)); 313 lp->n = command(); 314 prev->next = lp; 315 } while (readtoken() == TPIPE); 316 lp->next = NULL; 317 n1 = pipenode; 318 } 319 tokpushback++; 320 if (negate) { 321 CTRACE(DBG_PARSE, ("%snegate pipeline\n", 322 (negate&1) ? "" : "double ")); 323 n2 = stalloc(sizeof(struct nnot)); 324 n2->type = (negate & 1) ? 
NNOT : NDNOT; 325 n2->nnot.com = n1; 326 return n2; 327 } else 328 return n1; 329 } 330 331 332 333 STATIC union node * 334 command(void) 335 { 336 union node *n1, *n2; 337 union node *ap, **app; 338 union node *cp, **cpp; 339 union node *redir, **rpp; 340 int t; 341 #ifdef BOGUS_NOT_COMMAND 342 int negate = 0; 343 #endif 344 345 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno)); 346 347 checkkwd = CHKNL | CHKKWD | CHKALIAS; 348 redir = NULL; 349 n1 = NULL; 350 rpp = &redir; 351 352 /* Check for redirection which may precede command */ 353 while (readtoken() == TREDIR) { 354 *rpp = n2 = redirnode; 355 rpp = &n2->nfile.next; 356 parsefname(); 357 } 358 tokpushback++; 359 360 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */ 361 while (readtoken() == TNOT) { 362 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n")); 363 negate++; 364 } 365 tokpushback++; 366 #endif 367 368 switch (readtoken()) { 369 case TIF: 370 n1 = stalloc(sizeof(struct nif)); 371 n1->type = NIF; 372 n1->nif.test = list(0); 373 consumetoken(TTHEN); 374 n1->nif.ifpart = list(0); 375 n2 = n1; 376 while (readtoken() == TELIF) { 377 n2->nif.elsepart = stalloc(sizeof(struct nif)); 378 n2 = n2->nif.elsepart; 379 n2->type = NIF; 380 n2->nif.test = list(0); 381 consumetoken(TTHEN); 382 n2->nif.ifpart = list(0); 383 } 384 if (lasttoken == TELSE) 385 n2->nif.elsepart = list(0); 386 else { 387 n2->nif.elsepart = NULL; 388 tokpushback++; 389 } 390 consumetoken(TFI); 391 checkkwd = CHKKWD | CHKALIAS; 392 break; 393 case TWHILE: 394 case TUNTIL: 395 n1 = stalloc(sizeof(struct nbinary)); 396 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL; 397 n1->nbinary.ch1 = list(0); 398 consumetoken(TDO); 399 n1->nbinary.ch2 = list(0); 400 consumetoken(TDONE); 401 checkkwd = CHKKWD | CHKALIAS; 402 break; 403 case TFOR: 404 if (readtoken() != TWORD || quoteflag || ! 
goodname(wordtext)) 405 synerror("Bad for loop variable"); 406 n1 = stalloc(sizeof(struct nfor)); 407 n1->type = NFOR; 408 n1->nfor.var = wordtext; 409 linebreak(); 410 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) { 411 app = ≈ 412 while (readtoken() == TWORD) { 413 n2 = makeword(startlinno); 414 *app = n2; 415 app = &n2->narg.next; 416 } 417 *app = NULL; 418 n1->nfor.args = ap; 419 if (lasttoken != TNL && lasttoken != TSEMI) 420 synexpect(TSEMI, 0); 421 } else { 422 static char argvars[5] = { 423 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' 424 }; 425 426 n2 = stalloc(sizeof(struct narg)); 427 n2->type = NARG; 428 n2->narg.text = argvars; 429 n2->narg.backquote = NULL; 430 n2->narg.next = NULL; 431 n2->narg.lineno = startlinno; 432 n1->nfor.args = n2; 433 /* 434 * Newline or semicolon here is optional (but note 435 * that the original Bourne shell only allowed NL). 436 */ 437 if (lasttoken != TNL && lasttoken != TSEMI) 438 tokpushback++; 439 } 440 checkkwd = CHKNL | CHKKWD | CHKALIAS; 441 if ((t = readtoken()) == TDO) 442 t = TDONE; 443 else if (t == TBEGIN) 444 t = TEND; 445 else 446 synexpect(TDO, 0); 447 n1->nfor.body = list(0); 448 consumetoken(t); 449 checkkwd = CHKKWD | CHKALIAS; 450 break; 451 case TCASE: 452 n1 = stalloc(sizeof(struct ncase)); 453 n1->type = NCASE; 454 n1->ncase.lineno = startlinno - elided_nl; 455 consumetoken(TWORD); 456 n1->ncase.expr = makeword(startlinno); 457 linebreak(); 458 if (lasttoken != TWORD || !equal(wordtext, "in")) 459 synexpect(-1, "in"); 460 cpp = &n1->ncase.cases; 461 checkkwd = CHKNL | CHKKWD; 462 readtoken(); 463 /* 464 * Both ksh and bash accept 'case x in esac' 465 * so configure scripts started taking advantage of this. 
466 * The page: http://pubs.opengroup.org/onlinepubs/\ 467 * 009695399/utilities/xcu_chap02.html contradicts itself, 468 * as to if this is legal; the "Case Conditional Format" 469 * paragraph shows one case is required, but the "Grammar" 470 * section shows a grammar that explicitly allows the no 471 * case option. 472 * 473 * The standard also says (section 2.10): 474 * This formal syntax shall take precedence over the 475 * preceding text syntax description. 476 * ie: the "Grammar" section wins. The text is just 477 * a rough guide (introduction to the common case.) 478 */ 479 while (lasttoken != TESAC) { 480 *cpp = cp = stalloc(sizeof(struct nclist)); 481 cp->type = NCLIST; 482 app = &cp->nclist.pattern; 483 if (lasttoken == TLP) 484 readtoken(); 485 for (;;) { 486 if (lasttoken < TWORD) 487 synexpect(TWORD, 0); 488 *app = ap = makeword(startlinno); 489 checkkwd = CHKNL | CHKKWD; 490 if (readtoken() != TPIPE) 491 break; 492 app = &ap->narg.next; 493 readtoken(); 494 } 495 if (lasttoken != TRP) 496 synexpect(TRP, 0); 497 cp->nclist.lineno = startlinno; 498 cp->nclist.body = list(0); 499 500 checkkwd = CHKNL | CHKKWD | CHKALIAS; 501 if ((t = readtoken()) != TESAC) { 502 if (t != TENDCASE && t != TCASEFALL) { 503 synexpect(TENDCASE, 0); 504 } else { 505 if (t == TCASEFALL) 506 cp->type = NCLISTCONT; 507 checkkwd = CHKNL | CHKKWD; 508 readtoken(); 509 } 510 } 511 cpp = &cp->nclist.next; 512 } 513 *cpp = NULL; 514 checkkwd = CHKKWD | CHKALIAS; 515 break; 516 case TLP: 517 n1 = stalloc(sizeof(struct nredir)); 518 n1->type = NSUBSHELL; 519 n1->nredir.n = list(0); 520 n1->nredir.redirect = NULL; 521 if (n1->nredir.n == NULL) 522 synexpect(-1, 0); 523 consumetoken(TRP); 524 checkkwd = CHKKWD | CHKALIAS; 525 break; 526 case TBEGIN: 527 n1 = list(0); 528 if (posix && n1 == NULL) 529 synexpect(-1, 0); 530 consumetoken(TEND); 531 checkkwd = CHKKWD | CHKALIAS; 532 break; 533 534 case TBACKGND: 535 case TSEMI: 536 case TAND: 537 case TOR: 538 case TPIPE: 539 case TNL: 540 
case TEOF: 541 case TRP: 542 case TENDCASE: 543 case TCASEFALL: 544 /* 545 * simple commands must have something in them, 546 * either a word (which at this point includes a=b) 547 * or a redirection. If we reached the end of the 548 * command (which one of these tokens indicates) 549 * when we are just starting, and have not had a 550 * redirect, then ... 551 * 552 * nb: it is still possible to end up with empty 553 * simple commands, if the "command" is a var 554 * expansion that produces nothing: 555 * X= ; $X && $X 556 * --> && 557 * That is OK and is handled after word expansions. 558 */ 559 if (!redir) 560 synexpect(-1, 0); 561 /* 562 * continue to build a node containing the redirect. 563 * the tokpushback means that our ending token will be 564 * read again in simplecmd, causing it to terminate, 565 * so only the redirect(s) will be contained in the 566 * returned n1 567 */ 568 /* FALLTHROUGH */ 569 case TWORD: 570 tokpushback++; 571 n1 = simplecmd(rpp, redir); 572 goto checkneg; 573 default: 574 synexpect(-1, 0); 575 /* NOTREACHED */ 576 } 577 578 /* Now check for redirection which may follow command */ 579 while (readtoken() == TREDIR) { 580 *rpp = n2 = redirnode; 581 rpp = &n2->nfile.next; 582 parsefname(); 583 } 584 tokpushback++; 585 *rpp = NULL; 586 if (redir) { 587 if (n1 == NULL || n1->type != NSUBSHELL) { 588 n2 = stalloc(sizeof(struct nredir)); 589 n2->type = NREDIR; 590 n2->nredir.n = n1; 591 n1 = n2; 592 } 593 n1->nredir.redirect = redir; 594 } 595 596 checkneg: 597 #ifdef BOGUS_NOT_COMMAND 598 if (negate) { 599 VTRACE(DBG_PARSE, ("bogus %snegate command\n", 600 (negate&1) ? "" : "double ")); 601 n2 = stalloc(sizeof(struct nnot)); 602 n2->type = (negate & 1) ? 
NNOT : NDNOT; 603 n2->nnot.com = n1; 604 return n2; 605 } 606 else 607 #endif 608 return n1; 609 } 610 611 612 STATIC union node * 613 simplecmd(union node **rpp, union node *redir) 614 { 615 union node *args, **app; 616 union node *n = NULL; 617 int line = 0; 618 int savecheckkwd; 619 #ifdef BOGUS_NOT_COMMAND 620 union node *n2; 621 int negate = 0; 622 #endif 623 624 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n", 625 redir ? "" : "out", plinno)); 626 627 /* If we don't have any redirections already, then we must reset */ 628 /* rpp to be the address of the local redir variable. */ 629 if (redir == 0) 630 rpp = &redir; 631 632 args = NULL; 633 app = &args; 634 635 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */ 636 while (readtoken() == TNOT) { 637 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n")); 638 negate++; 639 } 640 tokpushback++; 641 #endif 642 643 savecheckkwd = CHKALIAS; 644 for (;;) { 645 checkkwd = savecheckkwd; 646 if (readtoken() == TWORD) { 647 if (line == 0) 648 line = startlinno; 649 n = makeword(startlinno); 650 *app = n; 651 app = &n->narg.next; 652 if (savecheckkwd != 0 && !isassignment(wordtext)) 653 savecheckkwd = 0; 654 } else if (lasttoken == TREDIR) { 655 if (line == 0) 656 line = startlinno; 657 *rpp = n = redirnode; 658 rpp = &n->nfile.next; 659 parsefname(); /* read name of redirection file */ 660 } else if (lasttoken == TLP && app == &args->narg.next 661 && redir == 0) { 662 /* We have a function */ 663 consumetoken(TRP); 664 funclinno = plinno; 665 rmescapes(n->narg.text); 666 if (strchr(n->narg.text, '/')) 667 synerror("Bad function name"); 668 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n", 669 n->narg.text, plinno)); 670 n->type = NDEFUN; 671 n->narg.lineno = plinno - elided_nl; 672 n->narg.next = command(); 673 funclinno = 0; 674 goto checkneg; 675 } else { 676 tokpushback++; 677 break; 678 } 679 } 680 681 if (args == NULL && redir == NULL) 682 synexpect(-1, 0); 683 *app = NULL; 684 *rpp 
= NULL; 685 n = stalloc(sizeof(struct ncmd)); 686 n->type = NCMD; 687 n->ncmd.lineno = line - elided_nl; 688 n->ncmd.backgnd = 0; 689 n->ncmd.args = args; 690 n->ncmd.redirect = redir; 691 n->ncmd.lineno = startlinno; 692 693 checkneg: 694 #ifdef BOGUS_NOT_COMMAND 695 if (negate) { 696 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n", 697 (negate&1) ? "" : "double ")); 698 n2 = stalloc(sizeof(struct nnot)); 699 n2->type = (negate & 1) ? NNOT : NDNOT; 700 n2->nnot.com = n; 701 return n2; 702 } 703 else 704 #endif 705 return n; 706 } 707 708 STATIC union node * 709 makeword(int lno) 710 { 711 union node *n; 712 713 n = stalloc(sizeof(struct narg)); 714 n->type = NARG; 715 n->narg.next = NULL; 716 n->narg.text = wordtext; 717 n->narg.backquote = backquotelist; 718 n->narg.lineno = lno; 719 return n; 720 } 721 722 void 723 fixredir(union node *n, const char *text, int err) 724 { 725 726 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err)); 727 if (!err) 728 n->ndup.vname = NULL; 729 730 if (is_number(text)) 731 n->ndup.dupfd = number(text); 732 else if (text[0] == '-' && text[1] == '\0') 733 n->ndup.dupfd = -1; 734 else { 735 736 if (err) 737 synerror("Bad fd number"); 738 else 739 n->ndup.vname = makeword(startlinno - elided_nl); 740 } 741 } 742 743 744 STATIC void 745 parsefname(void) 746 { 747 union node *n = redirnode; 748 749 if (readtoken() != TWORD) 750 synexpect(-1, 0); 751 if (n->type == NHERE) { 752 struct HereDoc *here = heredoc; 753 struct HereDoc *p; 754 755 if (quoteflag == 0) 756 n->type = NXHERE; 757 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno)); 758 if (here->striptabs) { 759 while (*wordtext == '\t') 760 wordtext++; 761 } 762 763 /* 764 * this test is not really necessary, we are not 765 * required to expand wordtext, but there's no reason 766 * it cannot be $$ or something like that - that would 767 * not mean the pid, but literally two '$' characters. 768 * There is no need for limits on what the word can be. 
769 * However, it needs to stay literal as entered, not 770 * have $ converted to CTLVAR or something, which as 771 * the parser is, at the minute, is impossible to prevent. 772 * So, leave it like this until the rest of the parser is fixed. 773 */ 774 if (!noexpand(wordtext)) 775 synerror("Illegal eof marker for << redirection"); 776 777 rmescapes(wordtext); 778 here->eofmark = wordtext; 779 here->next = NULL; 780 if (heredoclist == NULL) 781 heredoclist = here; 782 else { 783 for (p = heredoclist ; p->next ; p = p->next) 784 continue; 785 p->next = here; 786 } 787 } else if (n->type == NTOFD || n->type == NFROMFD) { 788 fixredir(n, wordtext, 0); 789 } else { 790 n->nfile.fname = makeword(startlinno - elided_nl); 791 } 792 } 793 794 /* 795 * Check to see whether we are at the end of the here document. When this 796 * is called, c is set to the first character of the next input line. If 797 * we are at the end of the here document, this routine sets the c to PEOF. 798 * The new value of c is returned. 799 */ 800 801 static int 802 checkend(int c, char * const eofmark, const int striptabs) 803 { 804 805 if (striptabs) { 806 while (c == '\t') 807 c = pgetc(); 808 } 809 if (c == PEOF) { 810 if (*eofmark == '\0') 811 return (c); 812 synerror(EOFhere); 813 } 814 if (c == *eofmark) { 815 int c2; 816 char *q; 817 818 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++) 819 if (c2 == '\n') { 820 plinno++; 821 needprompt = doprompt; 822 } 823 if ((c2 == PEOF || c2 == '\n') && *q == '\0') { 824 c = PEOF; 825 if (c2 == '\n') { 826 plinno++; 827 needprompt = doprompt; 828 } 829 } else { 830 pungetc(); 831 pushstring(eofmark + 1, q - (eofmark + 1), NULL); 832 } 833 } else if (c == '\n' && *eofmark == '\0') { 834 c = PEOF; 835 plinno++; 836 needprompt = doprompt; 837 } 838 return (c); 839 } 840 841 842 /* 843 * Input any here documents. 
844 */ 845 846 STATIC int 847 slurp_heredoc(char *const eofmark, const int striptabs, const int sq) 848 { 849 int c; 850 char *out; 851 int lines = plinno; 852 853 c = pgetc(); 854 855 /* 856 * If we hit EOF on the input, and the eofmark is a null string ('') 857 * we consider this empty line to be the eofmark, and exit without err. 858 */ 859 if (c == PEOF && *eofmark != '\0') 860 synerror(EOFhere); 861 862 STARTSTACKSTR(out); 863 864 while ((c = checkend(c, eofmark, striptabs)) != PEOF) { 865 do { 866 if (sq) { 867 /* 868 * in single quoted mode (eofmark quoted) 869 * all we look for is \n so we can check 870 * for the epfmark - everything saved literally. 871 */ 872 STPUTC(c, out); 873 if (c == '\n') { 874 plinno++; 875 break; 876 } 877 continue; 878 } 879 /* 880 * In double quoted (non-quoted eofmark) 881 * we must handle \ followed by \n here 882 * otherwise we can mismatch the end mark. 883 * All other uses of \ will be handled later 884 * when the here doc is expanded. 885 * 886 * This also makes sure \\ followed by \n does 887 * not suppress the newline (the \ quotes itself) 888 */ 889 if (c == '\\') { /* A backslash */ 890 STPUTC(c, out); 891 c = pgetc(); /* followed by */ 892 if (c == '\n') { /* a newline? */ 893 STPUTC(c, out); 894 plinno++; 895 continue; /* don't break */ 896 } 897 } 898 STPUTC(c, out); /* keep the char */ 899 if (c == '\n') { /* at end of line */ 900 plinno++; 901 break; /* look for eofmark */ 902 } 903 } while ((c = pgetc()) != PEOF); 904 905 /* 906 * If we have read a line, and reached EOF, without 907 * finding the eofmark, whether the EOF comes before 908 * or immediately after the \n, that is an error. 909 */ 910 if (c == PEOF || (c = pgetc()) == PEOF) 911 synerror(EOFhere); 912 } 913 STPUTC('\0', out); 914 915 c = out - stackblock(); 916 out = stackblock(); 917 grabstackblock(c); 918 wordtext = out; 919 920 VTRACE(DBG_PARSE, 921 ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n", 922 plinno - lines, sq ? 
"quoted " : "", eofmark, 923 striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c), 924 wordtext, (c > 16 ? "..." : ""), plinno)); 925 926 return (plinno - lines); 927 } 928 929 static char * 930 insert_elided_nl(char *str) 931 { 932 while (elided_nl > 0) { 933 STPUTC(CTLNONL, str); 934 elided_nl--; 935 } 936 return str; 937 } 938 939 STATIC void 940 readheredocs(void) 941 { 942 struct HereDoc *here; 943 union node *n; 944 int line, l; 945 946 line = 0; /*XXX - gcc! obviously unneeded */ 947 if (heredoclist) 948 line = heredoclist->startline + 1; 949 l = 0; 950 while (heredoclist) { 951 line += l; 952 here = heredoclist; 953 heredoclist = here->next; 954 if (needprompt) { 955 setprompt(2); 956 needprompt = 0; 957 } 958 959 l = slurp_heredoc(here->eofmark, here->striptabs, 960 here->here->nhere.type == NHERE); 961 962 here->here->nhere.doc = n = makeword(line); 963 964 if (here->here->nhere.type == NHERE) 965 continue; 966 967 /* 968 * Now "parse" here docs that have unquoted eofmarkers. 
969 */ 970 setinputstring(wordtext, 1, line); 971 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n", 972 l, line)); 973 readtoken1(pgetc(), DQSYNTAX, 1); 974 n->narg.text = wordtext; 975 n->narg.backquote = backquotelist; 976 popfile(); 977 } 978 } 979 980 STATIC int 981 peektoken(void) 982 { 983 int t; 984 985 t = readtoken(); 986 tokpushback++; 987 return (t); 988 } 989 990 STATIC int 991 readtoken(void) 992 { 993 int t; 994 #ifdef DEBUG 995 int alreadyseen = tokpushback; 996 int savecheckkwd = checkkwd; 997 #endif 998 struct alias *ap; 999 1000 top: 1001 t = xxreadtoken(); 1002 1003 if (checkkwd & CHKNL) { 1004 while (t == TNL) { 1005 readheredocs(); 1006 t = xxreadtoken(); 1007 } 1008 } 1009 1010 /* 1011 * check for keywords and aliases 1012 */ 1013 if (t == TWORD && !quoteflag) { 1014 const char *const *pp; 1015 1016 if (checkkwd & CHKKWD) 1017 for (pp = parsekwd; *pp; pp++) { 1018 if (**pp == *wordtext && equal(*pp, wordtext)) { 1019 lasttoken = t = pp - 1020 parsekwd + KWDOFFSET; 1021 VTRACE(DBG_PARSE, 1022 ("keyword %s recognized @%d\n", 1023 tokname[t], plinno)); 1024 goto out; 1025 } 1026 } 1027 1028 if (checkkwd & CHKALIAS && 1029 (ap = lookupalias(wordtext, 1)) != NULL) { 1030 VTRACE(DBG_PARSE, 1031 ("alias '%s' recognized -> <:%s:>\n", 1032 wordtext, ap->val)); 1033 pushstring(ap->val, strlen(ap->val), ap); 1034 goto top; 1035 } 1036 } 1037 out: 1038 if (t != TNOT) 1039 checkkwd = 0; 1040 1041 VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n", 1042 alreadyseen ? "reread " : "", tokname[t], 1043 t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd)); 1044 return (t); 1045 } 1046 1047 1048 /* 1049 * Read the next input token. 1050 * If the token is a word, we set backquotelist to the list of cmds in 1051 * backquotes. We set quoteflag to true if any part of the word was 1052 * quoted. 1053 * If the token is TREDIR, then we set redirnode to a structure containing 1054 * the redirection. 
1055 * In all cases, the variable startlinno is set to the number of the line 1056 * on which the token starts. 1057 * 1058 * [Change comment: here documents and internal procedures] 1059 * [Readtoken shouldn't have any arguments. Perhaps we should make the 1060 * word parsing code into a separate routine. In this case, readtoken 1061 * doesn't need to have any internal procedures, but parseword does. 1062 * We could also make parseoperator in essence the main routine, and 1063 * have parseword (readtoken1?) handle both words and redirection.] 1064 */ 1065 1066 #define RETURN(token) return lasttoken = token 1067 1068 STATIC int 1069 xxreadtoken(void) 1070 { 1071 int c; 1072 1073 if (tokpushback) { 1074 tokpushback = 0; 1075 return lasttoken; 1076 } 1077 if (needprompt) { 1078 setprompt(2); 1079 needprompt = 0; 1080 } 1081 elided_nl = 0; 1082 startlinno = plinno; 1083 for (;;) { /* until token or start of word found */ 1084 c = pgetc_macro(); 1085 switch (c) { 1086 case ' ': case '\t': case PFAKE: 1087 continue; 1088 case '#': 1089 while ((c = pgetc()) != '\n' && c != PEOF) 1090 continue; 1091 pungetc(); 1092 continue; 1093 1094 case '\n': 1095 plinno++; 1096 needprompt = doprompt; 1097 RETURN(TNL); 1098 case PEOF: 1099 RETURN(TEOF); 1100 1101 case '&': 1102 if (pgetc_linecont() == '&') 1103 RETURN(TAND); 1104 pungetc(); 1105 RETURN(TBACKGND); 1106 case '|': 1107 if (pgetc_linecont() == '|') 1108 RETURN(TOR); 1109 pungetc(); 1110 RETURN(TPIPE); 1111 case ';': 1112 switch (pgetc_linecont()) { 1113 case ';': 1114 RETURN(TENDCASE); 1115 case '&': 1116 RETURN(TCASEFALL); 1117 default: 1118 pungetc(); 1119 RETURN(TSEMI); 1120 } 1121 case '(': 1122 RETURN(TLP); 1123 case ')': 1124 RETURN(TRP); 1125 1126 case '\\': 1127 switch (pgetc()) { 1128 case '\n': 1129 startlinno = ++plinno; 1130 if (doprompt) 1131 setprompt(2); 1132 else 1133 setprompt(0); 1134 continue; 1135 case PEOF: 1136 RETURN(TEOF); 1137 default: 1138 pungetc(); 1139 break; 1140 } 1141 /* FALLTHROUGH */ 1142 
default: 1143 return readtoken1(c, BASESYNTAX, 0); 1144 } 1145 } 1146 #undef RETURN 1147 } 1148 1149 1150 1151 /* 1152 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 1153 * is not NULL, read a here document. In the latter case, eofmark is the 1154 * word which marks the end of the document and striptabs is true if 1155 * leading tabs should be stripped from the document. The argument firstc 1156 * is the first character of the input token or document. 1157 * 1158 * Because C does not have internal subroutines, I have simulated them 1159 * using goto's to implement the subroutine linkage. The following macros 1160 * will run code that appears at the end of readtoken1. 1161 */ 1162 1163 /* 1164 * We used to remember only the current syntax, variable nesting level, 1165 * double quote state for each var nesting level, and arith nesting 1166 * level (unrelated to var nesting) and one prev syntax when in arith 1167 * syntax. This worked for simple cases, but can't handle arith inside 1168 * var expansion inside arith inside var with some quoted and some not. 1169 * 1170 * Inspired by FreeBSD's implementation (though it was the obvious way) 1171 * though implemented differently, we now have a stack that keeps track 1172 * of what we are doing now, and what we were doing previously. 1173 * Every time something changes, which will eventually end and should 1174 * revert to the previous state, we push this stack, and then pop it 1175 * again later (that is every ${} with an operator (to parse the word 1176 * or pattern that follows) ${x} and $x are too simple to need it) 1177 * $(( )) $( ) and "...". Always. Really, always! 1178 * 1179 * The stack is implemented as one static (on the C stack) base block 1180 * containing LEVELS_PER_BLOCK (8) stack entries, which should be 1181 * enough for the vast majority of cases. For torture tests, we 1182 * malloc more blocks as needed. All accesses through the inline 1183 * functions below. 
1184 */ 1185 1186 /* 1187 * varnest & arinest will typically be 0 or 1 1188 * (varnest can increment in usages like ${x=${y}} but probably 1189 * does not really need to) 1190 * parenlevel allows balancing parens inside a $(( )), it is reset 1191 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work. 1192 * quoted is special - we need to know 2 things ... are we inside "..." 1193 * (even if inherited from some previous nesting level) and was there 1194 * an opening '"' at this level (so the next will be closing). 1195 * "..." can span nesting levels, but cannot be opened in one and 1196 * closed in a different one. 1197 * To handle this, "quoted" has two fields, the bottom 4 (really 2) 1198 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted 1199 * is really so special that this setting is not very important) 1200 * and 0x10 that indicates that an opening quote has been seen. 1201 * The bottom 4 bits are inherited, the 0x10 bit is not. 1202 */ 1203 struct tokenstate { 1204 const char *ts_syntax; 1205 unsigned short ts_parenlevel; /* counters */ 1206 unsigned short ts_varnest; /* 64000 levels should be enough! 
*/ 1207 unsigned short ts_arinest; 1208 unsigned short ts_quoted; /* 1 -> single, 2 -> double */ 1209 }; 1210 1211 #define NQ 0x00 /* Unquoted */ 1212 #define SQ 0x01 /* Single Quotes */ 1213 #define DQ 0x02 /* Double Quotes (or equivalent) */ 1214 #define CQ 0x03 /* C style Single Quotes */ 1215 #define QF 0x0F /* Mask to extract previous values */ 1216 #define QS 0x10 /* Quoting started at this level in stack */ 1217 1218 #define LEVELS_PER_BLOCK 8 1219 #define VSS struct statestack 1220 1221 struct statestack { 1222 VSS *prev; /* previous block in list */ 1223 int cur; /* which of our tokenstates is current */ 1224 struct tokenstate tokenstate[LEVELS_PER_BLOCK]; 1225 }; 1226 1227 static inline struct tokenstate * 1228 currentstate(VSS *stack) 1229 { 1230 return &stack->tokenstate[stack->cur]; 1231 } 1232 1233 static inline struct tokenstate * 1234 prevstate(VSS *stack) 1235 { 1236 if (stack->cur != 0) 1237 return &stack->tokenstate[stack->cur - 1]; 1238 if (stack->prev == NULL) /* cannot drop below base */ 1239 return &stack->tokenstate[0]; 1240 return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1]; 1241 } 1242 1243 static inline VSS * 1244 bump_state_level(VSS *stack) 1245 { 1246 struct tokenstate *os, *ts; 1247 1248 os = currentstate(stack); 1249 1250 if (++stack->cur >= LEVELS_PER_BLOCK) { 1251 VSS *ss; 1252 1253 ss = (VSS *)ckmalloc(sizeof (struct statestack)); 1254 ss->cur = 0; 1255 ss->prev = stack; 1256 stack = ss; 1257 } 1258 1259 ts = currentstate(stack); 1260 1261 ts->ts_parenlevel = 0; /* parens inside never match outside */ 1262 1263 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */ 1264 ts->ts_varnest = os->ts_varnest; 1265 ts->ts_arinest = os->ts_arinest; /* when appropriate */ 1266 ts->ts_syntax = os->ts_syntax; /* they will be altered */ 1267 1268 return stack; 1269 } 1270 1271 static inline VSS * 1272 drop_state_level(VSS *stack) 1273 { 1274 if (stack->cur == 0) { 1275 VSS *ss; 1276 1277 ss = stack; 1278 stack = ss->prev; 1279 
if (stack == NULL) 1280 return ss; 1281 ckfree(ss); 1282 } 1283 --stack->cur; 1284 return stack; 1285 } 1286 1287 static inline void 1288 cleanup_state_stack(VSS *stack) 1289 { 1290 while (stack->prev != NULL) { 1291 stack->cur = 0; 1292 stack = drop_state_level(stack); 1293 } 1294 } 1295 1296 #define PARSESUB() {goto parsesub; parsesub_return:;} 1297 #define PARSEARITH() {goto parsearith; parsearith_return:;} 1298 1299 /* 1300 * The following macros all assume the existance of a local var "stack" 1301 * which contains a pointer to the current struct stackstate 1302 */ 1303 1304 /* 1305 * These are macros rather than inline funcs to avoid code churn as much 1306 * as possible - they replace macros of the same name used previously. 1307 */ 1308 #define ISDBLQUOTE() (currentstate(stack)->ts_quoted & QS) 1309 #define SETDBLQUOTE() (currentstate(stack)->ts_quoted = QS | DQ) 1310 #define CLRDBLQUOTE() (currentstate(stack)->ts_quoted = \ 1311 stack->cur != 0 || stack->prev ? \ 1312 prevstate(stack)->ts_quoted & QF : 0) 1313 1314 /* 1315 * This set are just to avoid excess typing and line lengths... 1316 * The ones that "look like" var names must be implemented to be lvalues 1317 */ 1318 #define syntax (currentstate(stack)->ts_syntax) 1319 #define parenlevel (currentstate(stack)->ts_parenlevel) 1320 #define varnest (currentstate(stack)->ts_varnest) 1321 #define arinest (currentstate(stack)->ts_arinest) 1322 #define quoted (currentstate(stack)->ts_quoted) 1323 #define TS_PUSH() (stack = bump_state_level(stack)) 1324 #define TS_POP() (stack = drop_state_level(stack)) 1325 1326 /* 1327 * Called to parse command substitutions. oldstyle is true if the command 1328 * is enclosed inside `` (otherwise it was enclosed in "$( )") 1329 * 1330 * Internally nlpp is a pointer to the head of the linked 1331 * list of commands (passed by reference), and savelen is the number of 1332 * characters on the top of the stack which must be preserved. 
 */
/*
 * Arguments, as they are used below:
 *	stack	  the tokeniser's state stack; consulted via ISDBLQUOTE()
 *		  and arinest, and unwound with cleanup_state_stack() if
 *		  an error is caught by the handler installed here
 *	in	  current end of the word being built in the stack block;
 *		  the bytes from stackblock() up to it are copied aside
 *		  and put back before returning
 *	pbqlist	  list onto whose tail the new nodelist entry is appended
 *	oldstyle  non-zero for `...`, zero for $(...)
 *	magicq	  not referenced in this function
 *
 * Returns the new end of the word, after the saved bytes have been
 * restored and a CTLBACKQ marker has been added.
 */
static char *
parsebackq(VSS *const stack, char * const in,
    struct nodelist **const pbqlist, const int oldstyle, const int magicq)
{
	struct nodelist **nlpp;
	const int savepbq = parsebackquote;
	union node *n;
	char *out;
	char *str = NULL;
	/* volatile shadow of str, so the error path below sees its value */
	char *volatile sstr = str;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	const int savelen = in - stackblock();
	int saveprompt;
	int lno;

	/*
	 * Error path: reached by longjmp() to jmploc while it is the
	 * installed handler.  Release what was allocated here, then
	 * pass the error on to the handler that was active on entry.
	 */
	if (setjmp(jmploc.loc)) {
		if (sstr)
			ckfree(__UNVOLATILE(sstr));
		cleanup_state_stack(stack);
		parsebackquote = 0;
		handler = savehandler;
		longjmp(handler->loc, 1);
	}
	INTOFF;
	sstr = str = NULL;
	if (savelen > 0) {
		/* copy the partially built word off the stack block */
		sstr = str = ckmalloc(savelen);
		memcpy(str, stackblock(), savelen);
	}
	handler = &jmploc;
	INTON;
	if (oldstyle) {
		/*
		 * We must read until the closing backquote, giving special
		 * treatment to some slashes, and then push the string and
		 * reread it as input, interpreting it normally.
		 */
		int pc;
		int psavelen;
		char *pstr;
		int line1 = plinno;

		VTRACE(DBG_PARSE, ("parsebackq: repackaging `` as $( )"));
		/*
		 * Because the entire `...` is read here, we don't
		 * need to bother the state stack.  That will be used
		 * (as appropriate) when the processed string is re-read.
		 */
		STARTSTACKSTR(out);
#ifdef DEBUG
		for (psavelen = 0;;psavelen++) {
#else
		for (;;) {
#endif
			if (needprompt) {
				setprompt(2);
				needprompt = 0;
			}
			pc = pgetc();
			if (pc == '`')
				break;
			switch (pc) {
			case '\\':
				pc = pgetc();
#ifdef DEBUG
				psavelen++;
#endif
				if (pc == '\n') {   /* keep \ \n for later */
					plinno++;
					needprompt = doprompt;
				}
				/*
				 * The backslash is dropped only before
				 * \ ` $ (and " when in double quotes);
				 * otherwise it is kept in the string.
				 */
				if (pc != '\\' && pc != '`' && pc != '$'
				    && (!ISDBLQUOTE() || pc != '"'))
					STPUTC('\\', out);
				break;

			case '\n':
				plinno++;
				needprompt = doprompt;
				break;

			case PEOF:
				/* report the line where the `...` began */
				startlinno = line1;
				synerror("EOF in backquote substitution");
				break;

			default:
				break;
			}
			STPUTC(pc, out);
		}
		STPUTC('\0', out);
		VTRACE(DBG_PARSE, (" read %d", psavelen));
		psavelen = out - stackblock();
		VTRACE(DBG_PARSE, (" produced %d\n", psavelen));
		if (psavelen > 0) {
			/* make the collected text the current input */
			pstr = grabstackstr(out);
			setinputstring(pstr, 1, line1);
		}
	}
	/* append a new entry at the tail of the caller's list */
	nlpp = pbqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = stalloc(sizeof(struct nodelist));
	(*nlpp)->next = NULL;
	parsebackquote = oldstyle;

	if (oldstyle) {
		saveprompt = doprompt;
		doprompt = 0;
	} else
		saveprompt = 0;

	/* lno ends up as the change in plinno across the list() call */
	lno = -plinno;
	n = list(0);
	lno += plinno;

	if (oldstyle) {
		if (peektoken() != TEOF)
			synexpect(-1, 0);
		doprompt = saveprompt;
	} else
		consumetoken(TRP);

	(*nlpp)->n = n;
	if (oldstyle) {
		/*
		 * Start reading from old file again, ignoring any pushed back
		 * tokens left from the backquote parsing
		 */
		popfile();
		tokpushback = 0;
	}

	/* put the saved start of the word back on the stack block */
	while (stackblocksize() <= savelen)
		growstackblock();
	STARTSTACKSTR(out);
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
		INTOFF;
		ckfree(str);
		sstr = str = NULL;
		INTON;
	}
	parsebackquote = savepbq;
	handler = savehandler;
	if (arinest || ISDBLQUOTE()) {
		STPUTC(CTLBACKQ | CTLQUOTE, out);
		/* one CTLNONL for each line counted in lno above */
		while (--lno >= 0)
			STPUTC(CTLNONL, out);
	} else
		STPUTC(CTLBACKQ, out);

	return out;
}

/*
 * Parse a redirection operator.  The parameter "out" points to a string
 * specifying the fd to be redirected.  It is guaranteed to be either ""
 * or a numeric string (for now anyway).  The parameter "c" contains the
 * first character of the redirection operator.
 *
 * Note the string "out" is on the stack, which we are about to clobber,
 * so process it first...
 *
 * When "out" is empty the fd defaults to 1 for '>' operators and to 0
 * for '<' operators.  The node built is left in redirnode; for a here
 * document (<< or <<-) a HereDoc record is also left in heredoc.
 */

static void
parseredir(const char *out, int c)
{
	union node *np;
	int fd;

	fd = (*out == '\0') ? -1 : number(out);

	np = stalloc(sizeof(struct nfile));
	if (c == '>') {
		if (fd < 0)
			fd = 1;
		c = pgetc_linecont();
		if (c == '>')
			np->type = NAPPEND;
		else if (c == '|')
			np->type = NCLOBBER;
		else if (c == '&')
			np->type = NTOFD;
		else {
			np->type = NTO;
			pungetc();
		}
	} else {	/* c == '<' */
		if (fd < 0)
			fd = 0;
		switch (c = pgetc_linecont()) {
		case '<':
			/* a here doc node is only reallocated if sizes differ */
			if (sizeof (struct nfile) != sizeof (struct nhere)) {
				np = stalloc(sizeof(struct nhere));
				np->nfile.fd = 0;
			}
			np->type = NHERE;
			heredoc = stalloc(sizeof(struct HereDoc));
			heredoc->here = np;
			heredoc->startline = plinno;
			if ((c = pgetc_linecont()) == '-') {
				heredoc->striptabs = 1;
			} else {
				heredoc->striptabs = 0;
				pungetc();
			}
			break;

		case '&':
			np->type = NFROMFD;
			break;

		case '>':
			np->type = NFROMTO;
			break;

		default:
			np->type = NFROM;
			pungetc();
			break;
		}
	}
	np->nfile.fd = fd;

	redirnode = np;		/* this is the "value" of TREDIR */
}
1564 1565 /* 1566 * Called to parse a backslash escape sequence inside $'...'. 1567 * The backslash has already been read. 1568 */ 1569 static char * 1570 readcstyleesc(char *out) 1571 { 1572 int c, vc, i, n; 1573 unsigned int v; 1574 1575 c = pgetc(); 1576 switch (c) { 1577 case '\0': 1578 case PEOF: 1579 synerror("Unterminated quoted string"); 1580 case '\n': 1581 plinno++; 1582 if (doprompt) 1583 setprompt(2); 1584 else 1585 setprompt(0); 1586 return out; 1587 1588 case '\\': 1589 case '\'': 1590 case '"': 1591 v = c; 1592 break; 1593 1594 case 'a': v = '\a'; break; 1595 case 'b': v = '\b'; break; 1596 case 'e': v = '\033'; break; 1597 case 'f': v = '\f'; break; 1598 case 'n': v = '\n'; break; 1599 case 'r': v = '\r'; break; 1600 case 't': v = '\t'; break; 1601 case 'v': v = '\v'; break; 1602 1603 case '0': case '1': case '2': case '3': 1604 case '4': case '5': case '6': case '7': 1605 v = c - '0'; 1606 c = pgetc(); 1607 if (c >= '0' && c <= '7') { 1608 v <<= 3; 1609 v += c - '0'; 1610 c = pgetc(); 1611 if (c >= '0' && c <= '7') { 1612 v <<= 3; 1613 v += c - '0'; 1614 } else 1615 pungetc(); 1616 } else 1617 pungetc(); 1618 break; 1619 1620 case 'c': 1621 c = pgetc(); 1622 if (c < 0x3f || c > 0x7a || c == 0x60) 1623 synerror("Bad \\c escape sequence"); 1624 if (c == '\\' && pgetc() != '\\') 1625 synerror("Bad \\c\\ escape sequence"); 1626 if (c == '?') 1627 v = 127; 1628 else 1629 v = c & 0x1f; 1630 break; 1631 1632 case 'x': 1633 n = 2; 1634 goto hexval; 1635 case 'u': 1636 n = 4; 1637 goto hexval; 1638 case 'U': 1639 n = 8; 1640 hexval: 1641 v = 0; 1642 for (i = 0; i < n; i++) { 1643 c = pgetc(); 1644 if (c >= '0' && c <= '9') 1645 v = (v << 4) + c - '0'; 1646 else if (c >= 'A' && c <= 'F') 1647 v = (v << 4) + c - 'A' + 10; 1648 else if (c >= 'a' && c <= 'f') 1649 v = (v << 4) + c - 'a' + 10; 1650 else { 1651 pungetc(); 1652 break; 1653 } 1654 } 1655 if (n > 2 && v > 127) { 1656 if (v >= 0xd800 && v <= 0xdfff) 1657 synerror("Invalid \\u escape sequence"); 1658 
1659 /* XXX should we use iconv here. What locale? */ 1660 CHECKSTRSPACE(4, out); 1661 1662 if (v <= 0x7ff) { 1663 USTPUTC(0xc0 | v >> 6, out); 1664 USTPUTC(0x80 | (v & 0x3f), out); 1665 return out; 1666 } else if (v <= 0xffff) { 1667 USTPUTC(0xe0 | v >> 12, out); 1668 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1669 USTPUTC(0x80 | (v & 0x3f), out); 1670 return out; 1671 } else if (v <= 0x10ffff) { 1672 USTPUTC(0xf0 | v >> 18, out); 1673 USTPUTC(0x80 | ((v >> 12) & 0x3f), out); 1674 USTPUTC(0x80 | ((v >> 6) & 0x3f), out); 1675 USTPUTC(0x80 | (v & 0x3f), out); 1676 return out; 1677 } 1678 if (v > 127) 1679 v = '?'; 1680 } 1681 break; 1682 default: 1683 synerror("Unknown $'' escape sequence"); 1684 } 1685 vc = (char)v; 1686 1687 /* 1688 * If we managed to create a \n from a \ sequence (no matter how) 1689 * then we replace it with the magic CRTCNL control char, which 1690 * will turn into a \n again later, but in the meantime, never 1691 * causes LINENO increments. 1692 */ 1693 if (vc == '\n') { 1694 USTPUTC(CTLCNL, out); 1695 return out; 1696 } 1697 1698 /* 1699 * We can't handle NUL bytes. 1700 * POSIX says we should skip till the closing quote. 1701 */ 1702 if (vc == '\0') { 1703 while ((c = pgetc()) != '\'') { 1704 if (c == '\\') 1705 c = pgetc(); 1706 if (c == PEOF) 1707 synerror("Unterminated quoted string"); 1708 if (c == '\n') { 1709 plinno++; 1710 if (doprompt) 1711 setprompt(2); 1712 else 1713 setprompt(0); 1714 } 1715 } 1716 pungetc(); 1717 return out; 1718 } 1719 if (NEEDESC(vc)) 1720 USTPUTC(CTLESC, out); 1721 USTPUTC(vc, out); 1722 return out; 1723 } 1724 1725 /* 1726 * The lowest level basic tokenizer. 
 *
 * The next input byte (character) is in firstc, syn says which
 * syntax tables we are to use (basic, single or double quoted, or arith)
 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
 * quote character itself is not special (used parsing here docs and similar)
 *
 * The result is the type of the next token (its value, when there is one,
 * is saved in the relevant global var - must fix that someday!) which is
 * also saved for re-reading ("lasttoken").
 *
 * That is: TREDIR is returned (after parseredir() has run) when magicq
 * is not set and an unquoted word which is empty, or a number, is ended
 * by '<' or '>'.  Otherwise TWORD is returned, with the text in wordtext
 * and with quoteflag and backquotelist set to match.
 *
 * Overall, this routine does far more parsing than it is supposed to.
 * That will also need fixing, someday...
 */
STATIC int
readtoken1(int firstc, char const *syn, int magicq)
{
	int c;
	char * out;		/* end of the word being built on the stack */
	int len;
	struct nodelist *bqlist;	/* command substitutions found so far */
	int quotef;		/* set once any quoting has been seen */
	VSS static_stack;	/* base block of the state stack */
	VSS *stack = &static_stack;

	stack->prev = NULL;
	stack->cur = 0;

	/* from here on these names are macros for fields of the top state */
	syntax = syn;

	startlinno = plinno;
	varnest = 0;
	quoted = 0;
	if (syntax == DQSYNTAX)
		SETDBLQUOTE();
	quotef = 0;
	bqlist = NULL;
	arinest = 0;
	parenlevel = 0;
	elided_nl = 0;

	STARTSTACKSTR(out);

	/*
	 * Inside the switch, "continue" moves on to the next input char,
	 * "break" leaves the switch, and then the loop (end of token).
	 */
	for (c = firstc ;; c = pgetc_macro()) {	/* until end of token */
		if (syntax == ARISYNTAX)
			out = insert_elided_nl(out);
		CHECKSTRSPACE(6, out);	/* permit 6 calls to USTPUTC */
		switch (syntax[c]) {
		case CFAKE:
			if (syntax == BASESYNTAX && varnest == 0)
				break;
			continue;
		case CNL:	/* '\n' */
			if (syntax == BASESYNTAX && varnest == 0)
				break;	/* exit loop */
			USTPUTC(c, out);
			plinno++;
			if (doprompt)
				setprompt(2);
			else
				setprompt(0);
			continue;

		case CSBACK:	/* single quoted backslash */
			/* in $'...' the backslash starts an escape sequence */
			if ((quoted & QF) == CQ) {
				out = readcstyleesc(out);
				continue;
			}
			USTPUTC(CTLESC, out);
			/* FALLTHROUGH */
		case CWORD:
			USTPUTC(c, out);
			continue;

		case CCTL:
			if (!magicq || ISDBLQUOTE())
				USTPUTC(CTLESC, out);
			USTPUTC(c, out);
			continue;
		case CBACK:	/* backslash */
			c = pgetc();
			if (c == PEOF) {
				USTPUTC('\\', out);
				pungetc();
				continue;
			}
			if (c == '\n') {
				/* \ newline vanishes, but is still counted */
				plinno++;
				elided_nl++;
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
				continue;
			}
			quotef = 1;	/* current token is quoted */
			if (ISDBLQUOTE() && c != '\\' && c != '`' &&
			    c != '$' && (c != '"' || magicq)) {
				/*
				 * retain the \ (which we *know* needs CTLESC)
				 * when in "..." and the following char is
				 * not one of the magic few.)
				 * Otherwise the \ has done its work, and
				 * is dropped.
				 */
				USTPUTC(CTLESC, out);
				USTPUTC('\\', out);
			}
			if (NEEDESC(c))
				USTPUTC(CTLESC, out);
			else if (!magicq) {
				USTPUTC(CTLESC, out);
				USTPUTC(c, out);
				continue;
			}
			USTPUTC(c, out);
			continue;
		case CSQUOTE:
			if (syntax != SQSYNTAX) {
				/* opening quote: enter a single quoted level */
				if (!magicq)
					USTPUTC(CTLQUOTEMARK, out);
				quotef = 1;
				TS_PUSH();
				syntax = SQSYNTAX;
				quoted = SQ;
				continue;
			}
			if (magicq && arinest == 0 && varnest == 0) {
				/* Ignore inside quoted here document */
				USTPUTC(c, out);
				continue;
			}
			/* End of single quotes... */
			TS_POP();
			if (syntax == BASESYNTAX)
				USTPUTC(CTLQUOTEEND, out);
			continue;
		case CDQUOTE:
			if (magicq && arinest == 0 && varnest == 0) {
				/* Ignore inside here document */
				USTPUTC(c, out);
				continue;
			}
			quotef = 1;
			if (arinest) {
				if (ISDBLQUOTE()) {
					USTPUTC(CTLQUOTEEND, out);
					TS_POP();
				} else {
					TS_PUSH();
					syntax = DQSYNTAX;
					SETDBLQUOTE();
					USTPUTC(CTLQUOTEMARK, out);
				}
				continue;
			}
			if (magicq)
				continue;
			if (ISDBLQUOTE()) {
				TS_POP();
				USTPUTC(CTLQUOTEEND, out);
			} else {
				TS_PUSH();
				syntax = DQSYNTAX;
				SETDBLQUOTE();
				USTPUTC(CTLQUOTEMARK, out);
			}
			continue;
		case CVAR:	/* '$' */
			out = insert_elided_nl(out);
			PARSESUB();		/* parse substitution */
			continue;
		case CENDVAR:	/* CLOSEBRACE */
			if (varnest > 0 && !ISDBLQUOTE()) {
				TS_POP();
				USTPUTC(CTLENDVAR, out);
			} else {
				USTPUTC(c, out);
			}
			out = insert_elided_nl(out);
			continue;
		case CLP:	/* '(' in arithmetic */
			parenlevel++;
			USTPUTC(c, out);
			continue;;
		case CRP:	/* ')' in arithmetic */
			if (parenlevel > 0) {
				USTPUTC(c, out);
				--parenlevel;
			} else {
				if (pgetc_linecont() == /*(*/ ')') {
					out = insert_elided_nl(out);
					if (--arinest == 0) {
						TS_POP();
						USTPUTC(CTLENDARI, out);
					} else
						USTPUTC(/*(*/ ')', out);
				} else {
					break;	/* to synerror() just below */
#if 0	/* the old way, causes weird errors on bad input */
					/*
					 * unbalanced parens
					 *  (don't 2nd guess - no error)
					 */
					pungetc();
					USTPUTC(/*(*/ ')', out);
#endif
				}
			}
			continue;
		case CBQUOTE:	/* '`' */
			out = parsebackq(stack, out, &bqlist, 1, magicq);
			continue;
		case CEOF:		/* --> c == PEOF */
			break;		/* will exit loop */
		default:
			if (varnest == 0 && !ISDBLQUOTE())
				break;	/* exit loop */
			USTPUTC(c, out);
			continue;
		}
		break;	/* break from switch -> break from for loop too */
	}

	/*
	 * The token has ended: any state other than plain BASESYNTAX at
	 * nesting level 0 means something was left unterminated.
	 */
	if (syntax == ARISYNTAX) {
		cleanup_state_stack(stack);
		synerror(/*((*/ "Missing '))'");
	}
	if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
		cleanup_state_stack(stack);
		synerror("Unterminated quoted string");
	}
	if (varnest != 0) {
		cleanup_state_stack(stack);
		startlinno = plinno;
		/* { */
		synerror("Missing '}'");
	}

	STPUTC('\0', out);
	len = out - stackblock();
	out = stackblock();

	if (!magicq) {
		/* an unquoted, empty or numeric, word before < or > */
		if ((c == '<' || c == '>')
		 && quotef == 0 && (*out == '\0' || is_number(out))) {
			parseredir(out, c);
			cleanup_state_stack(stack);
			return lasttoken = TREDIR;
		} else {
			pungetc();
		}
	}

	VTRACE(DBG_PARSE,
	    ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
	    (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
	    len, elided_nl));

	quoteflag = quotef;
	backquotelist = bqlist;
	grabstackblock(len);
	wordtext = out;
	cleanup_state_stack(stack);
	return lasttoken = TWORD;
/* end of readtoken routine */


/*
 * Parse a substitution.  At this point, we have read the dollar sign
 * and nothing else.
 *
 * Entered by the PARSESUB() macro (a goto), and left by a goto back to
 * the parsesub_return label that macro defines.
 */

parsesub: {
	int subtype;
	int typeloc;	/* offset in the stack block of the type/flags byte */
	int flags;
	char *p;
	static const char types[] = "}-+?=";

	c = pgetc_linecont();
	if (c == '(' /*)*/) {	/* $(command) or $((arith)) */
		if (pgetc_linecont() == '(' /*')'*/ ) {
			out = insert_elided_nl(out);
			PARSEARITH();
		} else {
			out = insert_elided_nl(out);
			pungetc();
			out = parsebackq(stack, out, &bqlist, 0, magicq);
		}
	} else if (c == OPENBRACE || is_name(c) || is_special(c)) {
		USTPUTC(CTLVAR, out);
		typeloc = out - stackblock();
		/* placeholder, overwritten below once the type is known */
		USTPUTC(VSNORMAL, out);
		subtype = VSNORMAL;
		flags = 0;
		if (c == OPENBRACE) {
			c = pgetc_linecont();
			if (c == '#') {
				if ((c = pgetc_linecont()) == CLOSEBRACE)
					c = '#';
				else if (is_name(c) || isdigit(c))
					subtype = VSLENGTH;
				else if (is_special(c)) {
					/*
					 * ${#} is $# - the number of sh params
					 * ${##} is the length of ${#}
					 * ${###} is ${#} with as much nothing
					 *        as possible removed from start
					 * ${##1} is ${#} with leading 1 gone
					 * ${##\#} is ${#} with leading # gone
					 *
					 * this stuff is UGLY!
					 */
					if (pgetc_linecont() == CLOSEBRACE) {
						pungetc();
						subtype = VSLENGTH;
					} else {
						static char cbuf[2];

						pungetc();   /* would like 2 */
						cbuf[0] = c; /* so ... */
						cbuf[1] = '\0';
						pushstring(cbuf, 1, NULL);
						c = '#';     /* ${#:...} */
						subtype = 0; /* .. or similar */
					}
				} else {
					pungetc();
					c = '#';
					subtype = 0;
				}
			}
			else
				subtype = 0;
		}
		/* copy the parameter name (name, digits, or special char) */
		if (is_name(c)) {
			p = out;
			do {
				STPUTC(c, out);
				c = pgetc_linecont();
			} while (is_in_name(c));
#if 0
			if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
				int i;
				int linno;
				char buf[10];

				/*
				 * The "LINENO hack"
				 *
				 * Replace the variable name with the
				 * current line number.
				 */
				linno = plinno;
				if (funclinno != 0)
					linno -= funclinno - 1;
				snprintf(buf, sizeof(buf), "%d", linno);
				STADJUST(-6, out);
				for (i = 0; buf[i] != '\0'; i++)
					STPUTC(buf[i], out);
				flags |= VSLINENO;
			}
#endif
		} else if (is_digit(c)) {
			/* without braces only a single digit is taken */
			do {
				STPUTC(c, out);
				c = pgetc_linecont();
			} while (subtype != VSNORMAL && is_digit(c));
		}
		else if (is_special(c)) {
			USTPUTC(c, out);
			c = pgetc_linecont();
		}
		else {
 badsub:
			cleanup_state_stack(stack);
			synerror("Bad substitution");
		}

		STPUTC('=', out);
		if (subtype == 0) {
			/* inside ${ }, work out which operator follows */
			switch (c) {
			case ':':
				flags |= VSNUL;
				c = pgetc_linecont();
				/*FALLTHROUGH*/
			default:
				p = strchr(types, c);
				if (p == NULL)
					goto badsub;
				subtype = p - types + VSNORMAL;
				break;
			case '%':
			case '#':
				{
					int cc = c;
					subtype = c == '#' ? VSTRIMLEFT :
							     VSTRIMRIGHT;
					c = pgetc_linecont();
					/* a doubled char selects subtype + 1 */
					if (c == cc)
						subtype++;
					else
						pungetc();
					break;
				}
			}
		} else {
			if (subtype == VSLENGTH && c != /*{*/ '}')
				synerror("no modifiers allowed with ${#var}");
			pungetc();
		}
		if (quoted || arinest)
			flags |= VSQUOTE;
		if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
			flags |= VSPATQ;
		*(stackblock() + typeloc) = subtype | flags;
		if (subtype != VSNORMAL) {
			TS_PUSH();
			varnest++;
			arinest = 0;
			if (subtype > VSASSIGN) {	/* # ## % %% */
				syntax = BASESYNTAX;
				quoted = 0;
			}
		}
	} else if (c == '\'' && syntax == BASESYNTAX) {
		/* $' : start of a C style quoted string */
		USTPUTC(CTLQUOTEMARK, out);
		quotef = 1;
		TS_PUSH();
		syntax = SQSYNTAX;
		quoted = CQ;
	} else {
		/* nothing special follows, the '$' is just a '$' */
		USTPUTC('$', out);
		pungetc();
	}
	goto parsesub_return;
}


/*
 * Parse an arithmetic expansion (indicate start of one and set state)
 *
 * Entered by the PARSEARITH() macro, left by a goto back to the
 * parsearith_return label that macro defines.
 */
parsearith: {

#if 0
	if (syntax == ARISYNTAX) {
		/*
		 * we collapse embedded arithmetic expansion to
		 * parentheses, which should be equivalent
		 *
		 *	XXX It isn't, must fix, soonish...
		 */
		USTPUTC('(' /*)*/, out);
		USTPUTC('(' /*)*/, out);
		/*
		 * Need 2 of them because there will (should be)
		 * two closing ))'s to follow later.
		 */
		parenlevel += 2;
	} else
#endif
	{
		USTPUTC(CTLARI, out);
		if (ISDBLQUOTE())
			USTPUTC('"',out);
		else
			USTPUTC(' ',out);

		TS_PUSH();
		syntax = ARISYNTAX;
		arinest = 1;
		varnest = 0;
	}
	goto parsearith_return;
}

}	/* end of readtoken */




#ifdef mkinit
INCLUDE "parser.h"

RESET {
	psp.v_current_parser = &parse_state;

	parse_state.ps_tokpushback = 0;
	parse_state.ps_checkkwd = 0;
	parse_state.ps_heredoclist = NULL;
}
#endif

/*
 * Returns true if the text contains nothing to expand (no dollar signs
 * or backquotes).
 */

STATIC int
noexpand(char *text)
{
	char *p;
	char c;

	p = text;
	while ((c = *p++) != '\0') {
		if (c == CTLQUOTEMARK || c == CTLQUOTEEND)
			continue;
		/*
		 * The char after a CTLESC is skipped without being looked at.
		 * NOTE(review): presumably a CTLESC is never the last char
		 * of text, otherwise this steps over the '\0' - confirm.
		 */
		if (c == CTLESC)
			p++;
		else if (BASESYNTAX[(int)c] == CCTL)
			return 0;
	}
	return 1;
}


/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 */

int
goodname(const char *name)
{
	const char *p;

	p = name;
	if (! is_name(*p))
		return 0;
	while (*++p) {
		if (! is_in_name(*p))
			return 0;
	}
	return 1;
}

/*
 * Return true if the string starts with a variable name which is
 * followed immediately by an '=' (whatever comes after the '=' is
 * not examined).
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;
	while (*++p != '=')
		if (*p == '\0' || !is_in_name(*p))
			return 0;
	return 1;
}

/*
 * skip past any \n's, and leave lasttoken set to whatever follows
 */
STATIC void
linebreak(void)
{
	while (readtoken() == TNL)
		;
}

/*
 * The next token must be "token" -- check, then move past it
 *
 * If it is anything else, synexpect() is called to report the error.
 */
STATIC void
consumetoken(int token)
{
	if (readtoken() != token) {
		VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s",
		    token, tokname[token], tokname[lasttoken]));
		CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext));
		VTRACE(DBG_PARSE, ("\n"));
		synexpect(token, NULL);
	}
}

/*
 * Called when an unexpected token is read during the parse.  The argument
 * is the token that is expected, or -1 if more than one type of token can
 * occur at this point.
2304 */ 2305 2306 STATIC void 2307 synexpect(int token, const char *text) 2308 { 2309 char msg[64]; 2310 char *p; 2311 2312 if (lasttoken == TWORD) { 2313 size_t len = strlen(wordtext); 2314 2315 if (len <= 13) 2316 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext); 2317 else 2318 fmtstr(msg, 34, 2319 "Word \"%.10s...\" unexpected", wordtext); 2320 } else 2321 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]); 2322 2323 p = strchr(msg, '\0'); 2324 if (text) 2325 fmtstr(p, 30, " (expecting \"%.10s\")", text); 2326 else if (token >= 0) 2327 fmtstr(p, 30, " (expecting %s)", tokname[token]); 2328 2329 synerror(msg); 2330 /* NOTREACHED */ 2331 } 2332 2333 2334 STATIC void 2335 synerror(const char *msg) 2336 { 2337 error("%d: Syntax error: %s", startlinno, msg); 2338 /* NOTREACHED */ 2339 } 2340 2341 STATIC void 2342 setprompt(int which) 2343 { 2344 whichprompt = which; 2345 2346 #ifndef SMALL 2347 if (!el) 2348 #endif 2349 out2str(getprompt(NULL)); 2350 } 2351 2352 /* 2353 * handle getting the next character, while ignoring \ \n 2354 * (which is a little tricky as we only have one char of pushback 2355 * and we need that one elsewhere). 2356 */ 2357 STATIC int 2358 pgetc_linecont(void) 2359 { 2360 int c; 2361 2362 while ((c = pgetc_macro()) == '\\') { 2363 c = pgetc(); 2364 if (c == '\n') { 2365 plinno++; 2366 elided_nl++; 2367 if (doprompt) 2368 setprompt(2); 2369 else 2370 setprompt(0); 2371 } else { 2372 pungetc(); 2373 /* Allow the backslash to be pushed back. */ 2374 pushstring("\\", 1, NULL); 2375 return (pgetc()); 2376 } 2377 } 2378 return (c); 2379 } 2380 2381 /* 2382 * called by editline -- any expansions to the prompt 2383 * should be added here. 
2384 */ 2385 const char * 2386 getprompt(void *unused) 2387 { 2388 char *p; 2389 const char *cp; 2390 int wp; 2391 2392 if (!doprompt) 2393 return ""; 2394 2395 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt)); 2396 2397 switch (wp = whichprompt) { 2398 case 0: 2399 return ""; 2400 case 1: 2401 p = ps1val(); 2402 break; 2403 case 2: 2404 p = ps2val(); 2405 break; 2406 default: 2407 return "<internal prompt error>"; 2408 } 2409 if (p == NULL) 2410 return ""; 2411 2412 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p)); 2413 2414 cp = expandstr(p, plinno); 2415 whichprompt = wp; /* history depends on it not changing */ 2416 2417 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp)); 2418 2419 return cp; 2420 } 2421 2422 /* 2423 * Expand a string ... used for expanding prompts (PS1...) 2424 * 2425 * Never return NULL, always some string (return input string if invalid) 2426 * 2427 * The internal routine does the work, leaving the result on the 2428 * stack (or in a static string, or even the input string) and 2429 * handles parser recursion, and cleanup after an error while parsing. 2430 * 2431 * The visible interface copies the result off the stack (if it is there), 2432 * and handles stack management, leaving the stack in the exact same 2433 * state it was when expandstr() was called (so it can be used part way 2434 * through building a stack data structure - as in when PS2 is being 2435 * expanded half way through reading a "command line") 2436 * 2437 * on error, expandonstack() cleans up the parser state, but then 2438 * simply jumps out through expandstr() withut doing any stack cleanup, 2439 * which is OK, as the error handler must deal with that anyway. 2440 * 2441 * The split into two funcs is to avoid problems with setjmp/longjmp 2442 * and local variables which could otherwise be optimised into bizarre 2443 * behaviour. 
 */
/*
 * ps is the string to expand, lineno is handed to setinputstring() and
 * recorded in the node given to expandarg().
 *
 * Returns one of: stackblock() (the expansion, on the stack), a fixed
 * message when a command substitution was found while promptcmds is
 * not set, or ps itself if an error was caught while working.
 */
static const char *
expandonstack(char *ps, int lineno)
{
	union node n;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	struct parsefile *const savetopfile = getcurrentfile();
	const int save_x = xflag;
	struct parse_state new_state = init_parse_state;
	struct parse_state *const saveparser = psp.v_current_parser;
	const char *result = NULL;	/* stays NULL if an error is caught */

	if (!setjmp(jmploc.loc)) {
		handler = &jmploc;

		/* parse using a fresh parser state, reading from ps */
		psp.v_current_parser = &new_state;
		setinputstring(ps, 1, lineno);

		readtoken1(pgetc(), DQSYNTAX, 1);
		if (backquotelist != NULL && !promptcmds)
			result = "-o promptcmds not set: ";
		else {
			n.narg.type = NARG;
			n.narg.next = NULL;
			n.narg.text = wordtext;
			n.narg.lineno = lineno;
			n.narg.backquote = backquotelist;

			xflag = 0;	/* we might be expanding PS4 ... */
			expandarg(&n, NULL, 0);
			result = stackblock();
		}
		/* matched by the INTON below, on the success path only */
		INTOFF;
	}
	/* reached both on success and after a caught error: undo it all */
	psp.v_current_parser = saveparser;
	xflag = save_x;
	popfilesupto(savetopfile);
	handler = savehandler;

	/* an EXEXIT exception is passed on to the previous handler */
	if (exception == EXEXIT)
		longjmp(handler->loc, 1);

	if (result != NULL) {
		INTON;
	} else {
		if (exception == EXINT)
			exraise(SIGINT);
		result = ps;
	}

	return result;
}

/*
 * The visible interface: expand ps, and return the result in storage
 * which is unaffected by later use of the stack.  When the result was
 * produced on the stack it is copied to a static buffer (which grows as
 * needed, and is reused by each call) so the string returned is only
 * good until the next call.  A fixed message is returned instead if the
 * result is too big, or if the buffer cannot be enlarged.
 */
const char *
expandstr(char *ps, int lineno)
{
	const char *result = NULL;
	struct stackmark smark;
	static char *buffer = NULL;	/* storage for prompt, never freed */
	static size_t bufferlen = 0;

	setstackmark(&smark);
	/*
	 * At this point we anticipate that there may be a string
	 * growing on the stack, but we have no idea how big it is.
	 * However we know that it cannot be bigger than the current
	 * allocated stack block, so simply reserve the whole thing,
	 * then we can use the stack without barfing all over what
	 * is there already...   (the stack mark undoes this later.)
	 */
	(void) stalloc(stackblocksize());

	result = expandonstack(ps, lineno);

	if (__predict_true(result == stackblock())) {
		size_t len = strlen(result) + 1;	/* includes the \0 */

		/*
		 * the result (usual case) is on the stack, which we
		 * are just about to discard (popstackmark()) so we
		 * need to move it somewhere safe first.
		 */

		if (__predict_false(len > bufferlen)) {
			char *new;
			size_t newlen = bufferlen;

			/* keeps the doubling below well away from overflow */
			if (__predict_false(len > (SIZE_MAX >> 4))) {
				result = "huge prompt: ";
				goto getout;
			}

			/* start at 32, double until bigger than len */
			if (newlen == 0)
				newlen = 32;
			while (newlen <= len)
				newlen <<= 1;

			new = (char *)realloc(buffer, newlen);

			if (__predict_false(new == NULL)) {
				/*
				 * this should rarely (if ever) happen
				 * but we must do something when it does...
				 */
				result = "No mem for prompt: ";
				goto getout;
			} else {
				buffer = new;
				bufferlen = newlen;
			}
		}
		(void)memcpy(buffer, result, len);
		result = buffer;
	}

  getout:;
	popstackmark(&smark);

	return result;
}