1 /* $NetBSD: parser.c,v 1.171 2020/08/19 22:41:47 kre Exp $ */ 2 3 /*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#ifndef lint
#if 0
static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
#else
__RCSID("$NetBSD: parser.c,v 1.171 2020/08/19 22:41:47 kre Exp $");
#endif
#endif /* not lint */

#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include "shell.h"
#include "parser.h"
#include "nodes.h"
#include "expand.h"	/* defines rmescapes() */
#include "eval.h"	/* defines commandname */
#include "syntax.h"
#include "options.h"
#include "input.h"
#include "output.h"
#include "var.h"
#include "error.h"
#include "memalloc.h"
#include "mystring.h"
#include "alias.h"
#include "show.h"
#ifndef SMALL
#include "myhistedit.h"
#endif
#ifdef DEBUG
#include "nodenames.h"
#endif

/*
 * Shell command parser.
 */

/* values returned by readtoken */
#include "token.h"

#define OPENBRACE	'{'
#define CLOSEBRACE	'}'

/*
 * One pending here document.  parsefname() appends these to heredoclist
 * when it handles an NHERE redirection; readheredocs() later removes
 * them from the list, in order, and reads the document bodies.
 */
struct HereDoc {
	struct HereDoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
	int startline;		/* line number where << seen */
};

/*
 * The parser state proper, and the pointer through which it is reached.
 * NOTE(review): names such as tokpushback, checkkwd and heredoclist used
 * throughout this file are presumably macros (from parser.h) that expand
 * to fields of the state psp refers to -- confirm there.
 */
MKINIT struct parse_state parse_state;
union parse_state_p psp = { .c_current_parser = &parse_state };

/*
 * A parse_state with every field explicitly cleared.
 * Its users are not in this part of the file.
 */
static const struct parse_state init_parse_state = {	/* all 0's ... */
	.ps_heredoclist = NULL,
	.ps_parsebackquote = 0,
	.ps_doprompt = 0,
	.ps_needprompt = 0,
	.ps_lasttoken = 0,
	.ps_tokpushback = 0,
	.ps_wordtext = NULL,
	.ps_checkkwd = 0,
	.ps_redirnode = NULL,
	.ps_heredoc = NULL,
	.ps_quoteflag = 0,
	.ps_startlinno = 0,
	.ps_funclinno = 0,
	.ps_elided_nl = 0,
};

/* Grammar productions (recursive descent, one function per rule) */
STATIC union node *list(int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makeword(int);
STATIC void parsefname(void);
/* Here document input */
STATIC int slurp_heredoc(char *const, const int, const int);
STATIC void readheredocs(void);
/* Token level */
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, int);
STATIC int noexpand(char *);
STATIC void linebreak(void);
STATIC void consumetoken(int);
/* Error reporting and prompting */
STATIC void synexpect(int, const char *) __dead;
STATIC void synerror(const char *) __dead;
STATIC void setprompt(int);
STATIC int pgetc_linecont(void);

/* Message used whenever input ends inside a here document */
static const char EOFhere[] = "EOF reading here (<<) document";

#ifdef DEBUG
/* incremented by parsecmd() around its calls into the parser */
int parsing = 0;
#endif

/*
 * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
 * valid parse tree indicating a blank line.)
139 */ 140 141 union node * 142 parsecmd(int interact) 143 { 144 int t; 145 union node *n; 146 147 #ifdef DEBUG 148 parsing++; 149 #endif 150 tokpushback = 0; 151 checkkwd = 0; 152 doprompt = interact; 153 if (doprompt) 154 setprompt(1); 155 else 156 setprompt(0); 157 needprompt = 0; 158 t = readtoken(); 159 #ifdef DEBUG 160 parsing--; 161 #endif 162 if (t == TEOF) 163 return NEOF; 164 if (t == TNL) 165 return NULL; 166 167 #ifdef DEBUG 168 parsing++; 169 #endif 170 tokpushback++; 171 n = list(1); 172 #ifdef DEBUG 173 parsing--; 174 #endif 175 if (heredoclist) 176 error("%d: Here document (<<%s) expected but not present", 177 heredoclist->startline, heredoclist->eofmark); 178 return n; 179 } 180 181 182 STATIC union node * 183 list(int nlflag) 184 { 185 union node *ntop, *n1, *n2, *n3; 186 int tok; 187 188 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno)); 189 190 checkkwd = CHKNL | CHKKWD | CHKALIAS; 191 if (nlflag == 0 && tokendlist[peektoken()]) 192 return NULL; 193 ntop = n1 = NULL; 194 for (;;) { 195 n2 = andor(); 196 tok = readtoken(); 197 if (tok == TBACKGND) { 198 if (n2->type == NCMD || n2->type == NPIPE) 199 n2->ncmd.backgnd = 1; 200 else if (n2->type == NREDIR) 201 n2->type = NBACKGND; 202 else { 203 n3 = stalloc(sizeof(struct nredir)); 204 n3->type = NBACKGND; 205 n3->nredir.n = n2; 206 n3->nredir.redirect = NULL; 207 n2 = n3; 208 } 209 } 210 211 if (ntop == NULL) 212 ntop = n2; 213 else if (n1 == NULL) { 214 n1 = stalloc(sizeof(struct nbinary)); 215 n1->type = NSEMI; 216 n1->nbinary.ch1 = ntop; 217 n1->nbinary.ch2 = n2; 218 ntop = n1; 219 } else { 220 n3 = stalloc(sizeof(struct nbinary)); 221 n3->type = NSEMI; 222 n3->nbinary.ch1 = n1->nbinary.ch2; 223 n3->nbinary.ch2 = n2; 224 n1->nbinary.ch2 = n3; 225 n1 = n3; 226 } 227 228 switch (tok) { 229 case TBACKGND: 230 case TSEMI: 231 tok = readtoken(); 232 /* FALLTHROUGH */ 233 case TNL: 234 if (tok == TNL) { 235 readheredocs(); 236 if (nlflag) 237 return ntop; 238 } else if (tok == TEOF && 
nlflag) 239 return ntop; 240 else 241 tokpushback++; 242 243 checkkwd = CHKNL | CHKKWD | CHKALIAS; 244 if (!nlflag && tokendlist[peektoken()]) 245 return ntop; 246 break; 247 case TEOF: 248 pungetc(); /* push back EOF on input */ 249 return ntop; 250 default: 251 if (nlflag) 252 synexpect(-1, 0); 253 tokpushback++; 254 return ntop; 255 } 256 } 257 } 258 259 STATIC union node * 260 andor(void) 261 { 262 union node *n1, *n2, *n3; 263 int t; 264 265 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno)); 266 267 n1 = pipeline(); 268 for (;;) { 269 if ((t = readtoken()) == TAND) { 270 t = NAND; 271 } else if (t == TOR) { 272 t = NOR; 273 } else { 274 tokpushback++; 275 return n1; 276 } 277 n2 = pipeline(); 278 n3 = stalloc(sizeof(struct nbinary)); 279 n3->type = t; 280 n3->nbinary.ch1 = n1; 281 n3->nbinary.ch2 = n2; 282 n1 = n3; 283 } 284 } 285 286 STATIC union node * 287 pipeline(void) 288 { 289 union node *n1, *n2, *pipenode; 290 struct nodelist *lp, *prev; 291 int negate; 292 293 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno)); 294 295 negate = 0; 296 checkkwd = CHKNL | CHKKWD | CHKALIAS; 297 while (readtoken() == TNOT) { 298 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n")); 299 #ifndef BOGUS_NOT_COMMAND 300 if (posix && negate) 301 synerror("2nd \"!\" unexpected"); 302 #endif 303 negate++; 304 } 305 tokpushback++; 306 n1 = command(); 307 if (readtoken() == TPIPE) { 308 pipenode = stalloc(sizeof(struct npipe)); 309 pipenode->type = NPIPE; 310 pipenode->npipe.backgnd = 0; 311 lp = stalloc(sizeof(struct nodelist)); 312 pipenode->npipe.cmdlist = lp; 313 lp->n = n1; 314 do { 315 prev = lp; 316 lp = stalloc(sizeof(struct nodelist)); 317 lp->n = command(); 318 prev->next = lp; 319 } while (readtoken() == TPIPE); 320 lp->next = NULL; 321 n1 = pipenode; 322 } 323 tokpushback++; 324 if (negate) { 325 CTRACE(DBG_PARSE, ("%snegate pipeline\n", 326 (negate&1) ? "" : "double ")); 327 n2 = stalloc(sizeof(struct nnot)); 328 n2->type = (negate & 1) ? 
NNOT : NDNOT; 329 n2->nnot.com = n1; 330 return n2; 331 } else 332 return n1; 333 } 334 335 336 337 STATIC union node * 338 command(void) 339 { 340 union node *n1, *n2; 341 union node *ap, **app; 342 union node *cp, **cpp; 343 union node *redir, **rpp; 344 int t; 345 #ifdef BOGUS_NOT_COMMAND 346 int negate = 0; 347 #endif 348 349 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno)); 350 351 checkkwd = CHKNL | CHKKWD | CHKALIAS; 352 redir = NULL; 353 n1 = NULL; 354 rpp = &redir; 355 356 /* Check for redirection which may precede command */ 357 while (readtoken() == TREDIR) { 358 *rpp = n2 = redirnode; 359 rpp = &n2->nfile.next; 360 parsefname(); 361 } 362 tokpushback++; 363 364 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */ 365 while (readtoken() == TNOT) { 366 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n")); 367 negate++; 368 } 369 tokpushback++; 370 #endif 371 372 switch (readtoken()) { 373 case TIF: 374 n1 = stalloc(sizeof(struct nif)); 375 n1->type = NIF; 376 n1->nif.test = list(0); 377 consumetoken(TTHEN); 378 n1->nif.ifpart = list(0); 379 n2 = n1; 380 while (readtoken() == TELIF) { 381 n2->nif.elsepart = stalloc(sizeof(struct nif)); 382 n2 = n2->nif.elsepart; 383 n2->type = NIF; 384 n2->nif.test = list(0); 385 consumetoken(TTHEN); 386 n2->nif.ifpart = list(0); 387 } 388 if (lasttoken == TELSE) 389 n2->nif.elsepart = list(0); 390 else { 391 n2->nif.elsepart = NULL; 392 tokpushback++; 393 } 394 consumetoken(TFI); 395 checkkwd = CHKKWD | CHKALIAS; 396 break; 397 case TWHILE: 398 case TUNTIL: 399 n1 = stalloc(sizeof(struct nbinary)); 400 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL; 401 n1->nbinary.ch1 = list(0); 402 consumetoken(TDO); 403 n1->nbinary.ch2 = list(0); 404 consumetoken(TDONE); 405 checkkwd = CHKKWD | CHKALIAS; 406 break; 407 case TFOR: 408 if (readtoken() != TWORD || quoteflag || ! 
goodname(wordtext)) 409 synerror("Bad for loop variable"); 410 n1 = stalloc(sizeof(struct nfor)); 411 n1->type = NFOR; 412 n1->nfor.var = wordtext; 413 linebreak(); 414 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) { 415 app = ≈ 416 while (readtoken() == TWORD) { 417 n2 = makeword(startlinno); 418 *app = n2; 419 app = &n2->narg.next; 420 } 421 *app = NULL; 422 n1->nfor.args = ap; 423 if (lasttoken != TNL && lasttoken != TSEMI) 424 synexpect(TSEMI, 0); 425 } else { 426 static char argvars[5] = { 427 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' 428 }; 429 430 n2 = stalloc(sizeof(struct narg)); 431 n2->type = NARG; 432 n2->narg.text = argvars; 433 n2->narg.backquote = NULL; 434 n2->narg.next = NULL; 435 n2->narg.lineno = startlinno; 436 n1->nfor.args = n2; 437 /* 438 * Newline or semicolon here is optional (but note 439 * that the original Bourne shell only allowed NL). 440 */ 441 if (lasttoken != TNL && lasttoken != TSEMI) 442 tokpushback++; 443 } 444 checkkwd = CHKNL | CHKKWD | CHKALIAS; 445 if ((t = readtoken()) == TDO) 446 t = TDONE; 447 else if (t == TBEGIN) 448 t = TEND; 449 else 450 synexpect(TDO, 0); 451 n1->nfor.body = list(0); 452 consumetoken(t); 453 checkkwd = CHKKWD | CHKALIAS; 454 break; 455 case TCASE: 456 n1 = stalloc(sizeof(struct ncase)); 457 n1->type = NCASE; 458 n1->ncase.lineno = startlinno - elided_nl; 459 consumetoken(TWORD); 460 n1->ncase.expr = makeword(startlinno); 461 linebreak(); 462 if (lasttoken != TWORD || !equal(wordtext, "in")) 463 synexpect(-1, "in"); 464 cpp = &n1->ncase.cases; 465 checkkwd = CHKNL | CHKKWD; 466 readtoken(); 467 /* 468 * Both ksh and bash accept 'case x in esac' 469 * so configure scripts started taking advantage of this. 
470 * The page: http://pubs.opengroup.org/onlinepubs/\ 471 * 009695399/utilities/xcu_chap02.html contradicts itself, 472 * as to if this is legal; the "Case Conditional Format" 473 * paragraph shows one case is required, but the "Grammar" 474 * section shows a grammar that explicitly allows the no 475 * case option. 476 * 477 * The standard also says (section 2.10): 478 * This formal syntax shall take precedence over the 479 * preceding text syntax description. 480 * ie: the "Grammar" section wins. The text is just 481 * a rough guide (introduction to the common case.) 482 */ 483 while (lasttoken != TESAC) { 484 *cpp = cp = stalloc(sizeof(struct nclist)); 485 cp->type = NCLIST; 486 app = &cp->nclist.pattern; 487 if (lasttoken == TLP) 488 readtoken(); 489 for (;;) { 490 if (lasttoken < TWORD) 491 synexpect(TWORD, 0); 492 *app = ap = makeword(startlinno); 493 checkkwd = CHKNL | CHKKWD; 494 if (readtoken() != TPIPE) 495 break; 496 app = &ap->narg.next; 497 readtoken(); 498 } 499 if (lasttoken != TRP) 500 synexpect(TRP, 0); 501 cp->nclist.lineno = startlinno; 502 cp->nclist.body = list(0); 503 504 checkkwd = CHKNL | CHKKWD | CHKALIAS; 505 if ((t = readtoken()) != TESAC) { 506 if (t != TENDCASE && t != TCASEFALL) { 507 synexpect(TENDCASE, 0); 508 } else { 509 if (t == TCASEFALL) 510 cp->type = NCLISTCONT; 511 checkkwd = CHKNL | CHKKWD; 512 readtoken(); 513 } 514 } 515 cpp = &cp->nclist.next; 516 } 517 *cpp = NULL; 518 checkkwd = CHKKWD | CHKALIAS; 519 break; 520 case TLP: 521 n1 = stalloc(sizeof(struct nredir)); 522 n1->type = NSUBSHELL; 523 n1->nredir.n = list(0); 524 n1->nredir.redirect = NULL; 525 if (n1->nredir.n == NULL) 526 synexpect(-1, 0); 527 consumetoken(TRP); 528 checkkwd = CHKKWD | CHKALIAS; 529 break; 530 case TBEGIN: 531 n1 = list(0); 532 if (posix && n1 == NULL) 533 synexpect(-1, 0); 534 consumetoken(TEND); 535 checkkwd = CHKKWD | CHKALIAS; 536 break; 537 538 case TBACKGND: 539 case TSEMI: 540 case TAND: 541 case TOR: 542 case TPIPE: 543 case TNL: 544 
case TEOF: 545 case TRP: 546 case TENDCASE: 547 case TCASEFALL: 548 /* 549 * simple commands must have something in them, 550 * either a word (which at this point includes a=b) 551 * or a redirection. If we reached the end of the 552 * command (which one of these tokens indicates) 553 * when we are just starting, and have not had a 554 * redirect, then ... 555 * 556 * nb: it is still possible to end up with empty 557 * simple commands, if the "command" is a var 558 * expansion that produces nothing: 559 * X= ; $X && $X 560 * --> && 561 * That is OK and is handled after word expansions. 562 */ 563 if (!redir) 564 synexpect(-1, 0); 565 /* 566 * continue to build a node containing the redirect. 567 * the tokpushback means that our ending token will be 568 * read again in simplecmd, causing it to terminate, 569 * so only the redirect(s) will be contained in the 570 * returned n1 571 */ 572 /* FALLTHROUGH */ 573 case TWORD: 574 tokpushback++; 575 n1 = simplecmd(rpp, redir); 576 goto checkneg; 577 default: 578 synexpect(-1, 0); 579 /* NOTREACHED */ 580 } 581 582 /* Now check for redirection which may follow command */ 583 while (readtoken() == TREDIR) { 584 *rpp = n2 = redirnode; 585 rpp = &n2->nfile.next; 586 parsefname(); 587 } 588 tokpushback++; 589 *rpp = NULL; 590 if (redir) { 591 if (n1 == NULL || n1->type != NSUBSHELL) { 592 n2 = stalloc(sizeof(struct nredir)); 593 n2->type = NREDIR; 594 n2->nredir.n = n1; 595 n1 = n2; 596 } 597 n1->nredir.redirect = redir; 598 } 599 600 checkneg: 601 #ifdef BOGUS_NOT_COMMAND 602 if (negate) { 603 VTRACE(DBG_PARSE, ("bogus %snegate command\n", 604 (negate&1) ? "" : "double ")); 605 n2 = stalloc(sizeof(struct nnot)); 606 n2->type = (negate & 1) ? 
NNOT : NDNOT; 607 n2->nnot.com = n1; 608 return n2; 609 } 610 else 611 #endif 612 return n1; 613 } 614 615 616 STATIC union node * 617 simplecmd(union node **rpp, union node *redir) 618 { 619 union node *args, **app; 620 union node *n = NULL; 621 int line = 0; 622 int savecheckkwd; 623 #ifdef BOGUS_NOT_COMMAND 624 union node *n2; 625 int negate = 0; 626 #endif 627 628 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n", 629 redir ? "" : "out", plinno)); 630 631 /* If we don't have any redirections already, then we must reset */ 632 /* rpp to be the address of the local redir variable. */ 633 if (redir == 0) 634 rpp = &redir; 635 636 args = NULL; 637 app = &args; 638 639 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */ 640 while (readtoken() == TNOT) { 641 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n")); 642 negate++; 643 } 644 tokpushback++; 645 #endif 646 647 savecheckkwd = CHKALIAS; 648 for (;;) { 649 checkkwd = savecheckkwd; 650 if (readtoken() == TWORD) { 651 if (line == 0) 652 line = startlinno; 653 n = makeword(startlinno); 654 *app = n; 655 app = &n->narg.next; 656 if (savecheckkwd != 0 && !isassignment(wordtext)) 657 savecheckkwd = 0; 658 } else if (lasttoken == TREDIR) { 659 if (line == 0) 660 line = startlinno; 661 *rpp = n = redirnode; 662 rpp = &n->nfile.next; 663 parsefname(); /* read name of redirection file */ 664 } else if (lasttoken == TLP && app == &args->narg.next 665 && redir == 0) { 666 /* We have a function */ 667 consumetoken(TRP); 668 funclinno = plinno; 669 /* 670 * Make sure there are no unquoted $'s in the 671 * name (allowing those, not expanding them, 672 * simply treating '$' as a character, is desireable 673 * but the parser has converted them to CTLxxx 674 * chars, and that's not what we want 675 * 676 * Fortunately here the user can simply quote 677 * the name to avoid this restriction. 
678 */ 679 if (!noexpand(n->narg.text)) 680 synerror("Bad function name (use quotes)"); 681 rmescapes(n->narg.text); 682 if (strchr(n->narg.text, '/')) 683 synerror("Bad function name"); 684 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n", 685 n->narg.text, plinno)); 686 n->type = NDEFUN; 687 n->narg.lineno = plinno - elided_nl; 688 n->narg.next = command(); 689 funclinno = 0; 690 goto checkneg; 691 } else { 692 tokpushback++; 693 break; 694 } 695 } 696 697 if (args == NULL && redir == NULL) 698 synexpect(-1, 0); 699 *app = NULL; 700 *rpp = NULL; 701 n = stalloc(sizeof(struct ncmd)); 702 n->type = NCMD; 703 n->ncmd.lineno = line - elided_nl; 704 n->ncmd.backgnd = 0; 705 n->ncmd.args = args; 706 n->ncmd.redirect = redir; 707 n->ncmd.lineno = startlinno; 708 709 checkneg: 710 #ifdef BOGUS_NOT_COMMAND 711 if (negate) { 712 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n", 713 (negate&1) ? "" : "double ")); 714 n2 = stalloc(sizeof(struct nnot)); 715 n2->type = (negate & 1) ? NNOT : NDNOT; 716 n2->nnot.com = n; 717 return n2; 718 } 719 else 720 #endif 721 return n; 722 } 723 724 STATIC union node * 725 makeword(int lno) 726 { 727 union node *n; 728 729 n = stalloc(sizeof(struct narg)); 730 n->type = NARG; 731 n->narg.next = NULL; 732 n->narg.text = wordtext; 733 n->narg.backquote = backquotelist; 734 n->narg.lineno = lno; 735 return n; 736 } 737 738 void 739 fixredir(union node *n, const char *text, int err) 740 { 741 742 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err)); 743 if (!err) 744 n->ndup.vname = NULL; 745 746 if (is_number(text)) 747 n->ndup.dupfd = number(text); 748 else if (text[0] == '-' && text[1] == '\0') 749 n->ndup.dupfd = -1; 750 else { 751 752 if (err) 753 synerror("Bad fd number"); 754 else 755 n->ndup.vname = makeword(startlinno - elided_nl); 756 } 757 } 758 759 760 STATIC void 761 parsefname(void) 762 { 763 union node *n = redirnode; 764 765 if (readtoken() != TWORD) 766 synexpect(-1, 0); 767 if (n->type == NHERE) { 768 struct HereDoc *here = 
heredoc; 769 struct HereDoc *p; 770 771 if (quoteflag == 0) 772 n->type = NXHERE; 773 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno)); 774 if (here->striptabs) { 775 while (*wordtext == '\t') 776 wordtext++; 777 } 778 779 /* 780 * this test is not really necessary, we are not 781 * required to expand wordtext, but there's no reason 782 * it cannot be $$ or something like that - that would 783 * not mean the pid, but literally two '$' characters. 784 * There is no need for limits on what the word can be. 785 * However, it needs to stay literal as entered, not 786 * have $ converted to CTLVAR or something, which as 787 * the parser is, at the minute, is impossible to prevent. 788 * So, leave it like this until the rest of the parser is fixed. 789 */ 790 if (!noexpand(wordtext)) 791 synerror("Illegal eof marker for << redirection"); 792 793 rmescapes(wordtext); 794 here->eofmark = wordtext; 795 here->next = NULL; 796 if (heredoclist == NULL) 797 heredoclist = here; 798 else { 799 for (p = heredoclist ; p->next ; p = p->next) 800 continue; 801 p->next = here; 802 } 803 } else if (n->type == NTOFD || n->type == NFROMFD) { 804 fixredir(n, wordtext, 0); 805 } else { 806 n->nfile.fname = makeword(startlinno - elided_nl); 807 } 808 } 809 810 /* 811 * Check to see whether we are at the end of the here document. When this 812 * is called, c is set to the first character of the next input line. If 813 * we are at the end of the here document, this routine sets the c to PEOF. 814 * The new value of c is returned. 
 */

static int
checkend(int c, char * const eofmark, const int striptabs)
{

	if (striptabs) {
		while (c == '\t')
			c = pgetc();
	}
	if (c == PEOF) {
		/* EOF is acceptable only when the end marker is empty */
		if (*eofmark == '\0')
			return (c);
		synerror(EOFhere);
	}
	if (c == *eofmark) {
		int c2;
		char *q;

		/* compare the rest of the input line with the marker */
		for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
			if (c2 == '\n') {
				plinno++;
				needprompt = doprompt;
			}
		if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
			/* whole marker matched, and the line ends there */
			c = PEOF;
			if (c2 == '\n') {
				plinno++;
				needprompt = doprompt;
			}
		} else {
			/*
			 * Not the marker: give back the char that failed
			 * to match, and push the matched part of the
			 * marker back as input, so the line is read again
			 * as document text (c itself is still returned).
			 */
			pungetc();
			pushstring(eofmark + 1, q - (eofmark + 1), NULL);
		}
	} else if (c == '\n' && *eofmark == '\0') {
		/* an empty line ends a document with an empty marker */
		c = PEOF;
		plinno++;
		needprompt = doprompt;
	}
	return (c);
}


/*
 * Input any here documents.
 */

/*
 * Read the body of one here document, up to (not including) the line
 * that matches eofmark, and leave it, NUL terminated, in wordtext.
 * Leading tabs are dropped from each line if striptabs is set.  If sq
 * is set the text is saved literally; otherwise \ followed by newline
 * does not end a line (so the marker is not looked for after it).
 * Returns the number of input lines consumed.
 */
STATIC int
slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
{
	int c;
	char *out;
	int lines = plinno;

	c = pgetc();

	/*
	 * If we hit EOF on the input, and the eofmark is a null string ('')
	 * we consider this empty line to be the eofmark, and exit without err.
	 */
	if (c == PEOF && *eofmark != '\0')
		synerror(EOFhere);

	STARTSTACKSTR(out);

	/* once around the outer loop for each line of the document */
	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
		do {
			if (sq) {
				/*
				 * in single quoted mode (eofmark quoted)
				 * all we look for is \n so we can check
				 * for the eofmark - everything saved literally.
				 */
				STPUTC(c, out);
				if (c == '\n') {
					plinno++;
					break;
				}
				continue;
			}
			/*
			 * In double quoted (non-quoted eofmark)
			 * we must handle \ followed by \n here
			 * otherwise we can mismatch the end mark.
			 * All other uses of \ will be handled later
			 * when the here doc is expanded.
			 *
			 * This also makes sure \\ followed by \n does
			 * not suppress the newline (the \ quotes itself)
			 */
			if (c == '\\') {		/* A backslash */
				STPUTC(c, out);
				c = pgetc();		/* followed by */
				if (c == '\n') {	/* a newline?  */
					STPUTC(c, out);
					plinno++;
					continue;	/* don't break */
				}
			}
			STPUTC(c, out);			/* keep the char */
			if (c == '\n') {		/* at end of line */
				plinno++;
				break;			/* look for eofmark */
			}
		} while ((c = pgetc()) != PEOF);

		/*
		 * If we have read a line, and reached EOF, without
		 * finding the eofmark, whether the EOF comes before
		 * or immediately after the \n, that is an error.
		 */
		if (c == PEOF || (c = pgetc()) == PEOF)
			synerror(EOFhere);
	}
	STPUTC('\0', out);

	/* c is reused from here on as the length of the collected text */
	c = out - stackblock();
	out = stackblock();
	grabstackblock(c);
	wordtext = out;

	VTRACE(DBG_PARSE,
	   ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
		plinno - lines, sq ? "quoted " : "", eofmark,
		striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
		wordtext, (c > 16 ? "..." : ""), plinno));

	return (plinno - lines);
}

/*
 * Append one CTLNONL to the string being built at str for each newline
 * counted in elided_nl, resetting that count to 0.  Returns the updated
 * string pointer.
 */
static char *
insert_elided_nl(char *str)
{
	while (elided_nl > 0) {
		STPUTC(CTLNONL, str);
		elided_nl--;
	}
	return str;
}

/*
 * Read the bodies of all here documents queued on heredoclist, in
 * order, emptying the list.  Each body is attached to its redirection
 * node as a word; bodies of documents whose end marker was not quoted
 * (node type other than NHERE) are also run through readtoken1() with
 * DQSYNTAX so that expansions in them are recognised.
 */
STATIC void
readheredocs(void)
{
	struct HereDoc *here;
	union node *n;
	int line, l;

	line = 0;		/*XXX - gcc!  obviously unneeded */
	if (heredoclist)
		line = heredoclist->startline + 1;
	l = 0;
	while (heredoclist) {
		/* each document starts where the previous one ended */
		line += l;
		here = heredoclist;
		heredoclist = here->next;
		if (needprompt) {
			setprompt(2);
			needprompt = 0;
		}

		l = slurp_heredoc(here->eofmark, here->striptabs,
		    here->here->nhere.type == NHERE);

		here->here->nhere.doc = n = makeword(line);

		if (here->here->nhere.type == NHERE)
			continue;

		/*
		 * Now "parse" here docs that have unquoted eofmarkers.
		 */
		setinputstring(wordtext, 1, line);
		VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
		    l, line));
		readtoken1(pgetc(), DQSYNTAX, 1);
		n->narg.text = wordtext;
		n->narg.backquote = backquotelist;
		popfile();
	}
}

/*
 * Return the next token without consuming it: it is read, then
 * pushed back.
 */
STATIC int
peektoken(void)
{
	int t;

	t = readtoken();
	tokpushback++;
	return (t);
}

/*
 * Read the next token, applying the processing requested in checkkwd:
 * CHKNL skips newlines (reading pending here documents after each),
 * CHKKWD turns unquoted words found in parsekwd[] into keyword tokens,
 * and CHKALIAS replaces unquoted alias names by their values and reads
 * again.  checkkwd is cleared before returning, unless the token
 * is TNOT.
 */
STATIC int
readtoken(void)
{
	int t;
#ifdef DEBUG
	int alreadyseen = tokpushback;
	int savecheckkwd = checkkwd;
#endif
	struct alias *ap;

 top:
	t = xxreadtoken();

	if (checkkwd & CHKNL) {
		while (t == TNL) {
			readheredocs();
			t = xxreadtoken();
		}
	}

	/*
	 * check for keywords and aliases
	 */
	if (t == TWORD && !quoteflag) {
		const char *const *pp;

		if (checkkwd & CHKKWD)
			for (pp = parsekwd; *pp; pp++) {
				if (**pp == *wordtext && equal(*pp, wordtext)) {
					/* token value is index + KWDOFFSET */
					lasttoken = t = pp -
					    parsekwd + KWDOFFSET;
					VTRACE(DBG_PARSE,
					    ("keyword %s recognized @%d\n",
					    tokname[t], plinno));
					goto out;
				}
			}

		if (checkkwd & CHKALIAS &&
		    (ap = lookupalias(wordtext, 1)) != NULL) {
			VTRACE(DBG_PARSE,
			    ("alias '%s' recognized -> <:%s:>\n",
			    wordtext, ap->val));
			/* the alias value becomes input; start over */
			pushstring(ap->val, strlen(ap->val), ap);
			goto top;
		}
	}
 out:
	if (t != TNOT)
		checkkwd = 0;

	VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n",
	    alreadyseen ? "reread " : "", tokname[t],
	    t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd));
	return (t);
}


/*
 * Read the next input token.
 * If the token is a word, we set backquotelist to the list of cmds in
 *	backquotes.  We set quoteflag to true if any part of the word was
 *	quoted.
 * If the token is TREDIR, then we set redirnode to a structure containing
 *	the redirection.
 * In all cases, the variable startlinno is set to the number of the line
 *	on which the token starts.
 *
 * [Change comment:  here documents and internal procedures]
 * [Readtoken shouldn't have any arguments.  Perhaps we should make the
 *  word parsing code into a separate routine.  In this case, readtoken
 *  doesn't need to have any internal procedures, but parseword does.
 *  We could also make parseoperator in essence the main routine, and
 *  have parseword (readtoken1?) handle both words and redirection.]
 */

/* return a token, remembering it in lasttoken (for tokpushback) */
#define RETURN(token)	return lasttoken = (token)

STATIC int
xxreadtoken(void)
{
	int c;

	/* a pushed back token is simply delivered again */
	if (tokpushback) {
		tokpushback = 0;
		CTRACE(DBG_LEXER,
		    ("xxreadtoken() returns %s (%d) again\n",
			tokname[lasttoken], lasttoken));
		return lasttoken;
	}
	if (needprompt) {
		setprompt(2);
		needprompt = 0;
	}
	elided_nl = 0;
	startlinno = plinno;
	for (;;) {	/* until token or start of word found */
		c = pgetc_macro();
		CTRACE(DBG_LEXER, ("xxreadtoken() sees '%c' (%#.2x) ",
		    c&0xFF, c&0x1FF));
		switch (c) {
		case ' ': case '\t': case PFAKE:
			CTRACE(DBG_LEXER, (" ignored\n"));
			continue;
		case '#':
			/* comment: skip up to, not including, the newline */
			while ((c = pgetc()) != '\n' && c != PEOF)
				continue;
			CTRACE(DBG_LEXER,
			    ("skipped comment to (not incl) \\n\n"));
			pungetc();
			continue;

		case '\n':
			plinno++;
			CTRACE(DBG_LEXER, ("newline now @%d\n", plinno));
			needprompt = doprompt;
			RETURN(TNL);
		case PEOF:
			CTRACE(DBG_LEXER, ("EOF -> TEOF (return)\n"));
			RETURN(TEOF);

		case '&':
			if (pgetc_linecont() == '&') {
				CTRACE(DBG_LEXER,
				    ("and another -> TAND (return)\n"));
				RETURN(TAND);
			}
			pungetc();
			CTRACE(DBG_LEXER, (" -> TBACKGND (return)\n"));
			RETURN(TBACKGND);
		case '|':
			if (pgetc_linecont() == '|') {
				CTRACE(DBG_LEXER,
				    ("and another -> TOR (return)\n"));
				RETURN(TOR);
			}
			pungetc();
			CTRACE(DBG_LEXER, (" -> TPIPE (return)\n"));
			RETURN(TPIPE);
		case ';':
			switch (pgetc_linecont()) {
			case ';':
				CTRACE(DBG_LEXER,
				    ("and another -> TENDCASE (return)\n"));
				RETURN(TENDCASE);
			case '&':
				CTRACE(DBG_LEXER,
				    ("and '&' -> TCASEFALL (return)\n"));
				RETURN(TCASEFALL);
			default:
				pungetc();
				CTRACE(DBG_LEXER, (" -> TSEMI (return)\n"));
				RETURN(TSEMI);
			}
		case '(':
			CTRACE(DBG_LEXER, (" -> TLP (return)\n"));
			RETURN(TLP);
		case ')':
			CTRACE(DBG_LEXER, (" -> TRP (return)\n"));
			RETURN(TRP);

		case '\\':
			switch (pgetc()) {
			case '\n':
				/* line continuation between tokens */
				startlinno = ++plinno;
				CTRACE(DBG_LEXER, ("\\\n ignored, now @%d\n",
				    plinno));
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
				continue;
			case PEOF:
				CTRACE(DBG_LEXER,
				  ("then EOF -> TEOF (return) '\\' dropped\n"));
				RETURN(TEOF);
			default:
				/* the \ starts a word; c is still '\\' */
				CTRACE(DBG_LEXER, ("not \\\n or EOF: "));
				pungetc();
				break;
			}
			/* FALLTHROUGH */
		default:
			CTRACE(DBG_LEXER, ("getting a word\n"));
			return readtoken1(c, BASESYNTAX, 0);
		}
	}
#undef RETURN
}



/*
 * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
 * is not NULL, read a here document.  In the latter case, eofmark is the
 * word which marks the end of the document and striptabs is true if
 * leading tabs should be stripped from the document.  The argument firstc
 * is the first character of the input token or document.
 *
 * Because C does not have internal subroutines, I have simulated them
 * using goto's to implement the subroutine linkage.  The following macros
 * will run code that appears at the end of readtoken1.
 */

/*
 * We used to remember only the current syntax, variable nesting level,
 * double quote state for each var nesting level, and arith nesting
 * level (unrelated to var nesting) and one prev syntax when in arith
 * syntax.  This worked for simple cases, but can't handle arith inside
 * var expansion inside arith inside var with some quoted and some not.
 *
 * Inspired by FreeBSD's implementation (though it was the obvious way)
 * though implemented differently, we now have a stack that keeps track
 * of what we are doing now, and what we were doing previously.
 * Every time something changes, which will eventually end and should
 * revert to the previous state, we push this stack, and then pop it
 * again later (that is every ${} with an operator (to parse the word
 * or pattern that follows) ${x} and $x are too simple to need it)
 * $(( )) $( ) and "...".   Always.   Really, always!
 *
 * The stack is implemented as one static (on the C stack) base block
 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
 * enough for the vast majority of cases.  For torture tests, we
 * malloc more blocks as needed.  All accesses through the inline
 * functions below.
 */

/*
 * varnest & arinest will typically be 0 or 1
 * (varnest can increment in usages like ${x=${y}} but probably
 *  does not really need to)
 * parenlevel allows balancing parens inside a $(( )), it is reset
 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
 * quoted is special - we need to know 2 things ... are we inside "..."
 * (even if inherited from some previous nesting level) and was there
 * an opening '"' at this level (so the next will be closing).
 * "..." can span nesting levels, but cannot be opened in one and
 * closed in a different one.
 * To handle this, "quoted" has two fields, the bottom 4 (really 2)
 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
 * is really so special that this setting is not very important)
 * and 0x10 that indicates that an opening quote has been seen.
 * The bottom 4 bits are inherited, the 0x10 bit is not.
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax in use at this level */
	unsigned short ts_parenlevel;	/* counters */
	unsigned short ts_varnest;	/* 64000 levels should be enough! */
	unsigned short ts_arinest;
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
	unsigned short ts_magicq;	/* heredoc or word expand */
};

/* values for ts_quoted */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	CQ	0x03	/* C style Single Quotes */
#define	QF	0x0F	/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

#define	LEVELS_PER_BLOCK	8
#define	VSS			struct statestack

/* One block of the state stack; blocks are linked through prev */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};

/* Return the entry at index cur of the given block (the top of stack) */
static inline struct tokenstate *
currentstate(VSS *stack)
{
	return &stack->tokenstate[stack->cur];
}

#ifdef notdef
/*
 * Return the entry below the current one: in the previous block if
 * the current entry is the first in its block, or the base entry itself
 * if there is nothing below it.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	if (stack->cur != 0)
		return &stack->tokenstate[stack->cur - 1];
	if (stack->prev == NULL)	/* cannot drop below base */
		return &stack->tokenstate[0];
	return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
}
#endif

/*
 * Push a new level on the state stack, and return the block which now
 * holds the current entry (a newly allocated block if the given one
 * was full, otherwise the same block).  The new entry starts with
 * parenlevel 0, and inherits everything else from the old entry,
 * except that the QS bit of ts_quoted is dropped.
 */
static inline VSS *
bump_state_level(VSS *stack)
{
	struct tokenstate *os, *ts;

	os = currentstate(stack);

	/*
	 * NOTE(review): when a new block is added, cur in the old block
	 * is left equal to LEVELS_PER_BLOCK; presumably the matching pop
	 * routine (not visible here) decrements it again -- confirm.
	 */
	if (++stack->cur >= LEVELS_PER_BLOCK) {
		VSS *ss;

		ss = (VSS *)ckmalloc(sizeof (struct statestack));
		ss->cur = 0;
		ss->prev = stack;
		stack = ss;
	}

	ts = currentstate(stack);

	ts->ts_parenlevel = 0;	/* parens inside never match outside */

	ts->ts_quoted  = os->ts_quoted & QF;	/* these are default settings */
	ts->ts_varnest = os->ts_varnest;
	ts->ts_arinest = os->ts_arinest;	/* when appropriate	   */
	ts->ts_syntax  = os->ts_syntax;		/*    they will be altered */
	ts->ts_magicq  = os->ts_magicq;

	return stack;
}

static inline VSS *
drop_state_level(VSS *stack) 1324 { 1325 if (stack->cur == 0) { 1326 VSS *ss; 1327 1328 ss = stack; 1329 stack = ss->prev; 1330 if (stack == NULL) 1331 return ss; 1332 ckfree(ss); 1333 } 1334 --stack->cur; 1335 return stack; 1336 } 1337 1338 static inline void 1339 cleanup_state_stack(VSS *stack) 1340 { 1341 while (stack->prev != NULL) { 1342 stack->cur = 0; 1343 stack = drop_state_level(stack); 1344 } 1345 } 1346 1347 #define PARSESUB() {goto parsesub; parsesub_return:;} 1348 #define PARSEARITH() {goto parsearith; parsearith_return:;} 1349 1350 /* 1351 * The following macros all assume the existance of a local var "stack" 1352 * which contains a pointer to the current struct stackstate 1353 */ 1354 1355 /* 1356 * These are macros rather than inline funcs to avoid code churn as much 1357 * as possible - they replace macros of the same name used previously. 1358 */ 1359 #define ISDBLQUOTE() (currentstate(stack)->ts_quoted & QS) 1360 #define SETDBLQUOTE() (currentstate(stack)->ts_quoted = QS | DQ) 1361 #ifdef notdef 1362 #define CLRDBLQUOTE() (currentstate(stack)->ts_quoted = \ 1363 stack->cur != 0 || stack->prev ? \ 1364 prevstate(stack)->ts_quoted & QF : 0) 1365 #endif 1366 1367 /* 1368 * This set are just to avoid excess typing and line lengths... 1369 * The ones that "look like" var names must be implemented to be lvalues 1370 */ 1371 #define syntax (currentstate(stack)->ts_syntax) 1372 #define parenlevel (currentstate(stack)->ts_parenlevel) 1373 #define varnest (currentstate(stack)->ts_varnest) 1374 #define arinest (currentstate(stack)->ts_arinest) 1375 #define quoted (currentstate(stack)->ts_quoted) 1376 #define magicq (currentstate(stack)->ts_magicq) 1377 #define TS_PUSH() (stack = bump_state_level(stack)) 1378 #define TS_POP() (stack = drop_state_level(stack)) 1379 1380 /* 1381 * Called to parse command substitutions. 
oldstyle is true if the command 1382 * is enclosed inside `` (otherwise it was enclosed in "$( )") 1383 * 1384 * Internally nlpp is a pointer to the head of the linked 1385 * list of commands (passed by reference), and savelen is the number of 1386 * characters on the top of the stack which must be preserved. 1387 */ 1388 static char * 1389 parsebackq(VSS *const stack, char * const in, 1390 struct nodelist **const pbqlist, const int oldstyle) 1391 { 1392 struct nodelist **nlpp; 1393 const int savepbq = parsebackquote; 1394 union node *n; 1395 char *out; 1396 char *str = NULL; 1397 char *volatile sstr = str; 1398 struct jmploc jmploc; 1399 struct jmploc *const savehandler = handler; 1400 struct parsefile *const savetopfile = getcurrentfile(); 1401 const int savelen = in - stackblock(); 1402 int saveprompt; 1403 int lno; 1404 1405 if (setjmp(jmploc.loc)) { 1406 popfilesupto(savetopfile); 1407 if (sstr) 1408 ckfree(__UNVOLATILE(sstr)); 1409 cleanup_state_stack(stack); 1410 parsebackquote = 0; 1411 handler = savehandler; 1412 CTRACE(DBG_LEXER, ("parsebackq() err (%d), unwinding\n", 1413 exception)); 1414 longjmp(handler->loc, 1); 1415 } 1416 INTOFF; 1417 sstr = str = NULL; 1418 if (savelen > 0) { 1419 sstr = str = ckmalloc(savelen); 1420 memcpy(str, stackblock(), savelen); 1421 } 1422 handler = &jmploc; 1423 INTON; 1424 if (oldstyle) { 1425 /* 1426 * We must read until the closing backquote, giving special 1427 * treatment to some slashes, and then push the string and 1428 * reread it as input, interpreting it normally. 1429 */ 1430 int pc; 1431 int psavelen; 1432 char *pstr; 1433 int line1 = plinno; 1434 1435 VTRACE(DBG_PARSE|DBG_LEXER, 1436 ("parsebackq: repackaging `` as $( )")); 1437 /* 1438 * Because the entire `...` is read here, we don't 1439 * need to bother the state stack. That will be used 1440 * (as appropriate) when the processed string is re-read. 
1441 */ 1442 STARTSTACKSTR(out); 1443 #ifdef DEBUG 1444 for (psavelen = 0;;psavelen++) { /* } */ 1445 #else 1446 for (;;) { 1447 #endif 1448 if (needprompt) { 1449 setprompt(2); 1450 needprompt = 0; 1451 } 1452 pc = pgetc(); 1453 VTRACE(DBG_LEXER, 1454 ("parsebackq() got '%c'(%#.2x) in `` %s", pc&0xFF, 1455 pc&0x1FF, pc == '`' ? "terminator\n" : "")); 1456 if (pc == '`') 1457 break; 1458 switch (pc) { 1459 case '\\': 1460 pc = pgetc(); 1461 VTRACE(DBG_LEXER, ("then '%c'(%#.2x) ", 1462 pc&0xFF, pc&0x1FF)); 1463 #ifdef DEBUG 1464 psavelen++; 1465 #endif 1466 if (pc == '\n') { /* keep \ \n for later */ 1467 plinno++; 1468 VTRACE(DBG_LEXER, ("@%d ", plinno)); 1469 needprompt = doprompt; 1470 } 1471 if (pc != '\\' && pc != '`' && pc != '$' 1472 && (!ISDBLQUOTE() || pc != '"')) { 1473 VTRACE(DBG_LEXER, ("keep '\\' ")); 1474 STPUTC('\\', out); 1475 } 1476 break; 1477 1478 case '\n': 1479 plinno++; 1480 VTRACE(DBG_LEXER, ("@%d ", plinno)); 1481 needprompt = doprompt; 1482 break; 1483 1484 case PEOF: 1485 startlinno = line1; 1486 VTRACE(DBG_LEXER, ("EOF\n", plinno)); 1487 synerror("EOF in backquote substitution"); 1488 break; 1489 1490 default: 1491 break; 1492 } 1493 VTRACE(DBG_LEXER, (".\n", plinno)); 1494 STPUTC(pc, out); 1495 } 1496 STPUTC('\0', out); 1497 VTRACE(DBG_LEXER, ("parsebackq() ``:")); 1498 VTRACE(DBG_PARSE|DBG_LEXER, (" read %d", psavelen)); 1499 psavelen = out - stackblock(); 1500 VTRACE(DBG_PARSE|DBG_LEXER, (" produced %d\n", psavelen)); 1501 if (psavelen > 0) { 1502 pstr = grabstackstr(out); 1503 CTRACE(DBG_LEXER, 1504 ("parsebackq() reprocessing as $(%s)\n", pstr)); 1505 setinputstring(pstr, 1, line1); 1506 } 1507 } 1508 nlpp = pbqlist; 1509 while (*nlpp) 1510 nlpp = &(*nlpp)->next; 1511 *nlpp = stalloc(sizeof(struct nodelist)); 1512 (*nlpp)->next = NULL; 1513 parsebackquote = oldstyle; 1514 1515 if (oldstyle) { 1516 saveprompt = doprompt; 1517 doprompt = 0; 1518 } else 1519 saveprompt = 0; 1520 1521 lno = -plinno; 1522 CTRACE(DBG_LEXER, ("parsebackq() 
parsing embedded command list\n")); 1523 n = list(0); 1524 CTRACE(DBG_LEXER, ("parsebackq() parsed $() (%d -> %d)\n", -lno, 1525 lno + plinno)); 1526 lno += plinno; 1527 1528 if (oldstyle) { 1529 if (peektoken() != TEOF) 1530 synexpect(-1, 0); 1531 doprompt = saveprompt; 1532 } else 1533 consumetoken(TRP); 1534 1535 (*nlpp)->n = n; 1536 if (oldstyle) { 1537 /* 1538 * Start reading from old file again, ignoring any pushed back 1539 * tokens left from the backquote parsing 1540 */ 1541 CTRACE(DBG_LEXER, ("parsebackq() back to previous input\n")); 1542 popfile(); 1543 tokpushback = 0; 1544 } 1545 1546 while (stackblocksize() <= savelen) 1547 growstackblock(); 1548 STARTSTACKSTR(out); 1549 if (str) { 1550 memcpy(out, str, savelen); 1551 STADJUST(savelen, out); 1552 INTOFF; 1553 ckfree(str); 1554 sstr = str = NULL; 1555 INTON; 1556 } 1557 parsebackquote = savepbq; 1558 handler = savehandler; 1559 if (arinest || ISDBLQUOTE()) { 1560 STPUTC(CTLBACKQ | CTLQUOTE, out); 1561 while (--lno >= 0) 1562 STPUTC(CTLNONL, out); 1563 } else 1564 STPUTC(CTLBACKQ, out); 1565 1566 return out; 1567 } 1568 1569 /* 1570 * Parse a redirection operator. The parameter "out" points to a string 1571 * specifying the fd to be redirected. It is guaranteed to be either "" 1572 * or a numeric string (for now anyway). The parameter "c" contains the 1573 * first character of the redirection operator. 1574 * 1575 * Note the string "out" is on the stack, which we are about to clobber, 1576 * so process it first... 1577 */ 1578 1579 static void 1580 parseredir(const char *out, int c) 1581 { 1582 union node *np; 1583 int fd; 1584 1585 np = stalloc(sizeof(struct nfile)); 1586 1587 fd = (*out == '\0') ? 
-1 : number(out); /* number(out) >= 0 */ 1588 np->nfile.fd = fd; /* do this again later with updated fd */ 1589 if (fd != np->nfile.fd) 1590 error("file descriptor (%d) out of range", fd); 1591 1592 VTRACE(DBG_LEXER, ("parseredir after '%s%c' ", out, c)); 1593 if (c == '>') { 1594 if (fd < 0) 1595 fd = 1; 1596 c = pgetc_linecont(); 1597 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF)); 1598 if (c == '>') 1599 np->type = NAPPEND; 1600 else if (c == '|') 1601 np->type = NCLOBBER; 1602 else if (c == '&') 1603 np->type = NTOFD; 1604 else { 1605 np->type = NTO; 1606 VTRACE(DBG_LEXER, ("unwanted ", c)); 1607 pungetc(); 1608 } 1609 } else { /* c == '<' */ 1610 if (fd < 0) 1611 fd = 0; 1612 c = pgetc_linecont(); 1613 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF)); 1614 switch (c) { 1615 case '<': 1616 /* if sizes differ, just discard the old one */ 1617 if (sizeof (struct nfile) != sizeof (struct nhere)) 1618 np = stalloc(sizeof(struct nhere)); 1619 np->type = NHERE; 1620 np->nhere.fd = 0; 1621 heredoc = stalloc(sizeof(struct HereDoc)); 1622 heredoc->here = np; 1623 heredoc->startline = plinno; 1624 if ((c = pgetc_linecont()) == '-') { 1625 CTRACE(DBG_LEXER, ("and '%c'(%#.2x) ", 1626 c & 0xFF, c & 0x1FF)); 1627 heredoc->striptabs = 1; 1628 } else { 1629 heredoc->striptabs = 0; 1630 pungetc(); 1631 } 1632 break; 1633 1634 case '&': 1635 np->type = NFROMFD; 1636 break; 1637 1638 case '>': 1639 np->type = NFROMTO; 1640 break; 1641 1642 default: 1643 np->type = NFROM; 1644 VTRACE(DBG_LEXER, ("unwanted('%c'0#.2x)", c&0xFF, 1645 c&0x1FF)); 1646 pungetc(); 1647 break; 1648 } 1649 } 1650 np->nfile.fd = fd; 1651 1652 VTRACE(DBG_LEXER, (" ->%"PRIdsNT" fd=%d\n", NODETYPENAME(np->type),fd)); 1653 1654 redirnode = np; /* this is the "value" of TRENODE */ 1655 } 1656 1657 /* 1658 * Called to parse a backslash escape sequence inside $'...'. 1659 * The backslash has already been read. 
 *
 * "out" is the current output position in the stack string being
 * built, the (possibly advanced) position is returned.
 *
 * Normally one byte is added to the output (preceded by CTLESC when
 * NEEDESC() says that is required).  The exceptions are:
 *	\<newline>	adds nothing (only the line number is counted);
 *	\u and \U	values above 127 are added as a multi-byte sequence;
 *	anything that produces a newline is added as CTLCNL instead;
 *	anything that produces a NUL adds nothing, and the rest of the
 *			input up to (not including) the closing ' is dropped.
 */
static char *
readcstyleesc(char *out)
{
	int c, vc, i, n;
	unsigned int v;

	c = pgetc();
	VTRACE(DBG_LEXER, ("CSTR(\\%c)(\\%#x)", c&0xFF, c&0x1FF));
	switch (c) {
	case '\0':
	case PEOF:
		synerror("Unterminated quoted string ($'...)");
		/* NOTREACHED */
	case '\n':
		plinno++;
		VTRACE(DBG_LEXER, ("@%d ", plinno));
		if (doprompt)
			setprompt(2);
		else
			setprompt(0);
		return out;

	case '\\':
	case '\'':
	case '"':
		v = c;
		break;

	case 'a': v = '\a'; break;
	case 'b': v = '\b'; break;
	case 'e': v = '\033'; break;
	case 'f': v = '\f'; break;
	case 'n': v = '\n'; break;
	case 'r': v = '\r'; break;
	case 't': v = '\t'; break;
	case 'v': v = '\v'; break;

	/* octal: 1 to 3 digits, the first non-octal char is pushed back */
	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
		v = c - '0';
		c = pgetc();
		if (c >= '0' && c <= '7') {
			v <<= 3;
			v += c - '0';
			c = pgetc();
			if (c >= '0' && c <= '7') {
				v <<= 3;
				v += c - '0';
			} else
				pungetc();
		} else
			pungetc();
		break;

	/* \cX: control char; X must be in '?'..'z' but not '`' */
	case 'c':
		c = pgetc();
		if (c < 0x3f || c > 0x7a || c == 0x60)
			synerror("Bad \\c escape sequence");
		if (c == '\\' && pgetc() != '\\')
			synerror("Bad \\c\\ escape sequence");
		if (c == '?')
			v = 127;
		else
			v = c & 0x1f;
		break;

	/* hex: n is the maximum number of hex digits to read */
	case 'x':
		n = 2;
		goto hexval;
	case 'u':
		n = 4;
		goto hexval;
	case 'U':
		n = 8;
	hexval:
		v = 0;
		for (i = 0; i < n; i++) {
			c = pgetc();
			if (c >= '0' && c <= '9')
				v = (v << 4) + c - '0';
			else if (c >= 'A' && c <= 'F')
				v = (v << 4) + c - 'A' + 10;
			else if (c >= 'a' && c <= 'f')
				v = (v << 4) + c - 'a' + 10;
			else {
				pungetc();
				break;
			}
		}
		if (n > 2 && v > 127) {
			/* 0xd800..0xdfff is rejected for \u and \U */
			if (v >= 0xd800 && v <= 0xdfff)
				synerror("Invalid \\u escape sequence");

			/* XXX should we use iconv here. What locale? */
			/* at most 6 bytes follow, each may need a CTLESC */
			CHECKSTRSPACE(12, out);

/*
 * Add a byte to output string, while checking if it needs to
 * be escaped -- if its value happens to match the value of one
 * of our internal CTL* chars - which would (at a minimum) be
 * summarily removed later, if not escaped.
 *
 * The current definition of ISCTL() allows the compiler to
 * optimise away either half, or all, of the test in most of
 * the cases here (0xc0 | anything) cannot be between 0x80 and 0x9f
 * for example, so there a test is not needed).
 *
 * Which tests can be removed depends upon the actual values
 * selected for the CTL* chars.
 */
#define ESC_USTPUTC(c, o) do {				\
		char _ch = (c);				\
							\
		if (ISCTL(_ch))				\
			USTPUTC(CTLESC, o);		\
		USTPUTC(_ch, o);			\
	} while (0)

			VTRACE(DBG_LEXER, ("CSTR(\\%c%8.8x)", n==4?'u':'U', v));
			if (v <= 0x7ff) {
				ESC_USTPUTC(0xc0 | v >> 6, out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			} else if (v <= 0xffff) {
				ESC_USTPUTC(0xe0 | v >> 12, out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			} else if (v <= 0x10ffff) {
				ESC_USTPUTC(0xf0 | v >> 18, out);
				ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;

	/* these next two are not very likely, but we may as well be complete */
			} else if (v <= 0x3FFFFFF) {
				ESC_USTPUTC(0xf8 | v >> 24, out);
				ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			} else if (v <= 0x7FFFFFFF) {
				ESC_USTPUTC(0xfC | v >> 30, out);
				ESC_USTPUTC(0x80 | ((v >> 24) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			}
			/* anything bigger than that is replaced by '?' */
			if (v > 127)
				v = '?';
		}
		break;
	default:
		synerror("Unknown $'' escape sequence");
	}
	vc = (char)v;
	VTRACE(DBG_LEXER, ("->%u(%#x)['%c']", v, v, vc&0xFF));

	/*
	 * If we managed to create a \n from a \ sequence (no matter how)
	 * then we replace it with the magic CTLCNL control char, which
	 * will turn into a \n again later, but in the meantime, never
	 * causes LINENO increments.
	 */
	if (vc == '\n') {
		VTRACE(DBG_LEXER, ("CTLCNL."));
		USTPUTC(CTLCNL, out);
		return out;
	}

	/*
	 * We can't handle NUL bytes.
	 * POSIX says we should skip till the closing quote.
	 */
	if (vc == '\0') {
		CTRACE(DBG_LEXER, ("\\0: skip to '", v, v, vc&0xFF));
		while ((c = pgetc()) != '\'') {
			if (c == '\\')	/* so that \' does not end the skip */
				c = pgetc();
			if (c == PEOF)
				synerror("Unterminated quoted string ($'...)");
			if (c == '\n') {
				plinno++;
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
			}
		}
		pungetc();	/* leave the closing ' for the caller */
		return out;
	}
	CVTRACE(DBG_LEXER, NEEDESC(vc), ("CTLESC-"));
	VTRACE(DBG_LEXER, ("'%c'(%#.2x)", vc&0xFF, vc&0x1FF));
	if (NEEDESC(vc))
		USTPUTC(CTLESC, out);
	USTPUTC(vc, out);
	return out;
}

/*
 * The lowest level basic tokenizer.
 *
 * The next input byte (character) is in firstc, syn says which
 * syntax tables we are to use (basic, single or double quoted, or arith)
 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
 * quote character itself is not special (used parsing here docs and similar)
 *
 * The result is the type of the next token (its value, when there is one,
 * is saved in the relevant global var - must fix that someday!)
which is 1874 * also saved for re-reading ("lasttoken"). 1875 * 1876 * Overall, this routine does far more parsing than it is supposed to. 1877 * That will also need fixing, someday... 1878 */ 1879 STATIC int 1880 readtoken1(int firstc, char const *syn, int oneword) 1881 { 1882 int c; 1883 char * out; 1884 int len; 1885 struct nodelist *bqlist; 1886 int quotef; 1887 VSS static_stack; 1888 VSS *stack = &static_stack; 1889 1890 stack->prev = NULL; 1891 stack->cur = 0; 1892 1893 syntax = syn; 1894 1895 #ifdef DEBUG 1896 #define SYNTAX ( syntax == BASESYNTAX ? "BASE" : \ 1897 syntax == DQSYNTAX ? "DQ" : \ 1898 syntax == SQSYNTAX ? "SQ" : \ 1899 syntax == ARISYNTAX ? "ARI" : \ 1900 "???" ) 1901 #endif 1902 1903 startlinno = plinno; 1904 varnest = 0; 1905 quoted = 0; 1906 if (syntax == DQSYNTAX) 1907 SETDBLQUOTE(); 1908 quotef = 0; 1909 bqlist = NULL; 1910 arinest = 0; 1911 parenlevel = 0; 1912 elided_nl = 0; 1913 magicq = oneword; 1914 1915 CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n", 1916 firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "", 1917 ISDBLQUOTE()?" 
ISDBLQUOTE":"", quoted)); 1918 1919 STARTSTACKSTR(out); 1920 1921 for (c = firstc ;; c = pgetc_macro()) { /* until of token */ 1922 if (syntax == ARISYNTAX) 1923 out = insert_elided_nl(out); 1924 CHECKSTRSPACE(6, out); /* permit 6 calls to USTPUTC */ 1925 switch (syntax[c]) { 1926 case CFAKE: 1927 VTRACE(DBG_LEXER, ("CFAKE")); 1928 if (syntax == BASESYNTAX && varnest == 0) 1929 break; 1930 VTRACE(DBG_LEXER, (",")); 1931 continue; 1932 case CNL: /* '\n' */ 1933 VTRACE(DBG_LEXER, ("CNL")); 1934 if (syntax == BASESYNTAX && varnest == 0) 1935 break; /* exit loop */ 1936 USTPUTC(c, out); 1937 plinno++; 1938 VTRACE(DBG_LEXER, ("@%d,", plinno)); 1939 if (doprompt) 1940 setprompt(2); 1941 else 1942 setprompt(0); 1943 continue; 1944 1945 case CSBACK: /* single quoted backslash */ 1946 if ((quoted & QF) == CQ) { 1947 out = readcstyleesc(out); 1948 continue; 1949 } 1950 VTRACE(DBG_LEXER, ("ESC:")); 1951 USTPUTC(CTLESC, out); 1952 /* FALLTHROUGH */ 1953 case CWORD: 1954 VTRACE(DBG_LEXER, ("'%c'", c)); 1955 USTPUTC(c, out); 1956 continue; 1957 1958 case CCTL: 1959 CVTRACE(DBG_LEXER, !magicq || ISDBLQUOTE(), 1960 ("%s%sESC:",!magicq?"!m":"",ISDBLQUOTE()?"DQ":"")); 1961 if (!magicq || ISDBLQUOTE()) 1962 USTPUTC(CTLESC, out); 1963 VTRACE(DBG_LEXER, ("'%c'", c)); 1964 USTPUTC(c, out); 1965 continue; 1966 case CBACK: /* backslash */ 1967 c = pgetc(); 1968 VTRACE(DBG_LEXER, ("\\'%c'(%#.2x)", c&0xFF, c&0x1FF)); 1969 if (c == PEOF) { 1970 VTRACE(DBG_LEXER, ("EOF, keep \\ ")); 1971 USTPUTC('\\', out); 1972 pungetc(); 1973 continue; 1974 } 1975 if (c == '\n') { 1976 plinno++; 1977 elided_nl++; 1978 VTRACE(DBG_LEXER, ("eli \\n (%d) @%d ", 1979 elided_nl, plinno)); 1980 if (doprompt) 1981 setprompt(2); 1982 else 1983 setprompt(0); 1984 continue; 1985 } 1986 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 ")); 1987 quotef = 1; /* current token is quoted */ 1988 if (quoted && c != '\\' && c != '`' && 1989 (c != '}' || varnest == 0) && 1990 c != '$' && (c != '"' || magicq)) { 1991 /* 1992 * retain the 
\ (which we *know* needs CTLESC) 1993 * when in "..." and the following char is 1994 * not one of the magic few.) 1995 * Otherwise the \ has done its work, and 1996 * is dropped. 1997 */ 1998 VTRACE(DBG_LEXER, ("ESC:'\\'")); 1999 USTPUTC(CTLESC, out); 2000 USTPUTC('\\', out); 2001 } 2002 CVTRACE(DBG_LEXER, NEEDESC(c) || !magicq, 2003 ("%sESC:", NEEDESC(c) ? "+" : "m")); 2004 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", c&0xFF, c&0x1FF)); 2005 if (NEEDESC(c)) 2006 USTPUTC(CTLESC, out); 2007 else if (!magicq) { 2008 USTPUTC(CTLESC, out); 2009 USTPUTC(c, out); 2010 continue; 2011 } 2012 USTPUTC(c, out); 2013 continue; 2014 case CSQUOTE: 2015 if (syntax != SQSYNTAX) { 2016 CVTRACE(DBG_LEXER, !magicq, (" CQM ")); 2017 if (!magicq) 2018 USTPUTC(CTLQUOTEMARK, out); 2019 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 ")); 2020 quotef = 1; 2021 TS_PUSH(); 2022 syntax = SQSYNTAX; 2023 quoted = SQ; 2024 VTRACE(DBG_LEXER, (" TS_PUSH(SQ)")); 2025 continue; 2026 } 2027 if (magicq && arinest == 0 && varnest == 0) { 2028 /* Ignore inside quoted here document */ 2029 VTRACE(DBG_LEXER, ("<<'>>")); 2030 USTPUTC(c, out); 2031 continue; 2032 } 2033 /* End of single quotes... 
*/ 2034 TS_POP(); 2035 VTRACE(DBG_LEXER, ("SQ TS_POP->%s ", SYNTAX)); 2036 CVTRACE(DBG_LEXER, syntax == BASESYNTAX, (" CQE ")); 2037 if (syntax == BASESYNTAX) 2038 USTPUTC(CTLQUOTEEND, out); 2039 continue; 2040 case CDQUOTE: 2041 if (magicq && arinest == 0 /* && varnest == 0 */) { 2042 VTRACE(DBG_LEXER, ("<<\">>")); 2043 /* Ignore inside here document */ 2044 USTPUTC(c, out); 2045 continue; 2046 } 2047 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 ")); 2048 quotef = 1; 2049 if (arinest) { 2050 if (ISDBLQUOTE()) { 2051 VTRACE(DBG_LEXER, 2052 (" CQE ari(%d", arinest)); 2053 USTPUTC(CTLQUOTEEND, out); 2054 TS_POP(); 2055 VTRACE(DBG_LEXER, ("%d)TS_POP->%s ", 2056 arinest, SYNTAX)); 2057 } else { 2058 VTRACE(DBG_LEXER, 2059 (" ari(%d) %s TS_PUSH->DQ CQM ", 2060 arinest, SYNTAX)); 2061 TS_PUSH(); 2062 syntax = DQSYNTAX; 2063 SETDBLQUOTE(); 2064 USTPUTC(CTLQUOTEMARK, out); 2065 } 2066 continue; 2067 } 2068 CVTRACE(DBG_LEXER, magicq, (" MQignDQ ")); 2069 if (magicq) 2070 continue; 2071 if (ISDBLQUOTE()) { 2072 TS_POP(); 2073 VTRACE(DBG_LEXER, 2074 (" DQ TS_POP->%s CQE ", SYNTAX)); 2075 USTPUTC(CTLQUOTEEND, out); 2076 } else { 2077 VTRACE(DBG_LEXER, 2078 (" %s TS_POP->DQ CQM ", SYNTAX)); 2079 TS_PUSH(); 2080 syntax = DQSYNTAX; 2081 SETDBLQUOTE(); 2082 USTPUTC(CTLQUOTEMARK, out); 2083 } 2084 continue; 2085 case CVAR: /* '$' */ 2086 VTRACE(DBG_LEXER, ("'$'...")); 2087 out = insert_elided_nl(out); 2088 PARSESUB(); /* parse substitution */ 2089 continue; 2090 case CENDVAR: /* CLOSEBRACE */ 2091 if (varnest > 0 && !ISDBLQUOTE()) { 2092 VTRACE(DBG_LEXER, ("vn=%d !DQ", varnest)); 2093 TS_POP(); 2094 VTRACE(DBG_LEXER, (" TS_POP->%s CEV ", SYNTAX)); 2095 USTPUTC(CTLENDVAR, out); 2096 } else { 2097 VTRACE(DBG_LEXER, ("'%c'", c)); 2098 USTPUTC(c, out); 2099 } 2100 out = insert_elided_nl(out); 2101 continue; 2102 case CLP: /* '(' in arithmetic */ 2103 parenlevel++; 2104 VTRACE(DBG_LEXER, ("'('(%d)", parenlevel)); 2105 USTPUTC(c, out); 2106 continue; 2107 case CRP: /* ')' in arithmetic */ 2108 
if (parenlevel > 0) { 2109 USTPUTC(c, out); 2110 --parenlevel; 2111 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel)); 2112 } else { 2113 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel)); 2114 if (pgetc_linecont() == /*(*/ ')') { 2115 out = insert_elided_nl(out); 2116 if (--arinest == 0) { 2117 TS_POP(); 2118 USTPUTC(CTLENDARI, out); 2119 } else 2120 USTPUTC(/*(*/ ')', out); 2121 } else { 2122 break; /* to synerror() just below */ 2123 #if 0 /* the old way, causes weird errors on bad input */ 2124 /* 2125 * unbalanced parens 2126 * (don't 2nd guess - no error) 2127 */ 2128 pungetc(); 2129 USTPUTC(/*(*/ ')', out); 2130 #endif 2131 } 2132 } 2133 continue; 2134 case CBQUOTE: /* '`' */ 2135 VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n")); 2136 out = parsebackq(stack, out, &bqlist, 1); 2137 VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: ")); 2138 continue; 2139 case CEOF: /* --> c == PEOF */ 2140 VTRACE(DBG_LEXER, ("EOF ")); 2141 break; /* will exit loop */ 2142 default: 2143 VTRACE(DBG_LEXER, ("['%c'(%#.2x)]", c&0xFF, c&0x1FF)); 2144 if (varnest == 0 && !ISDBLQUOTE()) 2145 break; /* exit loop */ 2146 USTPUTC(c, out); 2147 VTRACE(DBG_LEXER, (",")); 2148 continue; 2149 } 2150 VTRACE(DBG_LEXER, (" END TOKEN\n", c&0xFF, c&0x1FF)); 2151 break; /* break from switch -> break from for loop too */ 2152 } 2153 2154 if (syntax == ARISYNTAX) { 2155 cleanup_state_stack(stack); 2156 synerror(/*((*/ "Missing '))'"); 2157 } 2158 if (syntax != BASESYNTAX && /* ! 
parsebackquote && */ !magicq) { 2159 cleanup_state_stack(stack); 2160 synerror("Unterminated quoted string"); 2161 } 2162 if (varnest != 0) { 2163 cleanup_state_stack(stack); 2164 startlinno = plinno; 2165 /* { */ 2166 synerror("Missing '}'"); 2167 } 2168 2169 STPUTC('\0', out); 2170 len = out - stackblock(); 2171 out = stackblock(); 2172 2173 if (!magicq) { 2174 if ((c == '<' || c == '>') 2175 && quotef == 0 && (*out == '\0' || is_number(out))) { 2176 parseredir(out, c); 2177 cleanup_state_stack(stack); 2178 return lasttoken = TREDIR; 2179 } else { 2180 pungetc(); 2181 } 2182 } 2183 2184 VTRACE(DBG_PARSE|DBG_LEXER, 2185 ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n", 2186 (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""), 2187 len, elided_nl)); 2188 2189 quoteflag = quotef; 2190 backquotelist = bqlist; 2191 grabstackblock(len); 2192 wordtext = out; 2193 cleanup_state_stack(stack); 2194 return lasttoken = TWORD; 2195 /* end of readtoken routine */ 2196 2197 2198 /* 2199 * Parse a substitution. At this point, we have read the dollar sign 2200 * and nothing else. 
2201 */ 2202 2203 parsesub: { 2204 int subtype; 2205 int typeloc; 2206 int flags; 2207 char *p; 2208 static const char types[] = "}-+?="; 2209 2210 c = pgetc_linecont(); 2211 VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF)); 2212 if (c == '(' /*)*/) { /* $(command) or $((arith)) */ 2213 if (pgetc_linecont() == '(' /*')'*/ ) { 2214 VTRACE(DBG_LEXER, ("\"$((\" ARITH ")); 2215 out = insert_elided_nl(out); 2216 PARSEARITH(); 2217 } else { 2218 VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n")); 2219 out = insert_elided_nl(out); 2220 pungetc(); 2221 out = parsebackq(stack, out, &bqlist, 0); 2222 VTRACE(DBG_LEXER, ("parseback()->readtoken1(): ")); 2223 } 2224 } else if (c == OPENBRACE || is_name(c) || is_special(c)) { 2225 VTRACE(DBG_LEXER, (" $EXP:CTLVAR ")); 2226 USTPUTC(CTLVAR, out); 2227 typeloc = out - stackblock(); 2228 USTPUTC(VSNORMAL, out); 2229 subtype = VSNORMAL; 2230 flags = 0; 2231 if (c == OPENBRACE) { 2232 c = pgetc_linecont(); 2233 if (c == '#') { 2234 if ((c = pgetc_linecont()) == CLOSEBRACE) 2235 c = '#'; 2236 else if (is_name(c) || isdigit(c)) 2237 subtype = VSLENGTH; 2238 else if (is_special(c)) { 2239 /* 2240 * ${#} is $# - the number of sh params 2241 * ${##} is the length of ${#} 2242 * ${###} is ${#} with as much nothing 2243 * as possible removed from start 2244 * ${##1} is ${#} with leading 1 gone 2245 * ${##\#} is ${#} with leading # gone 2246 * 2247 * this stuff is UGLY! 2248 */ 2249 if (pgetc_linecont() == CLOSEBRACE) { 2250 pungetc(); 2251 subtype = VSLENGTH; 2252 } else { 2253 static char cbuf[2]; 2254 2255 pungetc(); /* would like 2 */ 2256 cbuf[0] = c; /* so ... */ 2257 cbuf[1] = '\0'; 2258 pushstring(cbuf, 1, NULL); 2259 c = '#'; /* ${#:...} */ 2260 subtype = 0; /* .. 
or similar */ 2261 } 2262 } else { 2263 pungetc(); 2264 c = '#'; 2265 subtype = 0; 2266 } 2267 } 2268 else 2269 subtype = 0; 2270 VTRACE(DBG_LEXER, ("${ st=%d ", subtype)); 2271 } 2272 if (is_name(c)) { 2273 p = out; 2274 do { 2275 VTRACE(DBG_LEXER, ("%c", c)); 2276 STPUTC(c, out); 2277 c = pgetc_linecont(); 2278 } while (is_in_name(c)); 2279 2280 #if 0 2281 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) { 2282 int i; 2283 int linno; 2284 char buf[10]; 2285 2286 /* 2287 * The "LINENO hack" 2288 * 2289 * Replace the variable name with the 2290 * current line number. 2291 */ 2292 linno = plinno; 2293 if (funclinno != 0) 2294 linno -= funclinno - 1; 2295 snprintf(buf, sizeof(buf), "%d", linno); 2296 STADJUST(-6, out); 2297 for (i = 0; buf[i] != '\0'; i++) 2298 STPUTC(buf[i], out); 2299 flags |= VSLINENO; 2300 } 2301 #endif 2302 } else if (is_digit(c)) { 2303 do { 2304 VTRACE(DBG_LEXER, ("%c", c)); 2305 STPUTC(c, out); 2306 c = pgetc_linecont(); 2307 } while (subtype != VSNORMAL && is_digit(c)); 2308 } 2309 else if (is_special(c)) { 2310 VTRACE(DBG_LEXER, ("\"$%c", c)); 2311 USTPUTC(c, out); 2312 c = pgetc_linecont(); 2313 } 2314 else { 2315 VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF)); 2316 badsub: 2317 cleanup_state_stack(stack); 2318 synerror("Bad substitution"); 2319 } 2320 2321 STPUTC('=', out); 2322 if (subtype == 0) { 2323 switch (c) { 2324 case ':': 2325 flags |= VSNUL; 2326 c = pgetc_linecont(); 2327 /*FALLTHROUGH*/ 2328 default: 2329 p = strchr(types, c); 2330 if (p == NULL) 2331 goto badsub; 2332 subtype = p - types + VSNORMAL; 2333 break; 2334 case '%': 2335 case '#': 2336 { 2337 int cc = c; 2338 subtype = c == '#' ? 
VSTRIMLEFT : 2339 VSTRIMRIGHT; 2340 c = pgetc_linecont(); 2341 if (c == cc) 2342 subtype++; 2343 else 2344 pungetc(); 2345 break; 2346 } 2347 } 2348 } else { 2349 if (subtype == VSLENGTH && c != /*{*/ '}') 2350 synerror("no modifiers allowed with ${#var}"); 2351 pungetc(); 2352 } 2353 if (quoted || arinest) 2354 flags |= VSQUOTE; 2355 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX) 2356 flags |= VSPATQ; 2357 VTRACE(DBG_LEXER, (" st%d:%x", subtype, flags)); 2358 *(stackblock() + typeloc) = subtype | flags; 2359 if (subtype != VSNORMAL) { 2360 TS_PUSH(); 2361 varnest++; 2362 arinest = 0; 2363 if (subtype > VSASSIGN) { /* # ## % %% */ 2364 syntax = BASESYNTAX; 2365 quoted = 0; 2366 magicq = 0; 2367 } 2368 VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ", 2369 SYNTAX, varnest, quoted ? " Q" : "")); 2370 } 2371 } else if (c == '\'' && syntax == BASESYNTAX) { 2372 USTPUTC(CTLQUOTEMARK, out); 2373 VTRACE(DBG_LEXER, (" CSTR \"$'\" CQM ")); 2374 CVTRACE(DBG_LEXER, quotef==0, ("QF=1 ")); 2375 quotef = 1; 2376 TS_PUSH(); 2377 syntax = SQSYNTAX; 2378 quoted = CQ; 2379 VTRACE(DBG_LEXER, ("%s->TS_PUSH()->SQ ", SYNTAX)); 2380 } else { 2381 VTRACE(DBG_LEXER, ("$unk -> '$' (pushback '%c'%#.2x)", 2382 c & 0xFF, c & 0x1FF)); 2383 USTPUTC('$', out); 2384 pungetc(); 2385 } 2386 goto parsesub_return; 2387 } 2388 2389 2390 /* 2391 * Parse an arithmetic expansion (indicate start of one and set state) 2392 */ 2393 parsearith: { 2394 2395 #if 0 2396 if (syntax == ARISYNTAX) { 2397 /* 2398 * we collapse embedded arithmetic expansion to 2399 * parentheses, which should be equivalent 2400 * 2401 * XXX It isn't, must fix, soonish... 2402 */ 2403 USTPUTC('(' /*)*/, out); 2404 USTPUTC('(' /*)*/, out); 2405 /* 2406 * Need 2 of them because there will (should be) 2407 * two closing ))'s to follow later. 
2408 */ 2409 parenlevel += 2; 2410 } else 2411 #endif 2412 { 2413 VTRACE(DBG_LEXER, (" CTLARI%c ", ISDBLQUOTE()?'"':'_')); 2414 USTPUTC(CTLARI, out); 2415 if (ISDBLQUOTE()) 2416 USTPUTC('"',out); 2417 else 2418 USTPUTC(' ',out); 2419 2420 VTRACE(DBG_LEXER, ("%s->TS_PUSH->ARI(1)", SYNTAX)); 2421 TS_PUSH(); 2422 syntax = ARISYNTAX; 2423 arinest = 1; 2424 varnest = 0; 2425 magicq = 1; 2426 } 2427 goto parsearith_return; 2428 } 2429 2430 } /* end of readtoken */ 2431 2432 2433 2434 2435 #ifdef mkinit 2436 INCLUDE "parser.h" 2437 2438 RESET { 2439 psp.v_current_parser = &parse_state; 2440 2441 parse_state.ps_tokpushback = 0; 2442 parse_state.ps_checkkwd = 0; 2443 parse_state.ps_heredoclist = NULL; 2444 } 2445 #endif 2446 2447 /* 2448 * Returns true if the text contains nothing to expand (no dollar signs 2449 * or backquotes). 2450 */ 2451 2452 STATIC int 2453 noexpand(char *text) 2454 { 2455 char *p; 2456 char c; 2457 2458 p = text; 2459 while ((c = *p++) != '\0') { 2460 if (c == CTLQUOTEMARK || c == CTLQUOTEEND) 2461 continue; 2462 if (c == CTLESC) 2463 p++; 2464 else if (ISCTL(c)) 2465 return 0; 2466 } 2467 return 1; 2468 } 2469 2470 2471 /* 2472 * Return true if the argument is a legal variable name (a letter or 2473 * underscore followed by zero or more letters, underscores, and digits). 2474 */ 2475 2476 int 2477 goodname(const char *name) 2478 { 2479 const char *p; 2480 2481 p = name; 2482 if (! is_name(*p)) 2483 return 0; 2484 while (*++p) { 2485 if (! 
is_in_name(*p)) 2486 return 0; 2487 } 2488 return 1; 2489 } 2490 2491 int 2492 isassignment(const char *p) 2493 { 2494 if (!is_name(*p)) 2495 return 0; 2496 while (*++p != '=') 2497 if (*p == '\0' || !is_in_name(*p)) 2498 return 0; 2499 return 1; 2500 } 2501 2502 /* 2503 * skip past any \n's, and leave lasttoken set to whatever follows 2504 */ 2505 STATIC void 2506 linebreak(void) 2507 { 2508 while (readtoken() == TNL) 2509 ; 2510 } 2511 2512 /* 2513 * The next token must be "token" -- check, then move past it 2514 */ 2515 STATIC void 2516 consumetoken(int token) 2517 { 2518 if (readtoken() != token) { 2519 VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s", 2520 token, tokname[token], tokname[lasttoken])); 2521 CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext)); 2522 VTRACE(DBG_PARSE, ("\n")); 2523 synexpect(token, NULL); 2524 } 2525 } 2526 2527 /* 2528 * Called when an unexpected token is read during the parse. The argument 2529 * is the token that is expected, or -1 if more than one type of token can 2530 * occur at this point. 
2531 */ 2532 2533 STATIC void 2534 synexpect(int token, const char *text) 2535 { 2536 char msg[64]; 2537 char *p; 2538 2539 if (lasttoken == TWORD) { 2540 size_t len = strlen(wordtext); 2541 2542 if (len <= 13) 2543 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext); 2544 else 2545 fmtstr(msg, 34, 2546 "Word \"%.10s...\" unexpected", wordtext); 2547 } else 2548 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]); 2549 2550 p = strchr(msg, '\0'); 2551 if (text) 2552 fmtstr(p, 30, " (expecting \"%.10s\")", text); 2553 else if (token >= 0) 2554 fmtstr(p, 30, " (expecting %s)", tokname[token]); 2555 2556 synerror(msg); 2557 /* NOTREACHED */ 2558 } 2559 2560 2561 STATIC void 2562 synerror(const char *msg) 2563 { 2564 error("%d: Syntax error: %s", startlinno, msg); 2565 /* NOTREACHED */ 2566 } 2567 2568 STATIC void 2569 setprompt(int which) 2570 { 2571 whichprompt = which; 2572 2573 #ifndef SMALL 2574 if (!el) 2575 #endif 2576 out2str(getprompt(NULL)); 2577 } 2578 2579 /* 2580 * handle getting the next character, while ignoring \ \n 2581 * (which is a little tricky as we only have one char of pushback 2582 * and we need that one elsewhere). 2583 */ 2584 STATIC int 2585 pgetc_linecont(void) 2586 { 2587 int c; 2588 2589 while ((c = pgetc()) == '\\') { 2590 c = pgetc(); 2591 if (c == '\n') { 2592 plinno++; 2593 elided_nl++; 2594 VTRACE(DBG_LEXER, ("\"\\n\"drop(el=%d@%d)", 2595 elided_nl, plinno)); 2596 if (doprompt) 2597 setprompt(2); 2598 else 2599 setprompt(0); 2600 } else { 2601 pungetc(); 2602 /* Allow the backslash to be pushed back. */ 2603 pushstring("\\", 1, NULL); 2604 return (pgetc()); 2605 } 2606 } 2607 return (c); 2608 } 2609 2610 /* 2611 * called by editline -- any expansions to the prompt 2612 * should be added here. 
2613 */ 2614 const char * 2615 getprompt(void *unused) 2616 { 2617 char *p; 2618 const char *cp; 2619 int wp; 2620 2621 if (!doprompt) 2622 return ""; 2623 2624 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt)); 2625 2626 switch (wp = whichprompt) { 2627 case 0: 2628 return ""; 2629 case 1: 2630 p = ps1val(); 2631 break; 2632 case 2: 2633 p = ps2val(); 2634 break; 2635 default: 2636 return "<internal prompt error>"; 2637 } 2638 if (p == NULL) 2639 return ""; 2640 2641 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p)); 2642 2643 cp = expandstr(p, plinno); 2644 whichprompt = wp; /* history depends on it not changing */ 2645 2646 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp)); 2647 2648 return cp; 2649 } 2650 2651 /* 2652 * Expand a string ... used for expanding prompts (PS1...) 2653 * 2654 * Never return NULL, always some string (return input string if invalid) 2655 * 2656 * The internal routine does the work, leaving the result on the 2657 * stack (or in a static string, or even the input string) and 2658 * handles parser recursion, and cleanup after an error while parsing. 2659 * 2660 * The visible interface copies the result off the stack (if it is there), 2661 * and handles stack management, leaving the stack in the exact same 2662 * state it was when expandstr() was called (so it can be used part way 2663 * through building a stack data structure - as in when PS2 is being 2664 * expanded half way through reading a "command line") 2665 * 2666 * on error, expandonstack() cleans up the parser state, but then 2667 * simply jumps out through expandstr() withut doing any stack cleanup, 2668 * which is OK, as the error handler must deal with that anyway. 2669 * 2670 * The split into two funcs is to avoid problems with setjmp/longjmp 2671 * and local variables which could otherwise be optimised into bizarre 2672 * behaviour. 
2673 */ 2674 static const char * 2675 expandonstack(char *ps, int cmdsub, int lineno) 2676 { 2677 union node n; 2678 struct jmploc jmploc; 2679 struct jmploc *const savehandler = handler; 2680 struct parsefile *const savetopfile = getcurrentfile(); 2681 const int save_x = xflag; 2682 const int save_e_s = errors_suppressed; 2683 struct parse_state new_state = init_parse_state; 2684 struct parse_state *const saveparser = psp.v_current_parser; 2685 const char *result = NULL; 2686 2687 if (!setjmp(jmploc.loc)) { 2688 handler = &jmploc; 2689 errors_suppressed = 1; 2690 2691 psp.v_current_parser = &new_state; 2692 setinputstring(ps, 1, lineno); 2693 2694 readtoken1(pgetc(), DQSYNTAX, 1); 2695 if (backquotelist != NULL) { 2696 if (!cmdsub) 2697 result = ps; 2698 else if (!promptcmds) 2699 result = "-o promptcmds not set: "; 2700 } 2701 if (result == NULL) { 2702 n.narg.type = NARG; 2703 n.narg.next = NULL; 2704 n.narg.text = wordtext; 2705 n.narg.lineno = lineno; 2706 n.narg.backquote = backquotelist; 2707 2708 xflag = 0; /* we might be expanding PS4 ... 
*/ 2709 expandarg(&n, NULL, 0); 2710 result = stackblock(); 2711 } 2712 } else { 2713 psp.v_current_parser = saveparser; 2714 xflag = save_x; 2715 popfilesupto(savetopfile); 2716 handler = savehandler; 2717 errors_suppressed = save_e_s; 2718 2719 if (exception == EXEXIT) 2720 longjmp(handler->loc, 1); 2721 if (exception == EXINT) 2722 exraise(SIGINT); 2723 return ""; 2724 } 2725 psp.v_current_parser = saveparser; 2726 xflag = save_x; 2727 popfilesupto(savetopfile); 2728 handler = savehandler; 2729 errors_suppressed = save_e_s; 2730 2731 if (result == NULL) 2732 result = ps; 2733 2734 return result; 2735 } 2736 2737 const char * 2738 expandstr(char *ps, int lineno) 2739 { 2740 const char *result = NULL; 2741 struct stackmark smark; 2742 static char *buffer = NULL; /* storage for prompt, never freed */ 2743 static size_t bufferlen = 0; 2744 2745 setstackmark(&smark); 2746 /* 2747 * At this point we anticipate that there may be a string 2748 * growing on the stack, but we have no idea how big it is. 2749 * However we know that it cannot be bigger than the current 2750 * allocated stack block, so simply reserve the whole thing, 2751 * then we can use the stack without barfing all over what 2752 * is there already... (the stack mark undoes this later.) 2753 */ 2754 (void) stalloc(stackblocksize()); 2755 2756 result = expandonstack(ps, 1, lineno); 2757 2758 if (__predict_true(result == stackblock())) { 2759 size_t len = strlen(result) + 1; 2760 2761 /* 2762 * the result (usual case) is on the stack, which we 2763 * are just about to discard (popstackmark()) so we 2764 * need to move it somewhere safe first. 
2765 */ 2766 2767 if (__predict_false(len > bufferlen)) { 2768 char *new; 2769 size_t newlen = bufferlen; 2770 2771 if (__predict_false(len > (SIZE_MAX >> 4))) { 2772 result = "huge prompt: "; 2773 goto getout; 2774 } 2775 2776 if (newlen == 0) 2777 newlen = 32; 2778 while (newlen <= len) 2779 newlen <<= 1; 2780 2781 new = (char *)realloc(buffer, newlen); 2782 2783 if (__predict_false(new == NULL)) { 2784 /* 2785 * this should rarely (if ever) happen 2786 * but we must do something when it does... 2787 */ 2788 result = "No mem for prompt: "; 2789 goto getout; 2790 } else { 2791 buffer = new; 2792 bufferlen = newlen; 2793 } 2794 } 2795 (void)memcpy(buffer, result, len); 2796 result = buffer; 2797 } 2798 2799 getout:; 2800 popstackmark(&smark); 2801 2802 return result; 2803 } 2804 2805 /* 2806 * and a simpler version, which does no $( ) expansions, for 2807 * use during shell startup when we know we are not parsing, 2808 * and so the stack is not in use - we can do what we like, 2809 * and do not need to clean up (that's handled externally). 2810 * 2811 * Simply return the result, even if it is on the stack 2812 */ 2813 const char * 2814 expandenv(char *arg) 2815 { 2816 return expandonstack(arg, 0, 0); 2817 } 2818