1 /* $NetBSD: parser.c,v 1.29 1996/05/09 19:40:08 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #ifndef lint 40 #if 0 41 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; 42 #else 43 static char rcsid[] = "$NetBSD: parser.c,v 1.29 1996/05/09 19:40:08 christos Exp $"; 44 #endif 45 #endif /* not lint */ 46 47 #include <stdlib.h> 48 49 #include "shell.h" 50 #include "parser.h" 51 #include "nodes.h" 52 #include "expand.h" /* defines rmescapes() */ 53 #include "redir.h" /* defines copyfd() */ 54 #include "syntax.h" 55 #include "options.h" 56 #include "input.h" 57 #include "output.h" 58 #include "var.h" 59 #include "error.h" 60 #include "memalloc.h" 61 #include "mystring.h" 62 #include "alias.h" 63 #include "show.h" 64 #ifndef NO_HISTORY 65 #include "myhistedit.h" 66 #endif 67 68 /* 69 * Shell command parser. 70 */ 71 72 #define EOFMARKLEN 79 73 74 /* values returned by readtoken */ 75 #include "token.def" 76 77 78 79 struct heredoc { 80 struct heredoc *next; /* next here document in list */ 81 union node *here; /* redirection node */ 82 char *eofmark; /* string indicating end of input */ 83 int striptabs; /* if set, strip leading tabs */ 84 }; 85 86 87 88 struct heredoc *heredoclist; /* list of here documents to read */ 89 int parsebackquote; /* nonzero if we are inside backquotes */ 90 int doprompt; /* if set, prompt the user */ 91 int needprompt; /* true if interactive and at start of line */ 92 int lasttoken; /* last token read */ 93 MKINIT int tokpushback; /* last token pushed back */ 94 char *wordtext; /* text of last word returned by readtoken */ 95 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */ 96 struct nodelist *backquotelist; 97 union node *redirnode; 98 struct heredoc *heredoc; 99 int quoteflag; /* set if (part of) last token was quoted */ 100 int startlinno; /* line # where last token started */ 101 102 103 #define GDB_HACK 1 /* avoid local declarations which gdb can't handle */ 104 #ifdef GDB_HACK 105 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'}; 106 static const char types[] = "}-+?="; 107 #endif 108 109 110 STATIC union node *list __P((int)); 111 STATIC union node *andor __P((void)); 112 STATIC union node *pipeline __P((void)); 113 STATIC union node *command __P((void)); 114 STATIC union node *simplecmd __P((union node **, union node *)); 115 STATIC union node *makename __P((void)); 116 STATIC void parsefname __P((void)); 117 STATIC void parseheredoc __P((void)); 118 STATIC int peektoken __P((void)); 119 STATIC int readtoken __P((void)); 120 STATIC int xxreadtoken __P((void)); 121 STATIC int readtoken1 __P((int, char const *, char *, int)); 122 STATIC int noexpand __P((char *)); 123 STATIC void synexpect __P((int)); 124 STATIC void synerror __P((char *)); 125 STATIC void setprompt __P((int)); 126 127 128 /* 129 * Read and parse a command. Returns NEOF on end of file. (NULL is a 130 * valid parse tree indicating a blank line.) 131 */ 132 133 union node * 134 parsecmd(interact) 135 int interact; 136 { 137 int t; 138 139 doprompt = interact; 140 if (doprompt) 141 setprompt(1); 142 else 143 setprompt(0); 144 needprompt = 0; 145 t = readtoken(); 146 if (t == TEOF) 147 return NEOF; 148 if (t == TNL) 149 return NULL; 150 tokpushback++; 151 return list(1); 152 } 153 154 155 STATIC union node * 156 list(nlflag) 157 int nlflag; 158 { 159 union node *n1, *n2, *n3; 160 int tok; 161 162 checkkwd = 2; 163 if (nlflag == 0 && tokendlist[peektoken()]) 164 return NULL; 165 n1 = NULL; 166 for (;;) { 167 n2 = andor(); 168 tok = readtoken(); 169 if (tok == TBACKGND) { 170 if (n2->type == NCMD || n2->type == NPIPE) { 171 n2->ncmd.backgnd = 1; 172 } else if (n2->type == NREDIR) { 173 n2->type = NBACKGND; 174 } else { 175 n3 = (union node *)stalloc(sizeof (struct nredir)); 176 n3->type = NBACKGND; 177 n3->nredir.n = n2; 178 n3->nredir.redirect = NULL; 179 n2 = n3; 180 } 181 } 182 if (n1 == NULL) { 183 n1 = n2; 184 } 185 else { 186 n3 = (union node *)stalloc(sizeof (struct nbinary)); 187 n3->type = NSEMI; 188 n3->nbinary.ch1 = n1; 189 n3->nbinary.ch2 = n2; 190 n1 = n3; 191 } 192 switch (tok) { 193 case TBACKGND: 194 case TSEMI: 195 tok = readtoken(); 196 /* fall through */ 197 case TNL: 198 if (tok == TNL) { 199 parseheredoc(); 200 if (nlflag) 201 return n1; 202 } else { 203 tokpushback++; 204 } 205 checkkwd = 2; 206 if (tokendlist[peektoken()]) 207 return n1; 208 break; 209 case TEOF: 210 if (heredoclist) 211 parseheredoc(); 212 else 213 pungetc(); /* push back EOF on input */ 214 return n1; 215 default: 216 if (nlflag) 217 synexpect(-1); 218 tokpushback++; 219 return n1; 220 } 221 } 222 } 223 224 225 226 STATIC union node * 227 andor() { 228 union node *n1, *n2, *n3; 229 int t; 230 231 n1 = pipeline(); 232 for (;;) { 233 if ((t = readtoken()) == TAND) { 234 t = NAND; 235 } else if (t == TOR) { 236 t = NOR; 237 } else { 238 tokpushback++; 239 return n1; 240 } 241 n2 = pipeline(); 242 n3 = (union node *)stalloc(sizeof (struct nbinary)); 243 n3->type = t; 244 n3->nbinary.ch1 = n1; 245 n3->nbinary.ch2 = n2; 246 n1 = n3; 247 } 248 } 249 250 251 252 STATIC union node * 253 pipeline() { 254 union node *n1, *pipenode, *notnode; 255 struct nodelist *lp, *prev; 256 int negate = 0; 257 258 TRACE(("pipeline: entered\n")); 259 while (readtoken() == TNOT) { 260 TRACE(("pipeline: TNOT recognized\n")); 261 negate = !negate; 262 } 263 tokpushback++; 264 n1 = command(); 265 if (readtoken() == TPIPE) { 266 pipenode = (union node *)stalloc(sizeof (struct npipe)); 267 pipenode->type = NPIPE; 268 pipenode->npipe.backgnd = 0; 269 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 270 pipenode->npipe.cmdlist = lp; 271 lp->n = n1; 272 do { 273 prev = lp; 274 lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 275 lp->n = command(); 276 prev->next = lp; 277 } while (readtoken() == TPIPE); 278 lp->next = NULL; 279 n1 = pipenode; 280 } 281 tokpushback++; 282 if (negate) { 283 notnode = (union node *)stalloc(sizeof (struct nnot)); 284 notnode->type = NNOT; 285 notnode->nnot.com = n1; 286 n1 = notnode; 287 } 288 return n1; 289 } 290 291 292 293 STATIC union node * 294 command() { 295 union node *n1, *n2; 296 union node *ap, **app; 297 union node *cp, **cpp; 298 union node *redir, **rpp; 299 int t; 300 301 checkkwd = 2; 302 redir = NULL; 303 n1 = NULL; 304 rpp = &redir; 305 /* Check for redirection which may precede command */ 306 while (readtoken() == TREDIR) { 307 *rpp = n2 = redirnode; 308 rpp = &n2->nfile.next; 309 parsefname(); 310 } 311 tokpushback++; 312 313 switch (readtoken()) { 314 case TIF: 315 n1 = (union node *)stalloc(sizeof (struct nif)); 316 n1->type = NIF; 317 n1->nif.test = list(0); 318 if (readtoken() != TTHEN) 319 synexpect(TTHEN); 320 n1->nif.ifpart = list(0); 321 n2 = n1; 322 while (readtoken() == TELIF) { 323 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif)); 324 n2 = n2->nif.elsepart; 325 n2->type = NIF; 326 n2->nif.test = list(0); 327 if (readtoken() != TTHEN) 328 synexpect(TTHEN); 329 n2->nif.ifpart = list(0); 330 } 331 if (lasttoken == TELSE) 332 n2->nif.elsepart = list(0); 333 else { 334 n2->nif.elsepart = NULL; 335 tokpushback++; 336 } 337 if (readtoken() != TFI) 338 synexpect(TFI); 339 checkkwd = 1; 340 break; 341 case TWHILE: 342 case TUNTIL: { 343 int got; 344 n1 = (union node *)stalloc(sizeof (struct nbinary)); 345 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL; 346 n1->nbinary.ch1 = list(0); 347 if ((got=readtoken()) != TDO) { 348 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); 349 synexpect(TDO); 350 } 351 n1->nbinary.ch2 = list(0); 352 if (readtoken() != TDONE) 353 synexpect(TDONE); 354 checkkwd = 1; 355 break; 356 } 357 case TFOR: 358 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext)) 359 synerror("Bad for loop variable"); 360 n1 = (union node *)stalloc(sizeof (struct nfor)); 361 n1->type = NFOR; 362 n1->nfor.var = wordtext; 363 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) { 364 app = ≈ 365 while (readtoken() == TWORD) { 366 n2 = (union node *)stalloc(sizeof (struct narg)); 367 n2->type = NARG; 368 n2->narg.text = wordtext; 369 n2->narg.backquote = backquotelist; 370 *app = n2; 371 app = &n2->narg.next; 372 } 373 *app = NULL; 374 n1->nfor.args = ap; 375 if (lasttoken != TNL && lasttoken != TSEMI) 376 synexpect(-1); 377 } else { 378 #ifndef GDB_HACK 379 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, 380 '@', '=', '\0'}; 381 #endif 382 n2 = (union node *)stalloc(sizeof (struct narg)); 383 n2->type = NARG; 384 n2->narg.text = (char *)argvars; 385 n2->narg.backquote = NULL; 386 n2->narg.next = NULL; 387 n1->nfor.args = n2; 388 /* 389 * Newline or semicolon here is optional (but note 390 * that the original Bourne shell only allowed NL). 391 */ 392 if (lasttoken != TNL && lasttoken != TSEMI) 393 tokpushback++; 394 } 395 checkkwd = 2; 396 if ((t = readtoken()) == TDO) 397 t = TDONE; 398 else if (t == TBEGIN) 399 t = TEND; 400 else 401 synexpect(-1); 402 n1->nfor.body = list(0); 403 if (readtoken() != t) 404 synexpect(t); 405 checkkwd = 1; 406 break; 407 case TCASE: 408 n1 = (union node *)stalloc(sizeof (struct ncase)); 409 n1->type = NCASE; 410 if (readtoken() != TWORD) 411 synexpect(TWORD); 412 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg)); 413 n2->type = NARG; 414 n2->narg.text = wordtext; 415 n2->narg.backquote = backquotelist; 416 n2->narg.next = NULL; 417 while (readtoken() == TNL); 418 if (lasttoken != TWORD || ! equal(wordtext, "in")) 419 synerror("expecting \"in\""); 420 cpp = &n1->ncase.cases; 421 checkkwd = 2, readtoken(); 422 do { 423 *cpp = cp = (union node *)stalloc(sizeof (struct nclist)); 424 cp->type = NCLIST; 425 app = &cp->nclist.pattern; 426 for (;;) { 427 *app = ap = (union node *)stalloc(sizeof (struct narg)); 428 ap->type = NARG; 429 ap->narg.text = wordtext; 430 ap->narg.backquote = backquotelist; 431 if (checkkwd = 2, readtoken() != TPIPE) 432 break; 433 app = &ap->narg.next; 434 readtoken(); 435 } 436 ap->narg.next = NULL; 437 if (lasttoken != TRP) 438 synexpect(TRP); 439 cp->nclist.body = list(0); 440 441 checkkwd = 2; 442 if ((t = readtoken()) != TESAC) { 443 if (t != TENDCASE) 444 synexpect(TENDCASE); 445 else 446 checkkwd = 2, readtoken(); 447 } 448 cpp = &cp->nclist.next; 449 } while(lasttoken != TESAC); 450 *cpp = NULL; 451 checkkwd = 1; 452 break; 453 case TLP: 454 n1 = (union node *)stalloc(sizeof (struct nredir)); 455 n1->type = NSUBSHELL; 456 n1->nredir.n = list(0); 457 n1->nredir.redirect = NULL; 458 if (readtoken() != TRP) 459 synexpect(TRP); 460 checkkwd = 1; 461 break; 462 case TBEGIN: 463 n1 = list(0); 464 if (readtoken() != TEND) 465 synexpect(TEND); 466 checkkwd = 1; 467 break; 468 /* Handle an empty command like other simple commands. */ 469 case TSEMI: 470 /* 471 * An empty command before a ; doesn't make much sense, and 472 * should certainly be disallowed in the case of `if ;'. 473 */ 474 if (!redir) 475 synexpect(-1); 476 case TNL: 477 case TEOF: 478 case TWORD: 479 case TRP: 480 tokpushback++; 481 return simplecmd(rpp, redir); 482 default: 483 synexpect(-1); 484 } 485 486 /* Now check for redirection which may follow command */ 487 while (readtoken() == TREDIR) { 488 *rpp = n2 = redirnode; 489 rpp = &n2->nfile.next; 490 parsefname(); 491 } 492 tokpushback++; 493 *rpp = NULL; 494 if (redir) { 495 if (n1->type != NSUBSHELL) { 496 n2 = (union node *)stalloc(sizeof (struct nredir)); 497 n2->type = NREDIR; 498 n2->nredir.n = n1; 499 n1 = n2; 500 } 501 n1->nredir.redirect = redir; 502 } 503 return n1; 504 } 505 506 507 STATIC union node * 508 simplecmd(rpp, redir) 509 union node **rpp, *redir; 510 { 511 union node *args, **app; 512 union node **orig_rpp = rpp; 513 union node *n; 514 515 /* If we don't have any redirections already, then we must reset */ 516 /* rpp to be the address of the local redir variable. */ 517 if (redir == 0) 518 rpp = &redir; 519 520 args = NULL; 521 app = &args; 522 /* 523 * We save the incoming value, because we need this for shell 524 * functions. There can not be a redirect or an argument between 525 * the function name and the open parenthesis. 526 */ 527 orig_rpp = rpp; 528 529 for (;;) { 530 if (readtoken() == TWORD) { 531 n = (union node *)stalloc(sizeof (struct narg)); 532 n->type = NARG; 533 n->narg.text = wordtext; 534 n->narg.backquote = backquotelist; 535 *app = n; 536 app = &n->narg.next; 537 } else if (lasttoken == TREDIR) { 538 *rpp = n = redirnode; 539 rpp = &n->nfile.next; 540 parsefname(); /* read name of redirection file */ 541 } else if (lasttoken == TLP && app == &args->narg.next 542 && rpp == orig_rpp) { 543 /* We have a function */ 544 if (readtoken() != TRP) 545 synexpect(TRP); 546 #ifdef notdef 547 if (! goodname(n->narg.text)) 548 synerror("Bad function name"); 549 #endif 550 n->type = NDEFUN; 551 n->narg.next = command(); 552 return n; 553 } else { 554 tokpushback++; 555 break; 556 } 557 } 558 *app = NULL; 559 *rpp = NULL; 560 n = (union node *)stalloc(sizeof (struct ncmd)); 561 n->type = NCMD; 562 n->ncmd.backgnd = 0; 563 n->ncmd.args = args; 564 n->ncmd.redirect = redir; 565 return n; 566 } 567 568 STATIC union node * 569 makename() { 570 union node *n; 571 572 n = (union node *)stalloc(sizeof (struct narg)); 573 n->type = NARG; 574 n->narg.next = NULL; 575 n->narg.text = wordtext; 576 n->narg.backquote = backquotelist; 577 return n; 578 } 579 580 void fixredir(n, text, err) 581 union node *n; 582 const char *text; 583 int err; 584 { 585 TRACE(("Fix redir %s %d\n", text, err)); 586 if (!err) 587 n->ndup.vname = NULL; 588 589 if (is_digit(text[0]) && text[1] == '\0') 590 n->ndup.dupfd = digit_val(text[0]); 591 else if (text[0] == '-' && text[1] == '\0') 592 n->ndup.dupfd = -1; 593 else { 594 595 if (err) 596 synerror("Bad fd number"); 597 else 598 n->ndup.vname = makename(); 599 } 600 } 601 602 603 STATIC void 604 parsefname() { 605 union node *n = redirnode; 606 607 if (readtoken() != TWORD) 608 synexpect(-1); 609 if (n->type == NHERE) { 610 struct heredoc *here = heredoc; 611 struct heredoc *p; 612 int i; 613 614 if (quoteflag == 0) 615 n->type = NXHERE; 616 TRACE(("Here document %d\n", n->type)); 617 if (here->striptabs) { 618 while (*wordtext == '\t') 619 wordtext++; 620 } 621 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN) 622 synerror("Illegal eof marker for << redirection"); 623 rmescapes(wordtext); 624 here->eofmark = wordtext; 625 here->next = NULL; 626 if (heredoclist == NULL) 627 heredoclist = here; 628 else { 629 for (p = heredoclist ; p->next ; p = p->next); 630 p->next = here; 631 } 632 } else if (n->type == NTOFD || n->type == NFROMFD) { 633 fixredir(n, wordtext, 0); 634 } else { 635 n->nfile.fname = makename(); 636 } 637 } 638 639 640 /* 641 * Input any here documents. 642 */ 643 644 STATIC void 645 parseheredoc() { 646 struct heredoc *here; 647 union node *n; 648 649 while (heredoclist) { 650 here = heredoclist; 651 heredoclist = here->next; 652 if (needprompt) { 653 setprompt(2); 654 needprompt = 0; 655 } 656 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, 657 here->eofmark, here->striptabs); 658 n = (union node *)stalloc(sizeof (struct narg)); 659 n->narg.type = NARG; 660 n->narg.next = NULL; 661 n->narg.text = wordtext; 662 n->narg.backquote = backquotelist; 663 here->here->nhere.doc = n; 664 } 665 } 666 667 STATIC int 668 peektoken() { 669 int t; 670 671 t = readtoken(); 672 tokpushback++; 673 return (t); 674 } 675 676 STATIC int xxreadtoken(); 677 678 STATIC int 679 readtoken() { 680 int t; 681 int savecheckkwd = checkkwd; 682 struct alias *ap; 683 #ifdef DEBUG 684 int alreadyseen = tokpushback; 685 #endif 686 687 top: 688 t = xxreadtoken(); 689 690 if (checkkwd) { 691 /* 692 * eat newlines 693 */ 694 if (checkkwd == 2) { 695 checkkwd = 0; 696 while (t == TNL) { 697 parseheredoc(); 698 t = xxreadtoken(); 699 } 700 } else 701 checkkwd = 0; 702 /* 703 * check for keywords and aliases 704 */ 705 if (t == TWORD && !quoteflag) 706 { 707 register char * const *pp; 708 709 for (pp = (char **)parsekwd; *pp; pp++) { 710 if (**pp == *wordtext && equal(*pp, wordtext)) 711 { 712 lasttoken = t = pp - parsekwd + KWDOFFSET; 713 TRACE(("keyword %s recognized\n", tokname[t])); 714 goto out; 715 } 716 } 717 if ((ap = lookupalias(wordtext, 1)) != NULL) { 718 pushstring(ap->val, strlen(ap->val), ap); 719 checkkwd = savecheckkwd; 720 goto top; 721 } 722 } 723 out: 724 checkkwd = 0; 725 } 726 #ifdef DEBUG 727 if (!alreadyseen) 728 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 729 else 730 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); 731 #endif 732 return (t); 733 } 734 735 736 /* 737 * Read the next input token. 738 * If the token is a word, we set backquotelist to the list of cmds in 739 * backquotes. We set quoteflag to true if any part of the word was 740 * quoted. 741 * If the token is TREDIR, then we set redirnode to a structure containing 742 * the redirection. 743 * In all cases, the variable startlinno is set to the number of the line 744 * on which the token starts. 745 * 746 * [Change comment: here documents and internal procedures] 747 * [Readtoken shouldn't have any arguments. Perhaps we should make the 748 * word parsing code into a separate routine. In this case, readtoken 749 * doesn't need to have any internal procedures, but parseword does. 750 * We could also make parseoperator in essence the main routine, and 751 * have parseword (readtoken1?) handle both words and redirection.] 752 */ 753 754 #define RETURN(token) return lasttoken = token 755 756 STATIC int 757 xxreadtoken() { 758 register c; 759 760 if (tokpushback) { 761 tokpushback = 0; 762 return lasttoken; 763 } 764 if (needprompt) { 765 setprompt(2); 766 needprompt = 0; 767 } 768 startlinno = plinno; 769 for (;;) { /* until token or start of word found */ 770 c = pgetc_macro(); 771 if (c == ' ' || c == '\t') 772 continue; /* quick check for white space first */ 773 switch (c) { 774 case ' ': case '\t': 775 continue; 776 case '#': 777 while ((c = pgetc()) != '\n' && c != PEOF); 778 pungetc(); 779 continue; 780 case '\\': 781 if (pgetc() == '\n') { 782 startlinno = ++plinno; 783 if (doprompt) 784 setprompt(2); 785 else 786 setprompt(0); 787 continue; 788 } 789 pungetc(); 790 goto breakloop; 791 case '\n': 792 plinno++; 793 needprompt = doprompt; 794 RETURN(TNL); 795 case PEOF: 796 RETURN(TEOF); 797 case '&': 798 if (pgetc() == '&') 799 RETURN(TAND); 800 pungetc(); 801 RETURN(TBACKGND); 802 case '|': 803 if (pgetc() == '|') 804 RETURN(TOR); 805 pungetc(); 806 RETURN(TPIPE); 807 case ';': 808 if (pgetc() == ';') 809 RETURN(TENDCASE); 810 pungetc(); 811 RETURN(TSEMI); 812 case '(': 813 RETURN(TLP); 814 case ')': 815 RETURN(TRP); 816 default: 817 goto breakloop; 818 } 819 } 820 breakloop: 821 return readtoken1(c, BASESYNTAX, (char *)NULL, 0); 822 #undef RETURN 823 } 824 825 826 827 /* 828 * If eofmark is NULL, read a word or a redirection symbol. If eofmark 829 * is not NULL, read a here document. In the latter case, eofmark is the 830 * word which marks the end of the document and striptabs is true if 831 * leading tabs should be stripped from the document. The argument firstc 832 * is the first character of the input token or document. 833 * 834 * Because C does not have internal subroutines, I have simulated them 835 * using goto's to implement the subroutine linkage. The following macros 836 * will run code that appears at the end of readtoken1. 837 */ 838 839 #define CHECKEND() {goto checkend; checkend_return:;} 840 #define PARSEREDIR() {goto parseredir; parseredir_return:;} 841 #define PARSESUB() {goto parsesub; parsesub_return:;} 842 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;} 843 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;} 844 #define PARSEARITH() {goto parsearith; parsearith_return:;} 845 846 STATIC int 847 readtoken1(firstc, syntax, eofmark, striptabs) 848 int firstc; 849 char const *syntax; 850 char *eofmark; 851 int striptabs; 852 { 853 int c = firstc; 854 char *out; 855 int len; 856 char line[EOFMARKLEN + 1]; 857 struct nodelist *bqlist; 858 int quotef; 859 int dblquote; 860 int varnest; /* levels of variables expansion */ 861 int arinest; /* levels of arithmetic expansion */ 862 int parenlevel; /* levels of parens in arithmetic */ 863 int oldstyle; 864 char const *prevsyntax; /* syntax before arithmetic */ 865 #if __GNUC__ 866 /* Avoid longjmp clobbering */ 867 (void) &out; 868 (void) "ef; 869 (void) &dblquote; 870 (void) &varnest; 871 (void) &arinest; 872 (void) &parenlevel; 873 (void) &oldstyle; 874 (void) &prevsyntax; 875 (void) &syntax; 876 #endif 877 878 startlinno = plinno; 879 dblquote = 0; 880 if (syntax == DQSYNTAX) 881 dblquote = 1; 882 quotef = 0; 883 bqlist = NULL; 884 varnest = 0; 885 arinest = 0; 886 parenlevel = 0; 887 888 STARTSTACKSTR(out); 889 loop: { /* for each line, until end of word */ 890 #if ATTY 891 if (c == '\034' && doprompt 892 && attyset() && ! equal(termval(), "emacs")) { 893 attyline(); 894 if (syntax == BASESYNTAX) 895 return readtoken(); 896 c = pgetc(); 897 goto loop; 898 } 899 #endif 900 CHECKEND(); /* set c to PEOF if at end of here document */ 901 for (;;) { /* until end of line or end of word */ 902 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */ 903 switch(syntax[c]) { 904 case CNL: /* '\n' */ 905 if (syntax == BASESYNTAX) 906 goto endword; /* exit outer loop */ 907 USTPUTC(c, out); 908 plinno++; 909 if (doprompt) 910 setprompt(2); 911 else 912 setprompt(0); 913 c = pgetc(); 914 goto loop; /* continue outer loop */ 915 case CWORD: 916 USTPUTC(c, out); 917 break; 918 case CCTL: 919 if (eofmark == NULL || dblquote) 920 USTPUTC(CTLESC, out); 921 USTPUTC(c, out); 922 break; 923 case CBACK: /* backslash */ 924 c = pgetc(); 925 if (c == PEOF) { 926 USTPUTC('\\', out); 927 pungetc(); 928 } else if (c == '\n') { 929 if (doprompt) 930 setprompt(2); 931 else 932 setprompt(0); 933 } else { 934 if (dblquote && c != '\\' && c != '`' && c != '$' 935 && (c != '"' || eofmark != NULL)) 936 USTPUTC('\\', out); 937 if (SQSYNTAX[c] == CCTL) 938 USTPUTC(CTLESC, out); 939 USTPUTC(c, out); 940 quotef++; 941 } 942 break; 943 case CSQUOTE: 944 syntax = SQSYNTAX; 945 break; 946 case CDQUOTE: 947 syntax = DQSYNTAX; 948 dblquote = 1; 949 break; 950 case CENDQUOTE: 951 if (eofmark) { 952 USTPUTC(c, out); 953 } else { 954 if (arinest) 955 syntax = ARISYNTAX; 956 else 957 syntax = BASESYNTAX; 958 quotef++; 959 dblquote = 0; 960 } 961 break; 962 case CVAR: /* '$' */ 963 PARSESUB(); /* parse substitution */ 964 break; 965 case CENDVAR: /* '}' */ 966 if (varnest > 0) { 967 varnest--; 968 USTPUTC(CTLENDVAR, out); 969 } else { 970 USTPUTC(c, out); 971 } 972 break; 973 case CLP: /* '(' in arithmetic */ 974 parenlevel++; 975 USTPUTC(c, out); 976 break; 977 case CRP: /* ')' in arithmetic */ 978 if (parenlevel > 0) { 979 USTPUTC(c, out); 980 --parenlevel; 981 } else { 982 if (pgetc() == ')') { 983 if (--arinest == 0) { 984 USTPUTC(CTLENDARI, out); 985 syntax = prevsyntax; 986 } else 987 USTPUTC(')', out); 988 } else { 989 /* 990 * unbalanced parens 991 * (don't 2nd guess - no error) 992 */ 993 pungetc(); 994 USTPUTC(')', out); 995 } 996 } 997 break; 998 case CBQUOTE: /* '`' */ 999 PARSEBACKQOLD(); 1000 break; 1001 case CEOF: 1002 goto endword; /* exit outer loop */ 1003 default: 1004 if (varnest == 0) 1005 goto endword; /* exit outer loop */ 1006 USTPUTC(c, out); 1007 } 1008 c = pgetc_macro(); 1009 } 1010 } 1011 endword: 1012 if (syntax == ARISYNTAX) 1013 synerror("Missing '))'"); 1014 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL) 1015 synerror("Unterminated quoted string"); 1016 if (varnest != 0) { 1017 startlinno = plinno; 1018 synerror("Missing '}'"); 1019 } 1020 USTPUTC('\0', out); 1021 len = out - stackblock(); 1022 out = stackblock(); 1023 if (eofmark == NULL) { 1024 if ((c == '>' || c == '<') 1025 && quotef == 0 1026 && len <= 2 1027 && (*out == '\0' || is_digit(*out))) { 1028 PARSEREDIR(); 1029 return lasttoken = TREDIR; 1030 } else { 1031 pungetc(); 1032 } 1033 } 1034 quoteflag = quotef; 1035 backquotelist = bqlist; 1036 grabstackblock(len); 1037 wordtext = out; 1038 return lasttoken = TWORD; 1039 /* end of readtoken routine */ 1040 1041 1042 1043 /* 1044 * Check to see whether we are at the end of the here document. When this 1045 * is called, c is set to the first character of the next input line. If 1046 * we are at the end of the here document, this routine sets the c to PEOF. 1047 */ 1048 1049 checkend: { 1050 if (eofmark) { 1051 if (striptabs) { 1052 while (c == '\t') 1053 c = pgetc(); 1054 } 1055 if (c == *eofmark) { 1056 if (pfgets(line, sizeof line) != NULL) { 1057 register char *p, *q; 1058 1059 p = line; 1060 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++); 1061 if (*p == '\n' && *q == '\0') { 1062 c = PEOF; 1063 plinno++; 1064 needprompt = doprompt; 1065 } else { 1066 pushstring(line, strlen(line), NULL); 1067 } 1068 } 1069 } 1070 } 1071 goto checkend_return; 1072 } 1073 1074 1075 /* 1076 * Parse a redirection operator. The variable "out" points to a string 1077 * specifying the fd to be redirected. The variable "c" contains the 1078 * first character of the redirection operator. 1079 */ 1080 1081 parseredir: { 1082 char fd = *out; 1083 union node *np; 1084 1085 np = (union node *)stalloc(sizeof (struct nfile)); 1086 if (c == '>') { 1087 np->nfile.fd = 1; 1088 c = pgetc(); 1089 if (c == '>') 1090 np->type = NAPPEND; 1091 else if (c == '&') 1092 np->type = NTOFD; 1093 else { 1094 np->type = NTO; 1095 pungetc(); 1096 } 1097 } else { /* c == '<' */ 1098 np->nfile.fd = 0; 1099 c = pgetc(); 1100 if (c == '<') { 1101 if (sizeof (struct nfile) != sizeof (struct nhere)) { 1102 np = (union node *)stalloc(sizeof (struct nhere)); 1103 np->nfile.fd = 0; 1104 } 1105 np->type = NHERE; 1106 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); 1107 heredoc->here = np; 1108 if ((c = pgetc()) == '-') { 1109 heredoc->striptabs = 1; 1110 } else { 1111 heredoc->striptabs = 0; 1112 pungetc(); 1113 } 1114 } else if (c == '&') 1115 np->type = NFROMFD; 1116 else { 1117 np->type = NFROM; 1118 pungetc(); 1119 } 1120 } 1121 if (fd != '\0') 1122 np->nfile.fd = digit_val(fd); 1123 redirnode = np; 1124 goto parseredir_return; 1125 } 1126 1127 1128 /* 1129 * Parse a substitution. At this point, we have read the dollar sign 1130 * and nothing else. 1131 */ 1132 1133 parsesub: { 1134 int subtype; 1135 int typeloc; 1136 int flags; 1137 char *p; 1138 #ifndef GDB_HACK 1139 static const char types[] = "}-+?="; 1140 #endif 1141 1142 c = pgetc(); 1143 if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) { 1144 USTPUTC('$', out); 1145 pungetc(); 1146 } else if (c == '(') { /* $(command) or $((arith)) */ 1147 if (pgetc() == '(') { 1148 PARSEARITH(); 1149 } else { 1150 pungetc(); 1151 PARSEBACKQNEW(); 1152 } 1153 } else { 1154 USTPUTC(CTLVAR, out); 1155 typeloc = out - stackblock(); 1156 USTPUTC(VSNORMAL, out); 1157 subtype = VSNORMAL; 1158 if (c == '{') { 1159 c = pgetc(); 1160 if (c == '#') { 1161 if ((c = pgetc()) == '}') 1162 c = '#'; 1163 else 1164 subtype = VSLENGTH; 1165 } 1166 else 1167 subtype = 0; 1168 } 1169 if (is_name(c)) { 1170 do { 1171 STPUTC(c, out); 1172 c = pgetc(); 1173 } while (is_in_name(c)); 1174 } else { 1175 if (! is_special(c)) 1176 badsub: synerror("Bad substitution"); 1177 USTPUTC(c, out); 1178 c = pgetc(); 1179 } 1180 STPUTC('=', out); 1181 flags = 0; 1182 if (subtype == 0) { 1183 switch (c) { 1184 case ':': 1185 flags = VSNUL; 1186 c = pgetc(); 1187 /*FALLTHROUGH*/ 1188 default: 1189 p = strchr(types, c); 1190 if (p == NULL) 1191 goto badsub; 1192 subtype = p - types + VSNORMAL; 1193 break; 1194 case '%': 1195 case '#': 1196 { 1197 int cc = c; 1198 subtype = c == '#' ? VSTRIMLEFT : 1199 VSTRIMRIGHT; 1200 c = pgetc(); 1201 if (c == cc) 1202 subtype++; 1203 else 1204 pungetc(); 1205 break; 1206 } 1207 } 1208 } else { 1209 pungetc(); 1210 } 1211 if (dblquote || arinest) 1212 flags |= VSQUOTE; 1213 *(stackblock() + typeloc) = subtype | flags; 1214 if (subtype != VSNORMAL) 1215 varnest++; 1216 } 1217 goto parsesub_return; 1218 } 1219 1220 1221 /* 1222 * Called to parse command substitutions. Newstyle is set if the command 1223 * is enclosed inside $(...); nlpp is a pointer to the head of the linked 1224 * list of commands (passed by reference), and savelen is the number of 1225 * characters on the top of the stack which must be preserved. 1226 */ 1227 1228 parsebackq: { 1229 struct nodelist **nlpp; 1230 int savepbq; 1231 union node *n; 1232 char *volatile str; 1233 struct jmploc jmploc; 1234 struct jmploc *volatile savehandler; 1235 int savelen; 1236 int saveprompt; 1237 1238 savepbq = parsebackquote; 1239 if (setjmp(jmploc.loc)) { 1240 if (str) 1241 ckfree(str); 1242 parsebackquote = 0; 1243 handler = savehandler; 1244 longjmp(handler->loc, 1); 1245 } 1246 INTOFF; 1247 str = NULL; 1248 savelen = out - stackblock(); 1249 if (savelen > 0) { 1250 str = ckmalloc(savelen); 1251 memcpy(str, stackblock(), savelen); 1252 } 1253 savehandler = handler; 1254 handler = &jmploc; 1255 INTON; 1256 if (oldstyle) { 1257 /* We must read until the closing backquote, giving special 1258 treatment to some slashes, and then push the string and 1259 reread it as input, interpreting it normally. */ 1260 register char *out; 1261 register c; 1262 int savelen; 1263 char *str; 1264 1265 1266 STARTSTACKSTR(out); 1267 for (;;) { 1268 if (needprompt) { 1269 setprompt(2); 1270 needprompt = 0; 1271 } 1272 switch (c = pgetc()) { 1273 case '`': 1274 goto done; 1275 1276 case '\\': 1277 if ((c = pgetc()) == '\n') { 1278 plinno++; 1279 if (doprompt) 1280 setprompt(2); 1281 else 1282 setprompt(0); 1283 /* 1284 * If eating a newline, avoid putting 1285 * the newline into the new character 1286 * stream (via the STPUTC after the 1287 * switch). 1288 */ 1289 continue; 1290 } 1291 if (c != '\\' && c != '`' && c != '$' 1292 && (!dblquote || c != '"')) 1293 STPUTC('\\', out); 1294 break; 1295 1296 case '\n': 1297 plinno++; 1298 needprompt = doprompt; 1299 break; 1300 1301 default: 1302 break; 1303 } 1304 STPUTC(c, out); 1305 } 1306 done: 1307 STPUTC('\0', out); 1308 savelen = out - stackblock(); 1309 if (savelen > 0) { 1310 str = ckmalloc(savelen); 1311 memcpy(str, stackblock(), savelen); 1312 setinputstring(str, 1); 1313 } 1314 } 1315 nlpp = &bqlist; 1316 while (*nlpp) 1317 nlpp = &(*nlpp)->next; 1318 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); 1319 (*nlpp)->next = NULL; 1320 parsebackquote = oldstyle; 1321 1322 if (oldstyle) { 1323 saveprompt = doprompt; 1324 doprompt = 0; 1325 } 1326 1327 n = list(0); 1328 1329 if (oldstyle) 1330 doprompt = saveprompt; 1331 else { 1332 if (readtoken() != TRP) 1333 synexpect(TRP); 1334 } 1335 1336 (*nlpp)->n = n; 1337 if (oldstyle) { 1338 /* 1339 * Start reading from old file again, ignoring any pushed back 1340 * tokens left from the backquote parsing 1341 */ 1342 popfile(); 1343 tokpushback = 0; 1344 } 1345 while (stackblocksize() <= savelen) 1346 growstackblock(); 1347 STARTSTACKSTR(out); 1348 if (str) { 1349 memcpy(out, str, savelen); 1350 STADJUST(savelen, out); 1351 INTOFF; 1352 ckfree(str); 1353 str = NULL; 1354 INTON; 1355 } 1356 parsebackquote = savepbq; 1357 handler = savehandler; 1358 if (arinest || dblquote) 1359 USTPUTC(CTLBACKQ | CTLQUOTE, out); 1360 else 1361 USTPUTC(CTLBACKQ, out); 1362 if (oldstyle) 1363 goto parsebackq_oldreturn; 1364 else 1365 goto parsebackq_newreturn; 1366 } 1367 1368 /* 1369 * Parse an arithmetic expansion (indicate start of one and set state) 1370 */ 1371 parsearith: { 1372 1373 if (++arinest == 1) { 1374 prevsyntax = syntax; 1375 syntax = ARISYNTAX; 1376 USTPUTC(CTLARI, out); 1377 } else { 1378 /* 1379 * we collapse embedded arithmetic expansion to 1380 * parenthesis, which should be equivalent 1381 */ 1382 USTPUTC('(', out); 1383 } 1384 goto parsearith_return; 1385 } 1386 1387 } /* end of readtoken */ 1388 1389 1390 1391 #ifdef mkinit 1392 RESET { 1393 tokpushback = 0; 1394 checkkwd = 0; 1395 } 1396 #endif 1397 1398 /* 1399 * Returns true if the text contains nothing to expand (no dollar signs 1400 * or backquotes). 1401 */ 1402 1403 STATIC int 1404 noexpand(text) 1405 char *text; 1406 { 1407 register char *p; 1408 register char c; 1409 1410 p = text; 1411 while ((c = *p++) != '\0') { 1412 if (c == CTLESC) 1413 p++; 1414 else if (BASESYNTAX[c] == CCTL) 1415 return 0; 1416 } 1417 return 1; 1418 } 1419 1420 1421 /* 1422 * Return true if the argument is a legal variable name (a letter or 1423 * underscore followed by zero or more letters, underscores, and digits). 1424 */ 1425 1426 int 1427 goodname(name) 1428 char *name; 1429 { 1430 register char *p; 1431 1432 p = name; 1433 if (! is_name(*p)) 1434 return 0; 1435 while (*++p) { 1436 if (! is_in_name(*p)) 1437 return 0; 1438 } 1439 return 1; 1440 } 1441 1442 1443 /* 1444 * Called when an unexpected token is read during the parse. The argument 1445 * is the token that is expected, or -1 if more than one type of token can 1446 * occur at this point. 1447 */ 1448 1449 STATIC void 1450 synexpect(token) 1451 int token; 1452 { 1453 char msg[64]; 1454 1455 if (token >= 0) { 1456 fmtstr(msg, 64, "%s unexpected (expecting %s)", 1457 tokname[lasttoken], tokname[token]); 1458 } else { 1459 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]); 1460 } 1461 synerror(msg); 1462 } 1463 1464 1465 STATIC void 1466 synerror(msg) 1467 char *msg; 1468 { 1469 if (commandname) 1470 outfmt(&errout, "%s: %d: ", commandname, startlinno); 1471 outfmt(&errout, "Syntax error: %s\n", msg); 1472 error((char *)NULL); 1473 } 1474 1475 STATIC void 1476 setprompt(which) 1477 int which; 1478 { 1479 whichprompt = which; 1480 1481 #ifndef NO_HISTORY 1482 if (!el) 1483 #endif 1484 out2str(getprompt(NULL)); 1485 } 1486 1487 /* 1488 * called by editline -- any expansions to the prompt 1489 * should be added here. 1490 */ 1491 char * 1492 getprompt(unused) 1493 void *unused; 1494 { 1495 switch (whichprompt) { 1496 case 0: 1497 return ""; 1498 case 1: 1499 return ps1val(); 1500 case 2: 1501 return ps2val(); 1502 default: 1503 return "<internal prompt error>"; 1504 } 1505 } 1506