xref: /netbsd-src/bin/sh/parser.c (revision 0b9f50897e9a9c6709320fafb4c3787fddcc0a45)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Kenneth Almquist.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 /*static char sccsid[] = "from: @(#)parser.c	5.3 (Berkeley) 4/12/91";*/
39 static char rcsid[] = "$Id: parser.c,v 1.10 1993/09/09 01:21:43 cgd Exp $";
40 #endif /* not lint */
41 
42 #include "shell.h"
43 #include "parser.h"
44 #include "nodes.h"
45 #include "expand.h"	/* defines rmescapes() */
46 #include "redir.h"	/* defines copyfd() */
47 #include "syntax.h"
48 #include "options.h"
49 #include "input.h"
50 #include "output.h"
51 #include "var.h"
52 #include "error.h"
53 #include "memalloc.h"
54 #include "mystring.h"
55 
56 
57 /*
58  * Shell command parser.
59  */
60 
61 #define EOFMARKLEN 79
62 
63 /* values returned by readtoken */
64 #include "token.def"
65 
66 
67 
/*
 * Bookkeeping for one pending here document.  A record is allocated
 * when a "<<" operator is tokenized, queued on heredoclist once its
 * end marker has been read, and consumed by parseheredoc().
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
74 
75 
76 
/*
 * Parser state shared between the tokenizer and the grammar routines.
 */
struct heredoc *heredoclist;	/* list of here documents to read */
int parsebackquote;		/* nonzero if we are inside backquotes */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
MKINIT int tokpushback;		/* last token pushed back */
char *wordtext;			/* text of last word returned by readtoken */
int checkkwd;               /* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;	/* backquote commands found in the last word */
union node *redirnode;		/* node built for the last TREDIR token */
struct heredoc *heredoc;	/* record allocated for the last "<<" operator */
int quoteflag;			/* set if (part of) last token was quoted */
int startlinno;			/* line # where last token started */


#define GDB_HACK 1 /* avoid local declarations which gdb can't handle */
#ifdef GDB_HACK
/* Word text used as the argument list of a "for" loop that has no "in". */
static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'};
/* Operator characters of ${...}; the index is added to VSNORMAL in parsesub. */
static const char types[] = "}-+?=";
#endif
97 
98 
99 STATIC union node *list __P((int));
100 STATIC union node *andor __P((void));
101 STATIC union node *pipeline __P((void));
102 STATIC union node *command __P((void));
103 STATIC union node *simplecmd __P((union node **, union node *));
104 STATIC void parsefname __P((void));
105 STATIC void parseheredoc __P((void));
106 STATIC int readtoken __P((void));
107 STATIC int readtoken1 __P((int, char const *, char *, int));
108 STATIC void attyline __P((void));
109 STATIC int noexpand __P((char *));
110 STATIC void synexpect __P((int));
111 STATIC void synerror __P((char *));
112 
113 #if ATTY
114 STATIC void putprompt __P((char *));
115 #else /* not ATTY */
116 #define putprompt(s)	out2str(s)
117 #endif
118 
119 
120 
121 
122 /*
123  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
124  * valid parse tree indicating a blank line.)
125  */
126 
127 union node *
128 parsecmd(interact) {
129 	int t;
130 
131 	doprompt = interact;
132 	if (doprompt)
133 		putprompt(ps1val());
134 	needprompt = 0;
135 	if ((t = readtoken()) == TEOF)
136 		return NEOF;
137 	if (t == TNL)
138 		return NULL;
139 	tokpushback++;
140 	return list(1);
141 }
142 
143 
144 STATIC union node *
145 list(nlflag) {
146 	union node *n1, *n2, *n3;
147 
148 	checkkwd = 2;
149 	if (nlflag == 0 && tokendlist[peektoken()])
150 		return NULL;
151 	n1 = andor();
152 	for (;;) {
153 		switch (readtoken()) {
154 		case TBACKGND:
155 			if (n1->type == NCMD || n1->type == NPIPE) {
156 				n1->ncmd.backgnd = 1;
157 			} else if (n1->type == NREDIR) {
158 				n1->type = NBACKGND;
159 			} else {
160 				n3 = (union node *)stalloc(sizeof (struct nredir));
161 				n3->type = NBACKGND;
162 				n3->nredir.n = n1;
163 				n3->nredir.redirect = NULL;
164 				n1 = n3;
165 			}
166 			goto tsemi;
167 		case TNL:
168 			tokpushback++;
169 			/* fall through */
170 tsemi:	    case TSEMI:
171 			if (readtoken() == TNL) {
172 				parseheredoc();
173 				if (nlflag)
174 					return n1;
175 			} else {
176 				tokpushback++;
177 			}
178 			checkkwd = 2;
179 			if (tokendlist[peektoken()])
180 				return n1;
181 			n2 = andor();
182 			n3 = (union node *)stalloc(sizeof (struct nbinary));
183 			n3->type = NSEMI;
184 			n3->nbinary.ch1 = n1;
185 			n3->nbinary.ch2 = n2;
186 			n1 = n3;
187 			break;
188 		case TEOF:
189 			if (heredoclist)
190 				parseheredoc();
191 			else
192 				pungetc();		/* push back EOF on input */
193 			return n1;
194 		default:
195 			if (nlflag)
196 				synexpect(-1);
197 			tokpushback++;
198 			return n1;
199 		}
200 	}
201 }
202 
203 
204 
205 STATIC union node *
206 andor() {
207 	union node *n1, *n2, *n3;
208 	int t;
209 
210 	n1 = pipeline();
211 	for (;;) {
212 		if ((t = readtoken()) == TAND) {
213 			t = NAND;
214 		} else if (t == TOR) {
215 			t = NOR;
216 		} else {
217 			tokpushback++;
218 			return n1;
219 		}
220 		n2 = pipeline();
221 		n3 = (union node *)stalloc(sizeof (struct nbinary));
222 		n3->type = t;
223 		n3->nbinary.ch1 = n1;
224 		n3->nbinary.ch2 = n2;
225 		n1 = n3;
226 	}
227 }
228 
229 
230 
231 STATIC union node *
232 pipeline() {
233 	union node *n1, *pipenode;
234 	struct nodelist *lp, *prev;
235 
236 	n1 = command();
237 	if (readtoken() == TPIPE) {
238 		pipenode = (union node *)stalloc(sizeof (struct npipe));
239 		pipenode->type = NPIPE;
240 		pipenode->npipe.backgnd = 0;
241 		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
242 		pipenode->npipe.cmdlist = lp;
243 		lp->n = n1;
244 		do {
245 			prev = lp;
246 			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
247 			lp->n = command();
248 			prev->next = lp;
249 		} while (readtoken() == TPIPE);
250 		lp->next = NULL;
251 		n1 = pipenode;
252 	}
253 	tokpushback++;
254 	return n1;
255 }
256 
257 
258 
259 STATIC union node *
260 command() {
261 	union node *n1, *n2;
262 	union node *ap, **app;
263 	union node *cp, **cpp;
264 	union node *redir, **rpp;
265 	int t;
266 
267 	checkkwd = 2;
268 	redir = 0;
269 	rpp = &redir;
270 	/* Check for redirection which may precede command */
271 	while (readtoken() == TREDIR) {
272 		*rpp = n2 = redirnode;
273 		rpp = &n2->nfile.next;
274 		parsefname();
275 	}
276 	tokpushback++;
277 
278 	switch (readtoken()) {
279 	case TIF:
280 		n1 = (union node *)stalloc(sizeof (struct nif));
281 		n1->type = NIF;
282 		n1->nif.test = list(0);
283 		if (readtoken() != TTHEN)
284 			synexpect(TTHEN);
285 		n1->nif.ifpart = list(0);
286 		n2 = n1;
287 		while (readtoken() == TELIF) {
288 			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
289 			n2 = n2->nif.elsepart;
290 			n2->type = NIF;
291 			n2->nif.test = list(0);
292 			if (readtoken() != TTHEN)
293 				synexpect(TTHEN);
294 			n2->nif.ifpart = list(0);
295 		}
296 		if (lasttoken == TELSE)
297 			n2->nif.elsepart = list(0);
298 		else {
299 			n2->nif.elsepart = NULL;
300 			tokpushback++;
301 		}
302 		if (readtoken() != TFI)
303 			synexpect(TFI);
304 		checkkwd = 1;
305 		break;
306 	case TWHILE:
307 	case TUNTIL: {
308 		int got;
309 		n1 = (union node *)stalloc(sizeof (struct nbinary));
310 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
311 		n1->nbinary.ch1 = list(0);
312 		if ((got=readtoken()) != TDO) {
313 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
314 			synexpect(TDO);
315 		}
316 		n1->nbinary.ch2 = list(0);
317 		if (readtoken() != TDONE)
318 			synexpect(TDONE);
319 		checkkwd = 1;
320 		break;
321 	}
322 	case TFOR:
323 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
324 			synerror("Bad for loop variable");
325 		n1 = (union node *)stalloc(sizeof (struct nfor));
326 		n1->type = NFOR;
327 		n1->nfor.var = wordtext;
328 		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
329 			app = ≈
330 			while (readtoken() == TWORD) {
331 				n2 = (union node *)stalloc(sizeof (struct narg));
332 				n2->type = NARG;
333 				n2->narg.text = wordtext;
334 				n2->narg.backquote = backquotelist;
335 				*app = n2;
336 				app = &n2->narg.next;
337 			}
338 			*app = NULL;
339 			n1->nfor.args = ap;
340 			/* A newline or semicolon is required here to end
341 			   the list.  */
342 			if (lasttoken != TNL && lasttoken != TSEMI)
343 				synexpect(-1);
344 		} else {
345 #ifndef GDB_HACK
346 			static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
347 								   '@', '=', '\0'};
348 #endif
349 			n2 = (union node *)stalloc(sizeof (struct narg));
350 			n2->type = NARG;
351 			n2->narg.text = (char *)argvars;
352 			n2->narg.backquote = NULL;
353 			n2->narg.next = NULL;
354 			n1->nfor.args = n2;
355 			/* Many shells accept an optional semicolon here, but
356 			   POSIX says we should not, so we don't.  An optional
357 			   newline is OK here, but that is handled by the
358 			   checkkwd = 2 assignment below.  */
359 			tokpushback++;
360 		}
361 		checkkwd = 2;
362 		if ((t = readtoken()) == TDO)
363 			t = TDONE;
364 		else if (t == TBEGIN)
365 			t = TEND;
366 		else
367 			synexpect(-1);
368 		n1->nfor.body = list(0);
369 		if (readtoken() != t)
370 			synexpect(t);
371 		checkkwd = 1;
372 		break;
373 	case TCASE:
374 		n1 = (union node *)stalloc(sizeof (struct ncase));
375 		n1->type = NCASE;
376 		if (readtoken() != TWORD)
377 			synexpect(TWORD);
378 		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
379 		n2->type = NARG;
380 		n2->narg.text = wordtext;
381 		n2->narg.backquote = backquotelist;
382 		n2->narg.next = NULL;
383 		while (readtoken() == TNL);
384 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
385 			synerror("expecting \"in\"");
386 		cpp = &n1->ncase.cases;
387 		while (checkkwd = 2, readtoken() == TWORD) {
388 			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
389 			cp->type = NCLIST;
390 			app = &cp->nclist.pattern;
391 			for (;;) {
392 				*app = ap = (union node *)stalloc(sizeof (struct narg));
393 				ap->type = NARG;
394 				ap->narg.text = wordtext;
395 				ap->narg.backquote = backquotelist;
396 				if (readtoken() != TPIPE)
397 					break;
398 				app = &ap->narg.next;
399 				if (readtoken() != TWORD)
400 					synexpect(TWORD);
401 			}
402 			ap->narg.next = NULL;
403 			if (lasttoken != TRP)
404 				synexpect(TRP);
405 			cp->nclist.body = list(0);
406 			if ((t = readtoken()) == TESAC)
407 				tokpushback++;
408 			else if (t != TENDCASE)
409 				synexpect(TENDCASE);
410 			cpp = &cp->nclist.next;
411 		}
412 		*cpp = NULL;
413 		if (lasttoken != TESAC)
414 			synexpect(TESAC);
415 		checkkwd = 1;
416 		break;
417 	case TLP:
418 		n1 = (union node *)stalloc(sizeof (struct nredir));
419 		n1->type = NSUBSHELL;
420 		n1->nredir.n = list(0);
421 		n1->nredir.redirect = NULL;
422 		if (readtoken() != TRP)
423 			synexpect(TRP);
424 		checkkwd = 1;
425 		break;
426 	case TBEGIN:
427 		n1 = list(0);
428 		if (readtoken() != TEND)
429 			synexpect(TEND);
430 		checkkwd = 1;
431 		break;
432 	/* Handle an empty command like other simple commands.  */
433 	case TSEMI:
434 	case TNL:
435 	/* Handle EOF like other simple commands, too.  */
436 	case TEOF:
437 	case TWORD:
438 		tokpushback++;
439 		return simplecmd(rpp, redir);
440 	default:
441 		synexpect(-1);
442 	}
443 
444 	/* Now check for redirection which may follow command */
445 	while (readtoken() == TREDIR) {
446 		*rpp = n2 = redirnode;
447 		rpp = &n2->nfile.next;
448 		parsefname();
449 	}
450 	tokpushback++;
451 	*rpp = NULL;
452 	if (redir) {
453 		if (n1->type != NSUBSHELL) {
454 			n2 = (union node *)stalloc(sizeof (struct nredir));
455 			n2->type = NREDIR;
456 			n2->nredir.n = n1;
457 			n1 = n2;
458 		}
459 		n1->nredir.redirect = redir;
460 	}
461 	return n1;
462 }
463 
464 
465 STATIC union node *
466 simplecmd(rpp, redir)
467 	union node **rpp, *redir;
468 	{
469 	union node *args, **app;
470 	union node **orig_rpp = rpp;
471 	union node *n;
472 
473 	/* If we don't have any redirections already, then we must reset
474 	   rpp to be the address of the local redir variable.  */
475 	if (redir == 0)
476 		rpp = &redir;
477 
478 	args = NULL;
479 	app = &args;
480 	/* We save the incoming value, because we need this for shell
481 	   functions.  There can not be a redirect or an argument between
482 	   the function name and the open parenthesis.  */
483 	orig_rpp = rpp;
484 	for (;;) {
485 		if (readtoken() == TWORD) {
486 			n = (union node *)stalloc(sizeof (struct narg));
487 			n->type = NARG;
488 			n->narg.text = wordtext;
489 			n->narg.backquote = backquotelist;
490 			*app = n;
491 			app = &n->narg.next;
492 		} else if (lasttoken == TREDIR) {
493 			*rpp = n = redirnode;
494 			rpp = &n->nfile.next;
495 			parsefname();	/* read name of redirection file */
496 		} else if (lasttoken == TLP && app == &args->narg.next
497 					    && rpp == orig_rpp) {
498 			/* We have a function */
499 			if (readtoken() != TRP)
500 				synexpect(TRP);
501 #ifdef notdef
502 			if (! goodname(n->narg.text))
503 				synerror("Bad function name");
504 #endif
505 			n->type = NDEFUN;
506 			n->narg.next = command();
507 			return n;
508 		} else {
509 			tokpushback++;
510 			break;
511 		}
512 	}
513 	*app = NULL;
514 	*rpp = NULL;
515 	n = (union node *)stalloc(sizeof (struct ncmd));
516 	n->type = NCMD;
517 	n->ncmd.backgnd = 0;
518 	n->ncmd.args = args;
519 	n->ncmd.redirect = redir;
520 	return n;
521 }
522 
523 
524 STATIC void
525 parsefname() {
526 	union node *n = redirnode;
527 
528 	if (readtoken() != TWORD)
529 		synexpect(-1);
530 	if (n->type == NHERE) {
531 		struct heredoc *here = heredoc;
532 		struct heredoc *p;
533 		int i;
534 
535 		if (quoteflag == 0)
536 			n->type = NXHERE;
537 		TRACE(("Here document %d\n", n->type));
538 		if (here->striptabs) {
539 			while (*wordtext == '\t')
540 				wordtext++;
541 		}
542 		if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
543 			synerror("Illegal eof marker for << redirection");
544 		rmescapes(wordtext);
545 		here->eofmark = wordtext;
546 		here->next = NULL;
547 		if (heredoclist == NULL)
548 			heredoclist = here;
549 		else {
550 			for (p = heredoclist ; p->next ; p = p->next);
551 			p->next = here;
552 		}
553 	} else if (n->type == NTOFD || n->type == NFROMFD) {
554 		if (is_digit(wordtext[0]))
555 			n->ndup.dupfd = digit_val(wordtext[0]);
556 		else if (wordtext[0] == '-')
557 			n->ndup.dupfd = -1;
558 		else
559 			goto bad;
560 		if (wordtext[1] != '\0') {
561 bad:
562 			synerror("Bad fd number");
563 		}
564 	} else {
565 		n->nfile.fname = (union node *)stalloc(sizeof (struct narg));
566 		n = n->nfile.fname;
567 		n->type = NARG;
568 		n->narg.next = NULL;
569 		n->narg.text = wordtext;
570 		n->narg.backquote = backquotelist;
571 	}
572 }
573 
574 
575 /*
576  * Input any here documents.
577  */
578 
579 STATIC void
580 parseheredoc() {
581 	struct heredoc *here;
582 	union node *n;
583 
584 	while (heredoclist) {
585 		here = heredoclist;
586 		heredoclist = here->next;
587 		if (needprompt) {
588 			putprompt(ps2val());
589 			needprompt = 0;
590 		}
591 		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
592 				here->eofmark, here->striptabs);
593 		n = (union node *)stalloc(sizeof (struct narg));
594 		n->narg.type = NARG;
595 		n->narg.next = NULL;
596 		n->narg.text = wordtext;
597 		n->narg.backquote = backquotelist;
598 		here->here->nhere.doc = n;
599 	}
600 }
601 
602 STATIC int
603 peektoken() {
604 	int t;
605 
606 	t = readtoken();
607 	tokpushback++;
608 	return (t);
609 }
610 
611 STATIC int xxreadtoken();
612 
613 STATIC int
614 readtoken() {
615 	int t;
616 #ifdef DEBUG
617 	int alreadyseen = tokpushback;
618 #endif
619 
620 	t = xxreadtoken();
621 
622 	if (checkkwd) {
623 		/*
624 		 * eat newlines
625 		 */
626 		if (checkkwd == 2) {
627 			checkkwd = 0;
628 			while (t == TNL) {
629 				parseheredoc();
630 				t = xxreadtoken();
631 			}
632 		} else
633 			checkkwd = 0;
634 		/*
635 		 * check for keywords
636 		 */
637 		if (t == TWORD && !quoteflag) {
638 			register char *const *pp;
639 
640 			for (pp = parsekwd; *pp; pp++) {
641 				if (**pp == *wordtext && equal(*pp, wordtext)) {
642 					lasttoken = t = pp - parsekwd + KWDOFFSET;
643 					TRACE(("keyword %s recognized\n", tokname[t]));
644 					break;
645 				}
646 			}
647 		}
648 	}
649 #ifdef DEBUG
650 	if (!alreadyseen)
651 	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
652 	else
653 	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
654 #endif
655 	return (t);
656 }
657 
658 
659 /*
660  * Read the next input token.
661  * If the token is a word, we set backquotelist to the list of cmds in
662  *	backquotes.  We set quoteflag to true if any part of the word was
663  *	quoted.
664  * If the token is TREDIR, then we set redirnode to a structure containing
665  *	the redirection.
666  * In all cases, the variable startlinno is set to the number of the line
667  *	on which the token starts.
668  *
669  * [Change comment:  here documents and internal procedures]
670  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
671  *  word parsing code into a separate routine.  In this case, readtoken
672  *  doesn't need to have any internal procedures, but parseword does.
673  *  We could also make parseoperator in essence the main routine, and
674  *  have parseword (readtoken1?) handle both words and redirection.]
675  */
676 
677 #define RETURN(token)	return lasttoken = token
678 
679 STATIC int
680 xxreadtoken() {
681 	register c;
682 
683 	if (tokpushback) {
684 		tokpushback = 0;
685 		return lasttoken;
686 	}
687 	if (needprompt) {
688 		putprompt(ps2val());
689 		needprompt = 0;
690 	}
691 	startlinno = plinno;
692 	for (;;) {	/* until token or start of word found */
693 		c = pgetc_macro();
694 		if (c == ' ' || c == '\t')
695 			continue;		/* quick check for white space first */
696 		switch (c) {
697 		case ' ': case '\t':
698 			continue;
699 		case '#':
700 			while ((c = pgetc()) != '\n' && c != PEOF);
701 			pungetc();
702 			continue;
703 		case '\\':
704 			if (pgetc() == '\n') {
705 				startlinno = ++plinno;
706 				if (doprompt)
707 					putprompt(ps2val());
708 				continue;
709 			}
710 			pungetc();
711 			goto breakloop;
712 		case '\n':
713 			plinno++;
714 			needprompt = doprompt;
715 			RETURN(TNL);
716 		case PEOF:
717 			RETURN(TEOF);
718 		case '&':
719 			if (pgetc() == '&')
720 				RETURN(TAND);
721 			pungetc();
722 			RETURN(TBACKGND);
723 		case '|':
724 			if (pgetc() == '|')
725 				RETURN(TOR);
726 			pungetc();
727 			RETURN(TPIPE);
728 		case ';':
729 			if (pgetc() == ';')
730 				RETURN(TENDCASE);
731 			pungetc();
732 			RETURN(TSEMI);
733 		case '(':
734 			RETURN(TLP);
735 		case ')':
736 			RETURN(TRP);
737 		default:
738 			goto breakloop;
739 		}
740 	}
741 breakloop:
742 	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
743 #undef RETURN
744 }
745 
746 
747 
748 /*
749  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
750  * is not NULL, read a here document.  In the latter case, eofmark is the
751  * word which marks the end of the document and striptabs is true if
752  * leading tabs should be stripped from the document.  The argument firstc
753  * is the first character of the input token or document.
754  *
755  * Because C does not have internal subroutines, I have simulated them
756  * using goto's to implement the subroutine linkage.  The following macros
757  * will run code that appears at the end of readtoken1.
758  */
759 
760 #define CHECKEND()	{goto checkend; checkend_return:;}
761 #define PARSEREDIR()	{goto parseredir; parseredir_return:;}
762 #define PARSESUB()	{goto parsesub; parsesub_return:;}
763 #define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
764 #define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
765 
766 STATIC int
767 readtoken1(firstc, syntax, eofmark, striptabs)
768 	int firstc;
769 	char const *syntax;
770 	char *eofmark;
771 	int striptabs;
772 	{
773 	register c = firstc;
774 	register char *out;
775 	int len;
776 	char line[EOFMARKLEN + 1];
777 	struct nodelist *bqlist;
778 	int quotef;
779 	int dblquote;
780 	int varnest;
781 	int oldstyle;
782 
783 	startlinno = plinno;
784 	dblquote = 0;
785 	if (syntax == DQSYNTAX)
786 		dblquote = 1;
787 	quotef = 0;
788 	bqlist = NULL;
789 	varnest = 0;
790 	STARTSTACKSTR(out);
791 	loop: {	/* for each line, until end of word */
792 #if ATTY
793 		if (c == '\034' && doprompt
794 		 && attyset() && ! equal(termval(), "emacs")) {
795 			attyline();
796 			if (syntax == BASESYNTAX)
797 				return readtoken();
798 			c = pgetc();
799 			goto loop;
800 		}
801 #endif
802 		CHECKEND();	/* set c to PEOF if at end of here document */
803 		for (;;) {	/* until end of line or end of word */
804 			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
805 			switch(syntax[c]) {
806 			case CNL:	/* '\n' */
807 				if (syntax == BASESYNTAX)
808 					goto endword;	/* exit outer loop */
809 				USTPUTC(c, out);
810 				plinno++;
811 				if (doprompt) {
812 					putprompt(ps2val());
813 				}
814 				c = pgetc();
815 				goto loop;		/* continue outer loop */
816 			case CWORD:
817 				USTPUTC(c, out);
818 				break;
819 			case CCTL:
820 				if (eofmark == NULL || dblquote)
821 					USTPUTC(CTLESC, out);
822 				USTPUTC(c, out);
823 				break;
824 			case CBACK:	/* backslash */
825 				c = pgetc();
826 				if (c == PEOF) {
827 					USTPUTC('\\', out);
828 					pungetc();
829 				} else if (c == '\n') {
830 					if (doprompt)
831 						putprompt(ps2val());
832 				} else {
833 					if (dblquote && c != '\\' && c != '`' && c != '$'
834 							 && (c != '"' || eofmark != NULL))
835 						USTPUTC('\\', out);
836 					if (SQSYNTAX[c] == CCTL)
837 						USTPUTC(CTLESC, out);
838 					USTPUTC(c, out);
839 					quotef++;
840 				}
841 				break;
842 			case CSQUOTE:
843 				syntax = SQSYNTAX;
844 				break;
845 			case CDQUOTE:
846 				syntax = DQSYNTAX;
847 				dblquote = 1;
848 				break;
849 			case CENDQUOTE:
850 				if (eofmark) {
851 					USTPUTC(c, out);
852 				} else {
853 					syntax = BASESYNTAX;
854 					quotef++;
855 					dblquote = 0;
856 				}
857 				break;
858 			case CVAR:	/* '$' */
859 				PARSESUB();		/* parse substitution */
860 				break;
861 			case CENDVAR:	/* '}' */
862 				if (varnest > 0) {
863 					varnest--;
864 					USTPUTC(CTLENDVAR, out);
865 				} else {
866 					USTPUTC(c, out);
867 				}
868 				break;
869 			case CBQUOTE:	/* '`' */
870 				PARSEBACKQOLD();
871 				break;
872 			case CEOF:
873 				goto endword;		/* exit outer loop */
874 			default:
875 				if (varnest == 0)
876 					goto endword;	/* exit outer loop */
877 				USTPUTC(c, out);
878 			}
879 			c = pgetc_macro();
880 		}
881 	}
882 endword:
883 	if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
884 		synerror("Unterminated quoted string");
885 	if (varnest != 0) {
886 		startlinno = plinno;
887 		synerror("Missing '}'");
888 	}
889 	USTPUTC('\0', out);
890 	len = out - stackblock();
891 	out = stackblock();
892 	if (eofmark == NULL) {
893 		if ((c == '>' || c == '<')
894 		 && quotef == 0
895 		 && len <= 2
896 		 && (*out == '\0' || is_digit(*out))) {
897 			PARSEREDIR();
898 			return lasttoken = TREDIR;
899 		} else {
900 			pungetc();
901 		}
902 	}
903 	quoteflag = quotef;
904 	backquotelist = bqlist;
905 	grabstackblock(len);
906 	wordtext = out;
907 	return lasttoken = TWORD;
908 /* end of readtoken routine */
909 
910 
911 
912 /*
913  * Check to see whether we are at the end of the here document.  When this
914  * is called, c is set to the first character of the next input line.  If
915  * we are at the end of the here document, this routine sets the c to PEOF.
916  */
917 
918 checkend: {
919 	if (eofmark) {
920 		if (striptabs) {
921 			while (c == '\t')
922 				c = pgetc();
923 		}
924 		if (c == *eofmark) {
925 			if (pfgets(line, sizeof line) != NULL) {
926 				register char *p, *q;
927 
928 				p = line;
929 				for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
930 				if (*p == '\n' && *q == '\0') {
931 					c = PEOF;
932 					plinno++;
933 					needprompt = doprompt;
934 				} else {
935 					ppushback(line, strlen(line));
936 				}
937 			}
938 		}
939 	}
940 	goto checkend_return;
941 }
942 
943 
944 /*
945  * Parse a redirection operator.  The variable "out" points to a string
946  * specifying the fd to be redirected.  The variable "c" contains the
947  * first character of the redirection operator.
948  */
949 
950 parseredir: {
951 	char fd = *out;
952 	union node *np;
953 
954 	np = (union node *)stalloc(sizeof (struct nfile));
955 	if (c == '>') {
956 		np->nfile.fd = 1;
957 		c = pgetc();
958 		if (c == '>')
959 			np->type = NAPPEND;
960 		else if (c == '&')
961 			np->type = NTOFD;
962 		else {
963 			np->type = NTO;
964 			pungetc();
965 		}
966 	} else {	/* c == '<' */
967 		np->nfile.fd = 0;
968 		c = pgetc();
969 		if (c == '<') {
970 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
971 				np = (union node *)stalloc(sizeof (struct nhere));
972 				np->nfile.fd = 0;
973 			}
974 			np->type = NHERE;
975 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
976 			heredoc->here = np;
977 			if ((c = pgetc()) == '-') {
978 				heredoc->striptabs = 1;
979 			} else {
980 				heredoc->striptabs = 0;
981 				pungetc();
982 			}
983 		} else if (c == '&')
984 			np->type = NFROMFD;
985 		else {
986 			np->type = NFROM;
987 			pungetc();
988 		}
989 	}
990 	if (fd != '\0')
991 		np->nfile.fd = digit_val(fd);
992 	redirnode = np;
993 	goto parseredir_return;
994 }
995 
996 
997 /*
998  * Parse a substitution.  At this point, we have read the dollar sign
999  * and nothing else.
1000  */
1001 
1002 parsesub: {
1003 	int subtype;
1004 	int typeloc;
1005 	int flags;
1006 	char *p;
1007 #ifndef GDB_HACK
1008 	static const char types[] = "}-+?=";
1009 #endif
1010 
1011 	c = pgetc();
1012 	if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
1013 		USTPUTC('$', out);
1014 		pungetc();
1015 	} else if (c == '(') {	/* $(command) */
1016 		PARSEBACKQNEW();
1017 	} else {
1018 		USTPUTC(CTLVAR, out);
1019 		typeloc = out - stackblock();
1020 		USTPUTC(VSNORMAL, out);
1021 		subtype = VSNORMAL;
1022 		if (c == '{') {
1023 			c = pgetc();
1024 			subtype = 0;
1025 		}
1026 		if (is_name(c)) {
1027 			do {
1028 				STPUTC(c, out);
1029 				c = pgetc();
1030 			} while (is_in_name(c));
1031 		} else {
1032 			if (! is_special(c))
1033 badsub:				synerror("Bad substitution");
1034 			USTPUTC(c, out);
1035 			c = pgetc();
1036 		}
1037 		STPUTC('=', out);
1038 		flags = 0;
1039 		if (subtype == 0) {
1040 			if (c == ':') {
1041 				flags = VSNUL;
1042 				c = pgetc();
1043 			}
1044 			p = strchr(types, c);
1045 			if (p == NULL)
1046 				goto badsub;
1047 			subtype = p - types + VSNORMAL;
1048 		} else {
1049 			pungetc();
1050 		}
1051 		if (dblquote)
1052 			flags |= VSQUOTE;
1053 		*(stackblock() + typeloc) = subtype | flags;
1054 		if (subtype != VSNORMAL)
1055 			varnest++;
1056 	}
1057 	goto parsesub_return;
1058 }
1059 
1060 
1061 /*
1062  * Called to parse command substitutions.  Newstyle is set if the command
1063  * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1064  * list of commands (passed by reference), and savelen is the number of
1065  * characters on the top of the stack which must be preserved.
1066  */
1067 
1068 parsebackq: {
1069 	struct nodelist **nlpp;
1070 	int savepbq;
1071 	union node *n;
1072 	char *volatile str;
1073 	struct jmploc jmploc;
1074 	struct jmploc *volatile savehandler;
1075 	int savelen;
1076 
1077 	savepbq = parsebackquote;
1078 	if (setjmp(jmploc.loc)) {
1079 		if (str)
1080 			ckfree(str);
1081 		parsebackquote = 0;
1082 		handler = savehandler;
1083 		longjmp(handler->loc, 1);
1084 	}
1085 	INTOFF;
1086 	str = NULL;
1087 	savelen = out - stackblock();
1088 	if (savelen > 0) {
1089 		str = ckmalloc(savelen);
1090 		bcopy(stackblock(), str, savelen);
1091 	}
1092 	savehandler = handler;
1093 	handler = &jmploc;
1094 	INTON;
1095 	if (oldstyle) {
1096 		/* We must read until the closing backquote, giving special
1097 		   treatment to some slashes, and then push the string and
1098 		   reread it as input, interpreting it normally.  */
1099 		register char *out;
1100 		register c;
1101 		int savelen;
1102 		char *str;
1103 
1104 		STARTSTACKSTR(out);
1105 		while ((c = pgetc ()) != '`') {
1106 			if (c == '\\') {
1107 				c = pgetc ();
1108 				if (c != '\\' && c != '`' && c != '$'
1109 				    && (!dblquote || c != '"'))
1110 					STPUTC('\\', out);
1111 			}
1112 			STPUTC(c, out);
1113 		}
1114 		STPUTC('\0', out);
1115 		savelen = out - stackblock();
1116 		if (savelen > 0) {
1117 			str = ckmalloc(savelen);
1118 			bcopy(stackblock(), str, savelen);
1119 		}
1120 		setinputstring(str, 1);
1121 	}
1122 	nlpp = &bqlist;
1123 	while (*nlpp)
1124 		nlpp = &(*nlpp)->next;
1125 	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1126 	(*nlpp)->next = NULL;
1127 	parsebackquote = oldstyle;
1128 	n = list(0);
1129 	if (!oldstyle && (readtoken() != TRP))
1130 		synexpect(TRP);
1131 	(*nlpp)->n = n;
1132 	/* Start reading from old file again, and clear tokpushback since
1133 	   any pushed back token from the string is no longer relevant.  */
1134 	if (oldstyle) {
1135 		popfile();
1136 		tokpushback = 0;
1137 	}
1138 	while (stackblocksize() <= savelen)
1139 		growstackblock();
1140 	STARTSTACKSTR(out);
1141 	if (str) {
1142 		bcopy(str, out, savelen);
1143 		STADJUST(savelen, out);
1144 		INTOFF;
1145 		ckfree(str);
1146 		str = NULL;
1147 		INTON;
1148 	}
1149 	parsebackquote = savepbq;
1150 	handler = savehandler;
1151 	USTPUTC(CTLBACKQ + dblquote, out);
1152 	if (oldstyle)
1153 		goto parsebackq_oldreturn;
1154 	else
1155 		goto parsebackq_newreturn;
1156 }
1157 
1158 } /* end of readtoken */
1159 
1160 
1161 
#ifdef mkinit
/*
 * Reset section, compiled only when mkinit is defined: discards any
 * pushed-back token.
 * NOTE(review): presumably extracted by the mkinit tool into the
 * shell's reset routine; confirm against mkinit.
 */
RESET {
	tokpushback = 0;
}
#endif
1167 
1168 
1169 #if ATTY
/*
 * Called to process a command generated by atty.  We execute the line,
 * and catch any errors that occur so they don't propagate outside of
 * this routine without our cleanup having run.
 *
 * One line is read with pfgets() and evaluated with evalstring()
 * between a stack mark and its release.  On return doprompt is set
 * to 1.
 */

STATIC void
attyline() {
	char line[256];
	struct stackmark smark;
	struct jmploc jmploc;
	struct jmploc *volatile savehandler;

	if (pfgets(line, sizeof line) == NULL)
		return;				/* "can't happen" */
	if (setjmp(jmploc.loc)) {
		/*
		 * Reached by longjmp from evalstring(): for an EXERROR
		 * exception emit the escape sequence, then restore the
		 * previous handler and pass the exception on to it.
		 */
		if (exception == EXERROR)
			out2str("\033]D\n");
		handler = savehandler;
		longjmp(handler->loc, 1);
	}
	/* Install our handler only for the duration of the evaluation. */
	savehandler = handler;
	handler = &jmploc;
	setstackmark(&smark);
	evalstring(line);
	popstackmark(&smark);
	handler = savehandler;
	doprompt = 1;
}
1199 
1200 
1201 /*
1202  * Output a prompt for atty.  We output the prompt as part of the
1203  * appropriate escape sequence.
1204  */
1205 
1206 STATIC void
1207 putprompt(s)
1208 	char *s;
1209 	{
1210 	register char *p;
1211 
1212 	if (attyset() && ! equal(termval(), "emacs")) {
1213 		if (strchr(s, '\7'))
1214 			out2c('\7');
1215 		out2str("\033]P1;");
1216 		for (p = s ; *p ; p++) {
1217 			if ((unsigned)(*p - ' ') <= '~' - ' ')
1218 				out2c(*p);
1219 		}
1220 		out2c('\n');
1221 	} else {
1222 		out2str(s);
1223 	}
1224 }
1225 #endif
1226 
1227 
1228 
1229 /*
1230  * Returns true if the text contains nothing to expand (no dollar signs
1231  * or backquotes).
1232  */
1233 
1234 STATIC int
1235 noexpand(text)
1236 	char *text;
1237 	{
1238 	register char *p;
1239 	register char c;
1240 
1241 	p = text;
1242 	while ((c = *p++) != '\0') {
1243 		if (c == CTLESC)
1244 			p++;
1245 		else if (BASESYNTAX[c] == CCTL)
1246 			return 0;
1247 	}
1248 	return 1;
1249 }
1250 
1251 
1252 /*
1253  * Return true if the argument is a legal variable name (a letter or
1254  * underscore followed by zero or more letters, underscores, and digits).
1255  */
1256 
1257 int
1258 goodname(name)
1259 	char *name;
1260 	{
1261 	register char *p;
1262 
1263 	p = name;
1264 	if (! is_name(*p))
1265 		return 0;
1266 	while (*++p) {
1267 		if (! is_in_name(*p))
1268 			return 0;
1269 	}
1270 	return 1;
1271 }
1272 
1273 
1274 /*
1275  * Called when an unexpected token is read during the parse.  The argument
1276  * is the token that is expected, or -1 if more than one type of token can
1277  * occur at this point.
1278  */
1279 
1280 STATIC void
1281 synexpect(token) {
1282 	char msg[64];
1283 
1284 	if (token >= 0) {
1285 		fmtstr(msg, 64, "%s unexpected (expecting %s)",
1286 			tokname[lasttoken], tokname[token]);
1287 	} else {
1288 		fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1289 	}
1290 	synerror(msg);
1291 }
1292 
1293 
1294 STATIC void
1295 synerror(msg)
1296 	char *msg;
1297 	{
1298 	if (commandname)
1299 		outfmt(&errout, "%s: %d: ", commandname, startlinno);
1300 	outfmt(&errout, "Syntax error: %s\n", msg);
1301 	error((char *)NULL);
1302 }
1303