xref: /netbsd-src/bin/sh/parser.c (revision b817d381342c63f879b8ba9ab0ac5f531badebe9)
1 /*	$NetBSD: parser.c,v 1.112 2016/03/27 14:40:20 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Kenneth Almquist.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.112 2016/03/27 14:40:20 christos Exp $");
41 #endif
42 #endif /* not lint */
43 
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <limits.h>
47 
48 #include "shell.h"
49 #include "parser.h"
50 #include "nodes.h"
51 #include "expand.h"	/* defines rmescapes() */
52 #include "eval.h"	/* defines commandname */
53 #include "redir.h"	/* defines copyfd() */
54 #include "syntax.h"
55 #include "options.h"
56 #include "input.h"
57 #include "output.h"
58 #include "var.h"
59 #include "error.h"
60 #include "memalloc.h"
61 #include "mystring.h"
62 #include "alias.h"
63 #include "show.h"
64 #ifndef SMALL
65 #include "myhistedit.h"
66 #endif
67 
68 /*
69  * Shell command parser.
70  */
71 
72 /* values returned by readtoken */
73 #include "token.h"
74 
/* Character constants for the two brace characters. */
#define OPENBRACE '{'
#define CLOSEBRACE '}'
77 
78 
/*
 * One pending here document.  Records are chained through "next" on
 * heredoclist until readheredocs() reads the document text and attaches
 * it to the redirection node "here".
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
85 
86 
87 
/*
 * Parser state shared between the token reader and the grammar routines.
 */
static int noalias = 0;		/* when set, don't handle aliases */
struct heredoc *heredoclist;	/* list of here documents to read */
int parsebackquote;		/* nonzero if we are inside backquotes */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
MKINIT int tokpushback;		/* nonzero: next read returns lasttoken again */
char *wordtext;			/* text of last word returned by readtoken */
MKINIT int checkkwd;		/* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;	/* cmds in backquotes found in the last word */
union node *redirnode;		/* redirection node for the last TREDIR token */
struct heredoc *heredoc;	/* here doc record completed by parsefname() */
int quoteflag;			/* set if (part of) last token was quoted */
int startlinno;			/* line # where last token started */
int funclinno;			/* line # where the current function started */
103 
104 
/*
 * Forward declarations for the file-local parser routines: the grammar
 * functions (list/andor/pipeline/command/simplecmd), the redirection and
 * here document helpers, the token readers, and the error reporters.
 * synexpect() and synerror() are declared __dead (they do not return).
 */
STATIC union node *list(int, int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makename(void);
STATIC void parsefname(void);
STATIC void slurp_heredoc(char *const, int, int);
STATIC void readheredocs(void);
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, int);
STATIC int noexpand(char *);
STATIC void synexpect(int, const char *) __dead;
STATIC void synerror(const char *) __dead;
STATIC void setprompt(int);
122 
123 
/* Syntax error message used when input ends inside a here document. */
static const char EOFhere[] = "EOF reading here (<<) document";
125 
126 
127 /*
128  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
129  * valid parse tree indicating a blank line.)
130  */
131 
132 union node *
133 parsecmd(int interact)
134 {
135 	int t;
136 
137 	tokpushback = 0;
138 	doprompt = interact;
139 	if (doprompt)
140 		setprompt(1);
141 	else
142 		setprompt(0);
143 	needprompt = 0;
144 	t = readtoken();
145 	if (t == TEOF)
146 		return NEOF;
147 	if (t == TNL)
148 		return NULL;
149 	tokpushback++;
150 	return list(1, 0);
151 }
152 
153 
154 STATIC union node *
155 list(int nlflag, int erflag)
156 {
157 	union node *n1, *n2, *n3;
158 	int tok;
159 	TRACE(("list(%d,%d): entered\n", nlflag, erflag));
160 
161 	checkkwd = 2;
162 	if (nlflag == 0 && tokendlist[peektoken()])
163 		return NULL;
164 	n1 = NULL;
165 	for (;;) {
166 		n2 = andor();
167 		tok = readtoken();
168 		if (tok == TBACKGND) {
169 			if (n2->type == NCMD || n2->type == NPIPE) {
170 				n2->ncmd.backgnd = 1;
171 			} else if (n2->type == NREDIR) {
172 				n2->type = NBACKGND;
173 			} else {
174 				n3 = stalloc(sizeof(struct nredir));
175 				n3->type = NBACKGND;
176 				n3->nredir.n = n2;
177 				n3->nredir.redirect = NULL;
178 				n2 = n3;
179 			}
180 		}
181 		if (n1 == NULL) {
182 			n1 = n2;
183 		}
184 		else {
185 			n3 = stalloc(sizeof(struct nbinary));
186 			n3->type = NSEMI;
187 			n3->nbinary.ch1 = n1;
188 			n3->nbinary.ch2 = n2;
189 			n1 = n3;
190 		}
191 		switch (tok) {
192 		case TBACKGND:
193 		case TSEMI:
194 			tok = readtoken();
195 			/* FALLTHROUGH */
196 		case TNL:
197 			if (tok == TNL) {
198 				readheredocs();
199 				if (nlflag)
200 					return n1;
201 			} else {
202 				tokpushback++;
203 			}
204 			checkkwd = 2;
205 			if (tokendlist[peektoken()])
206 				return n1;
207 			break;
208 		case TEOF:
209 			if (heredoclist)
210 				readheredocs();
211 			else
212 				pungetc();	/* push back EOF on input */
213 			return n1;
214 		default:
215 			if (nlflag || erflag)
216 				synexpect(-1, 0);
217 			tokpushback++;
218 			return n1;
219 		}
220 	}
221 }
222 
223 STATIC union node *
224 andor(void)
225 {
226 	union node *n1, *n2, *n3;
227 	int t;
228 
229 	TRACE(("andor: entered\n"));
230 	n1 = pipeline();
231 	for (;;) {
232 		if ((t = readtoken()) == TAND) {
233 			t = NAND;
234 		} else if (t == TOR) {
235 			t = NOR;
236 		} else {
237 			tokpushback++;
238 			return n1;
239 		}
240 		n2 = pipeline();
241 		n3 = stalloc(sizeof(struct nbinary));
242 		n3->type = t;
243 		n3->nbinary.ch1 = n1;
244 		n3->nbinary.ch2 = n2;
245 		n1 = n3;
246 	}
247 }
248 
249 STATIC union node *
250 pipeline(void)
251 {
252 	union node *n1, *n2, *pipenode;
253 	struct nodelist *lp, *prev;
254 	int negate;
255 
256 	TRACE(("pipeline: entered\n"));
257 
258 	negate = 0;
259 	checkkwd = 2;
260 	while (readtoken() == TNOT) {
261 		TRACE(("pipeline: TNOT recognized\n"));
262 		negate = !negate;
263 	}
264 	tokpushback++;
265 	n1 = command();
266 	if (readtoken() == TPIPE) {
267 		pipenode = stalloc(sizeof(struct npipe));
268 		pipenode->type = NPIPE;
269 		pipenode->npipe.backgnd = 0;
270 		lp = stalloc(sizeof(struct nodelist));
271 		pipenode->npipe.cmdlist = lp;
272 		lp->n = n1;
273 		do {
274 			prev = lp;
275 			lp = stalloc(sizeof(struct nodelist));
276 			lp->n = command();
277 			prev->next = lp;
278 		} while (readtoken() == TPIPE);
279 		lp->next = NULL;
280 		n1 = pipenode;
281 	}
282 	tokpushback++;
283 	if (negate) {
284 		TRACE(("negate pipeline\n"));
285 		n2 = stalloc(sizeof(struct nnot));
286 		n2->type = NNOT;
287 		n2->nnot.com = n1;
288 		return n2;
289 	} else
290 		return n1;
291 }
292 
293 
294 
295 STATIC union node *
296 command(void)
297 {
298 	union node *n1, *n2;
299 	union node *ap, **app;
300 	union node *cp, **cpp;
301 	union node *redir, **rpp;
302 	int t, negate = 0;
303 
304 	TRACE(("command: entered\n"));
305 
306 	checkkwd = 2;
307 	redir = NULL;
308 	n1 = NULL;
309 	rpp = &redir;
310 
311 	/* Check for redirection which may precede command */
312 	while (readtoken() == TREDIR) {
313 		*rpp = n2 = redirnode;
314 		rpp = &n2->nfile.next;
315 		parsefname();
316 	}
317 	tokpushback++;
318 
319 	while (readtoken() == TNOT) {
320 		TRACE(("command: TNOT recognized\n"));
321 		negate = !negate;
322 	}
323 	tokpushback++;
324 
325 	switch (readtoken()) {
326 	case TIF:
327 		n1 = stalloc(sizeof(struct nif));
328 		n1->type = NIF;
329 		n1->nif.test = list(0, 0);
330 		if (readtoken() != TTHEN)
331 			synexpect(TTHEN, 0);
332 		n1->nif.ifpart = list(0, 0);
333 		n2 = n1;
334 		while (readtoken() == TELIF) {
335 			n2->nif.elsepart = stalloc(sizeof(struct nif));
336 			n2 = n2->nif.elsepart;
337 			n2->type = NIF;
338 			n2->nif.test = list(0, 0);
339 			if (readtoken() != TTHEN)
340 				synexpect(TTHEN, 0);
341 			n2->nif.ifpart = list(0, 0);
342 		}
343 		if (lasttoken == TELSE)
344 			n2->nif.elsepart = list(0, 0);
345 		else {
346 			n2->nif.elsepart = NULL;
347 			tokpushback++;
348 		}
349 		if (readtoken() != TFI)
350 			synexpect(TFI, 0);
351 		checkkwd = 1;
352 		break;
353 	case TWHILE:
354 	case TUNTIL: {
355 		int got;
356 		n1 = stalloc(sizeof(struct nbinary));
357 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
358 		n1->nbinary.ch1 = list(0, 0);
359 		if ((got=readtoken()) != TDO) {
360 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
361 			synexpect(TDO, 0);
362 		}
363 		n1->nbinary.ch2 = list(0, 0);
364 		if (readtoken() != TDONE)
365 			synexpect(TDONE, 0);
366 		checkkwd = 1;
367 		break;
368 	}
369 	case TFOR:
370 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
371 			synerror("Bad for loop variable");
372 		n1 = stalloc(sizeof(struct nfor));
373 		n1->type = NFOR;
374 		n1->nfor.var = wordtext;
375 		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
376 			app = &ap;
377 			while (readtoken() == TWORD) {
378 				n2 = stalloc(sizeof(struct narg));
379 				n2->type = NARG;
380 				n2->narg.text = wordtext;
381 				n2->narg.backquote = backquotelist;
382 				*app = n2;
383 				app = &n2->narg.next;
384 			}
385 			*app = NULL;
386 			n1->nfor.args = ap;
387 			if (lasttoken != TNL && lasttoken != TSEMI)
388 				synexpect(-1, 0);
389 		} else {
390 			static char argvars[5] = {
391 			    CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
392 			};
393 			n2 = stalloc(sizeof(struct narg));
394 			n2->type = NARG;
395 			n2->narg.text = argvars;
396 			n2->narg.backquote = NULL;
397 			n2->narg.next = NULL;
398 			n1->nfor.args = n2;
399 			/*
400 			 * Newline or semicolon here is optional (but note
401 			 * that the original Bourne shell only allowed NL).
402 			 */
403 			if (lasttoken != TNL && lasttoken != TSEMI)
404 				tokpushback++;
405 		}
406 		checkkwd = 2;
407 		if ((t = readtoken()) == TDO)
408 			t = TDONE;
409 		else if (t == TBEGIN)
410 			t = TEND;
411 		else
412 			synexpect(-1, 0);
413 		n1->nfor.body = list(0, 0);
414 		if (readtoken() != t)
415 			synexpect(t, 0);
416 		checkkwd = 1;
417 		break;
418 	case TCASE:
419 		n1 = stalloc(sizeof(struct ncase));
420 		n1->type = NCASE;
421 		if (readtoken() != TWORD)
422 			synexpect(TWORD, 0);
423 		n1->ncase.expr = n2 = stalloc(sizeof(struct narg));
424 		n2->type = NARG;
425 		n2->narg.text = wordtext;
426 		n2->narg.backquote = backquotelist;
427 		n2->narg.next = NULL;
428 		while (readtoken() == TNL);
429 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
430 			synexpect(-1, "in");
431 		cpp = &n1->ncase.cases;
432 		noalias = 1;
433 		checkkwd = 2, readtoken();
434 		/*
435 		 * Both ksh and bash accept 'case x in esac'
436 		 * so configure scripts started taking advantage of this.
437 		 * The page: http://pubs.opengroup.org/onlinepubs/\
438 		 * 009695399/utilities/xcu_chap02.html contradicts itself,
439 		 * as to if this is legal; the "Case Conditional Format"
440 		 * paragraph shows one case is required, but the "Grammar"
441 		 * section shows a grammar that explicitly allows the no
442 		 * case option.
443 		 */
444 		while (lasttoken != TESAC) {
445 			*cpp = cp = stalloc(sizeof(struct nclist));
446 			if (lasttoken == TLP)
447 				readtoken();
448 			cp->type = NCLIST;
449 			app = &cp->nclist.pattern;
450 			for (;;) {
451 				*app = ap = stalloc(sizeof(struct narg));
452 				ap->type = NARG;
453 				ap->narg.text = wordtext;
454 				ap->narg.backquote = backquotelist;
455 				if (checkkwd = 2, readtoken() != TPIPE)
456 					break;
457 				app = &ap->narg.next;
458 				readtoken();
459 			}
460 			ap->narg.next = NULL;
461 			noalias = 0;
462 			if (lasttoken != TRP) {
463 				synexpect(TRP, 0);
464 			}
465 			cp->nclist.body = list(0, 0);
466 
467 			checkkwd = 2;
468 			if ((t = readtoken()) != TESAC) {
469 				if (t != TENDCASE) {
470 					noalias = 0;
471 					synexpect(TENDCASE, 0);
472 				} else {
473 					noalias = 1;
474 					checkkwd = 2;
475 					readtoken();
476 				}
477 			}
478 			cpp = &cp->nclist.next;
479 		}
480 		noalias = 0;
481 		*cpp = NULL;
482 		checkkwd = 1;
483 		break;
484 	case TLP:
485 		n1 = stalloc(sizeof(struct nredir));
486 		n1->type = NSUBSHELL;
487 		n1->nredir.n = list(0, 0);
488 		n1->nredir.redirect = NULL;
489 		if (readtoken() != TRP)
490 			synexpect(TRP, 0);
491 		checkkwd = 1;
492 		break;
493 	case TBEGIN:
494 		n1 = list(0, 0);
495 		if (readtoken() != TEND)
496 			synexpect(TEND, 0);
497 		checkkwd = 1;
498 		break;
499 	/* Handle an empty command like other simple commands.  */
500 	case TSEMI:
501 		/*
502 		 * An empty command before a ; doesn't make much sense, and
503 		 * should certainly be disallowed in the case of `if ;'.
504 		 */
505 		if (!redir)
506 			synexpect(-1, 0);
507 	case TAND:
508 	case TOR:
509 	case TNL:
510 	case TEOF:
511 	case TWORD:
512 	case TRP:
513 		tokpushback++;
514 		n1 = simplecmd(rpp, redir);
515 		goto checkneg;
516 	case TENDCASE:
517 		if (redir) {
518 			tokpushback++;
519 			goto checkneg;
520 		}
521 		/* FALLTHROUGH */
522 	default:
523 		synexpect(-1, 0);
524 		/* NOTREACHED */
525 	}
526 
527 	/* Now check for redirection which may follow command */
528 	while (readtoken() == TREDIR) {
529 		*rpp = n2 = redirnode;
530 		rpp = &n2->nfile.next;
531 		parsefname();
532 	}
533 	tokpushback++;
534 	*rpp = NULL;
535 	if (redir) {
536 		if (n1->type != NSUBSHELL) {
537 			n2 = stalloc(sizeof(struct nredir));
538 			n2->type = NREDIR;
539 			n2->nredir.n = n1;
540 			n1 = n2;
541 		}
542 		n1->nredir.redirect = redir;
543 	}
544 
545 checkneg:
546 	if (negate) {
547 		TRACE(("negate command\n"));
548 		n2 = stalloc(sizeof(struct nnot));
549 		n2->type = NNOT;
550 		n2->nnot.com = n1;
551 		return n2;
552 	}
553 	else
554 		return n1;
555 }
556 
557 
/*
 * Parse a simple command (words and redirections) or a function
 * definition.
 *
 * rpp/redir: the tail pointer and head of a redirection list already
 *	collected by the caller; when redir is NULL the list is started
 *	afresh in the local copy of redir.
 *
 * Words are gathered into an argument list and redirections appended to
 * the redirection list until some other token is seen (which is pushed
 * back).  A '(' seen when exactly one word has been collected and no
 * redirections exist starts a function definition: ')' must follow and
 * the body is parsed with command().
 *
 * Returns an NCMD node, or for a function the word's node retyped to
 * NDEFUN; either is wrapped in an NNOT node if leading TNOT tokens
 * left "negate" set.
 */
STATIC union node *
simplecmd(union node **rpp, union node *redir)
{
	union node *args, **app;
	union node *n = NULL, *n2;
	int negate = 0;

	/* If we don't have any redirections already, then we must reset */
	/* rpp to be the address of the local redir variable.  */
	if (redir == 0)
		rpp = &redir;

	args = NULL;
	app = &args;

	/* each TNOT toggles negation; the first other token is re-read below */
	while (readtoken() == TNOT) {
		TRACE(("simplcmd: TNOT recognized\n"));
		negate = !negate;
	}
	tokpushback++;

	for (;;) {
		if (readtoken() == TWORD) {
			n = stalloc(sizeof(struct narg));
			n->type = NARG;
			n->narg.text = wordtext;
			n->narg.backquote = backquotelist;
			*app = n;
			app = &n->narg.next;
		} else if (lasttoken == TREDIR) {
			*rpp = n = redirnode;
			rpp = &n->nfile.next;
			parsefname();	/* read name of redirection file */
		} else if (lasttoken == TLP && app == &args->narg.next
					    && redir == 0) {
			/*
			 * app pointing at the first word's next field means
			 * exactly one word was read, and redir == 0 means no
			 * redirection was, so n is that single word's node.
			 */
			/* We have a function */
			if (readtoken() != TRP)
				synexpect(TRP, 0);
			funclinno = plinno;
			rmescapes(n->narg.text);
			if (!goodname(n->narg.text))
				synerror("Bad function name");
			n->type = NDEFUN;
			/* the function body is stored in the node's next field */
			n->narg.next = command();
			funclinno = 0;
			goto checkneg;
		} else {
			tokpushback++;
			break;
		}
	}
	/* terminate both lists and build the command node */
	*app = NULL;
	*rpp = NULL;
	n = stalloc(sizeof(struct ncmd));
	n->type = NCMD;
	n->ncmd.backgnd = 0;
	n->ncmd.args = args;
	n->ncmd.redirect = redir;

checkneg:
	if (negate) {
		TRACE(("negate simplecmd\n"));
		n2 = stalloc(sizeof(struct nnot));
		n2->type = NNOT;
		n2->nnot.com = n;
		return n2;
	}
	else
		return n;
}
628 
629 STATIC union node *
630 makename(void)
631 {
632 	union node *n;
633 
634 	n = stalloc(sizeof(struct narg));
635 	n->type = NARG;
636 	n->narg.next = NULL;
637 	n->narg.text = wordtext;
638 	n->narg.backquote = backquotelist;
639 	return n;
640 }
641 
642 void
643 fixredir(union node *n, const char *text, int err)
644 {
645 	TRACE(("Fix redir %s %d\n", text, err));
646 	if (!err)
647 		n->ndup.vname = NULL;
648 
649 	if (is_number(text))
650 		n->ndup.dupfd = number(text);
651 	else if (text[0] == '-' && text[1] == '\0')
652 		n->ndup.dupfd = -1;
653 	else {
654 
655 		if (err)
656 			synerror("Bad fd number");
657 		else
658 			n->ndup.vname = makename();
659 	}
660 }
661 
662 
/*
 * Read the word that follows a redirection operator and store it in the
 * current redirection node (redirnode).
 *
 * For a here document the word is the end marker: it is recorded in the
 * global "heredoc" record, which is then appended to heredoclist so the
 * document body can be read later.  For the fd duplication forms the
 * word is handed to fixredir(); for all other redirections it becomes
 * the file name.  A syntax error is raised if no word follows.
 */
STATIC void
parsefname(void)
{
	union node *n = redirnode;

	if (readtoken() != TWORD)
		synexpect(-1, 0);
	if (n->type == NHERE) {
		struct heredoc *here = heredoc;
		struct heredoc *p;

		/* an unquoted end marker changes the node type to NXHERE */
		if (quoteflag == 0)
			n->type = NXHERE;
		TRACE(("Here document %d\n", n->type));
		/* with tab stripping, leading tabs on the marker go too */
		if (here->striptabs) {
			while (*wordtext == '\t')
				wordtext++;
		}

		/*
		 * this test is not really necessary, we are not
		 * required to expand wordtext, but there's no reason
		 * it cannot be $$ or something like that - that would
		 * not mean the pid, but literally two '$' characters.
		 * There is no need for limits on what the word can be.
		 * However, it needs to stay literal as entered, not
		 * have $ converted to CTLVAR or something, which as
		 * the parser is, at the minute, is impossible to prevent.
		 * So, leave it like this until the rest of the parser is fixed.
		 */
		if (! noexpand(wordtext))
			synerror("Illegal eof marker for << redirection");

		rmescapes(wordtext);
		here->eofmark = wordtext;
		here->next = NULL;
		/* append, so documents are read in the order they appeared */
		if (heredoclist == NULL)
			heredoclist = here;
		else {
			for (p = heredoclist ; p->next ; p = p->next)
				continue;
			p->next = here;
		}
	} else if (n->type == NTOFD || n->type == NFROMFD) {
		fixredir(n, wordtext, 0);
	} else {
		n->nfile.fname = makename();
	}
}
712 
/*
 * Check to see whether we are at the end of the here document.  When this
 * is called, c is set to the first character of the next input line.  If
 * we are at the end of the here document, this routine sets the c to PEOF.
 * The new value of c is returned.
 *
 * eofmark is the end marker to look for; if striptabs is set, leading
 * tabs on the line are skipped before comparing.  An empty eofmark
 * matches an empty line, or end of input.  End of input with a non-empty
 * eofmark is a syntax error.  When a line only starts like the marker,
 * the characters read while comparing are pushed back onto the input so
 * the caller sees the line unchanged (apart from any stripped tabs).
 */

static int
checkend(int c, char * const eofmark, const int striptabs)
{
	if (striptabs) {
		while (c == '\t')
			c = pgetc();
	}
	if (c == PEOF) {
		if (*eofmark == '\0')
			return (c);
		synerror(EOFhere);
	}
	if (c == *eofmark) {
		int c2;
		char *q;

		/* compare the rest of the line with the rest of the marker */
		for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
			;
		if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
			/* whole marker matched and the line ends there */
			c = PEOF;
			if (c2 == '\n') {
				plinno++;
				needprompt = doprompt;
			}
		} else {
			/*
			 * Not the marker: return the mismatching character,
			 * then re-queue the marker characters that did match.
			 */
			pungetc();
			pushstring(eofmark + 1, q - (eofmark + 1), NULL);
		}
	} else if (c == '\n' && *eofmark == '\0') {
		/* empty marker: an empty line ends the document */
		c = PEOF;
		plinno++;
		needprompt = doprompt;
	}
	return (c);
}
755 
756 
/*
 * Read the body of one here document from the input.
 *
 * eofmark is the end marker, striptabs is passed on to checkend() for
 * every line, and sq is nonzero when the text is to be saved completely
 * literally (the end marker was quoted).
 *
 * The collected text, NUL terminated, is left in wordtext.  Reaching end
 * of input before the end marker is a syntax error, unless the marker is
 * the empty string and no line at all could be read.
 */

STATIC void
slurp_heredoc(char *const eofmark, int striptabs, int sq)
{
	int c;
	char *out;

	c = pgetc();

	/*
	 * If we hit EOF on the input, and the eofmark is a null string ('')
	 * we consider this empty line to be the eofmark, and exit without err.
	 */
	if (c == PEOF && *eofmark != '\0')
		synerror(EOFhere);

	STARTSTACKSTR(out);

	/* one iteration per line; checkend() spots the end marker */
	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
		do {
			if (sq) {
				/*
				 * in single quoted mode (eofmark quoted)
				 * all we look for is \n so we can check
				 * for the eofmark - everything saved literally.
				 */
				STPUTC(c, out);
				if (c == '\n')
					break;
				continue;
			}
			/*
			 * In double quoted (non-quoted eofmark)
			 * we must handle \ followed by \n here
			 * otherwise we can mismatch the end mark.
			 * All other uses of \ will be handled later
			 * when the here doc is expanded.
			 *
			 * This also makes sure \\ followed by \n does
			 * not suppress the newline (the \ quotes itself)
			 */
			if (c == '\\') {		/* A backslash */
				c = pgetc();		/* followed by */
				if (c == '\n')		/* a newline?  */
					continue;	/* y:drop both */
				STPUTC('\\', out);	/* else keep \ */
			}
			STPUTC(c, out);			/* keep the char */
			if (c == '\n')			/* at end of line */
				break;			/* look for eofmark */

		} while ((c = pgetc()) != PEOF);

		/*
		 * If we have read a line, and reached EOF, without
		 * finding the eofmark, whether the EOF comes before
		 * or immediately after the \n, that is an error.
		 */
		if (c == PEOF || (c = pgetc()) == PEOF)
			synerror(EOFhere);
	}
	STPUTC('\0', out);

	/* c is reused as the length of the text, terminator included */
	c = out - stackblock();
	out = stackblock();
	grabstackblock(c);
	wordtext = out;

	TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
		eofmark, striptabs ? " tab stripped" : "", c, wordtext));
}
831 
832 STATIC void
833 readheredocs(void)
834 {
835 	struct heredoc *here;
836 	union node *n;
837 
838 	while (heredoclist) {
839 		here = heredoclist;
840 		heredoclist = here->next;
841 		if (needprompt) {
842 			setprompt(2);
843 			needprompt = 0;
844 		}
845 
846 		slurp_heredoc(here->eofmark, here->striptabs,
847 		    here->here->nhere.type == NHERE);
848 
849 		n = stalloc(sizeof(struct narg));
850 		n->narg.type = NARG;
851 		n->narg.next = NULL;
852 		n->narg.text = wordtext;
853 		n->narg.backquote = backquotelist;
854 		here->here->nhere.doc = n;
855 	}
856 }
857 
858 void
859 parse_heredoc(union node *n)
860 {
861 	if (n->narg.type != NARG)
862 		abort();
863 
864 	if (n->narg.text[0] == '\0')		/* nothing to do */
865 		return;
866 
867 	setinputstring(n->narg.text, 1);
868 
869 	readtoken1(pgetc(), DQSYNTAX, 1);
870 
871 	n->narg.text = wordtext;
872 	n->narg.backquote = backquotelist;
873 
874 	popfile();
875 }
876 
877 STATIC int
878 peektoken(void)
879 {
880 	int t;
881 
882 	t = readtoken();
883 	tokpushback++;
884 	return (t);
885 }
886 
/*
 * Read the next token, with keyword and alias handling on top of
 * xxreadtoken().
 *
 * What happens is controlled by checkkwd as it is on entry:
 *	0  the token from xxreadtoken() is returned unchanged;
 *	1  an unquoted word is looked up as a keyword, and if it is not
 *	   one, as an alias (unless noalias is set);
 *	2  as 1, but newline tokens are skipped first, reading any
 *	   pending here documents after each one.
 * A keyword is returned as its own token type (lasttoken is updated to
 * match).  An alias has its value pushed onto the input and the read is
 * restarted with checkkwd restored.  On return checkkwd is 0, except
 * that after TNOT it keeps the value it had on entry.
 */
STATIC int
readtoken(void)
{
	int t;
	int savecheckkwd = checkkwd;
#ifdef DEBUG
	int alreadyseen = tokpushback;
#endif
	struct alias *ap;

	top:
	t = xxreadtoken();

	if (checkkwd) {
		/*
		 * eat newlines
		 */
		if (checkkwd == 2) {
			checkkwd = 0;
			while (t == TNL) {
				readheredocs();
				t = xxreadtoken();
			}
		} else
			checkkwd = 0;
		/*
		 * check for keywords and aliases
		 */
		if (t == TWORD && !quoteflag) {
			const char *const *pp;

			for (pp = parsekwd; *pp; pp++) {
				/* cheap first character test before equal() */
				if (**pp == *wordtext && equal(*pp, wordtext)) {
					/* table index + KWDOFFSET is the token value */
					lasttoken = t = pp -
					    parsekwd + KWDOFFSET;
					TRACE(("keyword %s recognized\n", tokname[t]));
					goto out;
				}
			}
			if (!noalias &&
			    (ap = lookupalias(wordtext, 1)) != NULL) {
				/* read the alias value in place of the word */
				pushstring(ap->val, strlen(ap->val), ap);
				checkkwd = savecheckkwd;
				goto top;
			}
		}
out:
		/* keep checking after "!" so a keyword can follow it */
		checkkwd = (t == TNOT) ? savecheckkwd : 0;
	}
	TRACE(("%stoken %s %s\n", alreadyseen ? "reread " : "", tokname[t], t == TWORD ? wordtext : ""));
	return (t);
}
939 
940 
941 /*
942  * Read the next input token.
943  * If the token is a word, we set backquotelist to the list of cmds in
944  *	backquotes.  We set quoteflag to true if any part of the word was
945  *	quoted.
946  * If the token is TREDIR, then we set redirnode to a structure containing
947  *	the redirection.
948  * In all cases, the variable startlinno is set to the number of the line
949  *	on which the token starts.
950  *
951  * [Change comment:  here documents and internal procedures]
952  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
953  *  word parsing code into a separate routine.  In this case, readtoken
954  *  doesn't need to have any internal procedures, but parseword does.
955  *  We could also make parseoperator in essence the main routine, and
956  *  have parseword (readtoken1?) handle both words and redirection.]
957  */
958 
/* Record the token in lasttoken and return it (undefined again below). */
#define RETURN(token)	return lasttoken = token

/*
 * Low level token reader.
 *
 * If a token has been pushed back, lasttoken is returned again and the
 * push back flag cleared.  Otherwise blanks and # comments are skipped,
 * the operator tokens (newline, EOF, & && | || ; ;; ( and )) are
 * recognized directly, and anything else is handed to readtoken1() to
 * be read as a word.  A backslash-newline pair between tokens is
 * dropped.  startlinno is set to the line on which the token starts.
 */
STATIC int
xxreadtoken(void)
{
	int c;

	if (tokpushback) {
		tokpushback = 0;
		return lasttoken;
	}
	if (needprompt) {
		setprompt(2);
		needprompt = 0;
	}
	startlinno = plinno;
	for (;;) {	/* until token or start of word found */
		c = pgetc_macro();
		switch (c) {
		case ' ': case '\t':
			continue;
		case '#':
			/* comment: skip to, but do not consume, the line end */
			while ((c = pgetc()) != '\n' && c != PEOF)
				continue;
			pungetc();
			continue;

		case '\n':
			plinno++;
			needprompt = doprompt;
			RETURN(TNL);
		case PEOF:
			RETURN(TEOF);

		case '&':
			if (pgetc() == '&')
				RETURN(TAND);
			pungetc();
			RETURN(TBACKGND);
		case '|':
			if (pgetc() == '|')
				RETURN(TOR);
			pungetc();
			RETURN(TPIPE);
		case ';':
			if (pgetc() == ';')
				RETURN(TENDCASE);
			pungetc();
			RETURN(TSEMI);
		case '(':
			RETURN(TLP);
		case ')':
			RETURN(TRP);

		case '\\':
			switch (pgetc()) {
			case '\n':
				/* line continuation: the token starts on the next line */
				startlinno = ++plinno;
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
				continue;
			case PEOF:
				RETURN(TEOF);
			default:
				/* the backslash begins a word; c is still '\\' */
				pungetc();
				break;
			}
			/* FALLTHROUGH */
		default:
			return readtoken1(c, BASESYNTAX, 0);
		}
	}
#undef RETURN
}
1035 
1036 
1037 
1038 /*
1039  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
1040  * is not NULL, read a here document.  In the latter case, eofmark is the
1041  * word which marks the end of the document and striptabs is true if
1042  * leading tabs should be stripped from the document.  The argument firstc
1043  * is the first character of the input token or document.
1044  *
1045  * Because C does not have internal subroutines, I have simulated them
1046  * using goto's to implement the subroutine linkage.  The following macros
1047  * will run code that appears at the end of readtoken1.
1048  */
1049 
1050 /*
1051  * We used to remember only the current syntax, variable nesting level,
1052  * double quote state for each var nesting level, and arith nesting
1053  * level (unrelated to var nesting) and one prev syntax when in arith
1054  * syntax.  This worked for simple cases, but can't handle arith inside
1055  * var expansion inside arith inside var with some quoted and some not.
1056  *
1057  * Inspired by FreeBSD's implementation (though it was the obvious way)
1058  * though implemented differently, we now have a stack that keeps track
1059  * of what we are doing now, and what we were doing previously.
1060  * Every time something changes, which will eventually end and should
1061  * revert to the previous state, we push this stack, and then pop it
1062  * again later (that is every ${} with an operator (to parse the word
1063  * or pattern that follows) ${x} and $x are too simple to need it)
1064  * $(( )) $( ) and "...".   Always.   Really, always!
1065  *
1066  * The stack is implemented as one static (on the C stack) base block
1067  * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1068  * enough for the vast majority of cases.  For torture tests, we
1069  * malloc more blocks as needed.  All accesses through the inline
1070  * functions below.
1071  */
1072 
1073 /*
1074  * varnest & arinest will typically be 0 or 1
1075  * (varnest can increment in usages like ${x=${y}} but probably
1076  *  does not really need to)
1077  * parenlevel allows balancing parens inside a $(( )), it is reset
1078  * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
1079  * quoted is special - we need to know 2 things ... are we inside "..."
1080  * (even if inherited from some previous nesting level) and was there
1081  * an opening '"' at this level (so the next will be closing).
1082  * "..." can span nesting levels, but cannot be opened in one and
1083  * closed in a different one.
1084  * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1085  * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1086  * is really so special that this setting is not very important)
1087  * and 0x10 that indicates that an opening quote has been seen.
1088  * The bottom 4 bits are inherited, the 0x10 bit is not.
1089  */
/* One level of token reader state; one entry per level of the state stack. */
struct tokenstate {
	const char *ts_syntax;		/* syntax in use at this level */
	unsigned short ts_parenlevel;	/* counters */
	unsigned short ts_varnest;	/* 64000 levels should be enough! */
	unsigned short ts_arinest;
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
};
1097 
/* Values stored in ts_quoted */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	QF	0x0F		/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

/* Number of tokenstate entries held in each block of the state stack */
#define	LEVELS_PER_BLOCK	8
/* Shorthand for the state stack block type */
#define	VSS			struct statestack
1106 
/*
 * One block of the token state stack.  Blocks are linked through "prev"
 * back to the base block, whose prev is NULL.
 */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1112 
1113 static inline struct tokenstate *
1114 currentstate(VSS *stack)
1115 {
1116 	return &stack->tokenstate[stack->cur];
1117 }
1118 
1119 static inline struct tokenstate *
1120 prevstate(VSS *stack)
1121 {
1122 	if (stack->cur != 0)
1123 		return &stack->tokenstate[stack->cur - 1];
1124 	if (stack->prev == NULL)	/* cannot drop below base */
1125 		return &stack->tokenstate[0];
1126 	return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
1127 }
1128 
1129 static inline VSS *
1130 bump_state_level(VSS *stack)
1131 {
1132 	struct tokenstate *os, *ts;
1133 
1134 	os = currentstate(stack);
1135 
1136 	if (++stack->cur >= LEVELS_PER_BLOCK) {
1137 		VSS *ss;
1138 
1139 		ss = (VSS *)ckmalloc(sizeof (struct statestack));
1140 		ss->cur = 0;
1141 		ss->prev = stack;
1142 		stack = ss;
1143 	}
1144 
1145 	ts = currentstate(stack);
1146 
1147 	ts->ts_parenlevel = 0;	/* parens inside never match outside */
1148 
1149 	ts->ts_quoted  = os->ts_quoted & QF;	/* these are default settings */
1150 	ts->ts_varnest = os->ts_varnest;
1151 	ts->ts_arinest = os->ts_arinest;	/* when appropriate	   */
1152 	ts->ts_syntax  = os->ts_syntax;		/*    they will be altered */
1153 
1154 	return stack;
1155 }
1156 
1157 static inline VSS *
1158 drop_state_level(VSS *stack)
1159 {
1160 	if (stack->cur == 0) {
1161 		VSS *ss;
1162 
1163 		ss = stack;
1164 		stack = ss->prev;
1165 		if (stack == NULL)
1166 			return ss;
1167 		ckfree(ss);
1168 	}
1169 	--stack->cur;
1170 	return stack;
1171 }
1172 
1173 static inline void
1174 cleanup_state_stack(VSS *stack)
1175 {
1176 	while (stack->prev != NULL) {
1177 		stack->cur = 0;
1178 		stack = drop_state_level(stack);
1179 	}
1180 }
1181 
/*
 * Pseudo-subroutine calls used inside readtoken1(): jump to the
 * parsesub / parsearith code at the end of that function, which jumps
 * back to the matching *_return label defined here.  Each macro can
 * therefore be used only once in the function.
 */
#define	PARSESUB()	{goto parsesub; parsesub_return:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}
1184 
/*
 * The following macros all assume the existence of a local var "stack"
 * which contains a pointer to the current struct statestack
 */
1189 
/*
 * These are macros rather than inline funcs to avoid code churn as much
 * as possible - they replace macros of the same name used previously.
 *
 * ISDBLQUOTE() tests the QS bit of the current level's ts_quoted.
 * SETDBLQUOTE() sets the current level to QS | DQ.
 * CLRDBLQUOTE() replaces the current level's ts_quoted with the QF bits
 * of the level below it, or with 0 when the current level is the base
 * of the stack.
 */
#define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
#define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = QS | DQ)
#define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
			    stack->cur != 0 || stack->prev ?		\
				prevstate(stack)->ts_quoted & QF : 0)
1199 
/*
 * This set is just to avoid excess typing and long lines...
 * The ones that "look like" var names must be implemented to be lvalues.
 * Each expands to a field of the current tokenstate, so the value seen
 * through them changes whenever TS_PUSH() or TS_POP() is used.
 */
#define	syntax		(currentstate(stack)->ts_syntax)
#define	parenlevel	(currentstate(stack)->ts_parenlevel)
#define	varnest		(currentstate(stack)->ts_varnest)
#define	arinest		(currentstate(stack)->ts_arinest)
#define	quoted		(currentstate(stack)->ts_quoted)
/* Push / pop one state level, updating the local "stack" pointer */
#define	TS_PUSH()	(stack = bump_state_level(stack))
#define	TS_POP()	(stack = drop_state_level(stack))
1211 
1212 /*
1213  * Called to parse command substitutions.  oldstyle is true if the command
1214  * is enclosed inside `` (otherwise it was enclosed in "$( )")
1215  *
1216  * Internally nlpp is a pointer to the head of the linked
1217  * list of commands (passed by reference), and savelen is the number of
1218  * characters on the top of the stack which must be preserved.
1219  */
1220 static char *
1221 parsebackq(VSS *const stack, char * const in,
1222     struct nodelist **const pbqlist, const int oldstyle)
1223 {
1224 	struct nodelist **nlpp;
1225 	const int savepbq = parsebackquote;
1226 	union node *n;
1227 	char *out;
1228 	char *str = NULL;
1229 	char *volatile sstr = str;
1230 	struct jmploc jmploc;
1231 	struct jmploc *const savehandler = handler;
1232 	const int savelen = in - stackblock();
1233 	int saveprompt;
1234 
1235 	if (setjmp(jmploc.loc)) {
1236 		if (sstr)
1237 			ckfree(__UNVOLATILE(sstr));
1238 		cleanup_state_stack(stack);
1239 		parsebackquote = 0;
1240 		handler = savehandler;
1241 		longjmp(handler->loc, 1);
1242 	}
1243 	INTOFF;
1244 	sstr = str = NULL;
1245 	if (savelen > 0) {
1246 		sstr = str = ckmalloc(savelen);
1247 		memcpy(str, stackblock(), savelen);
1248 	}
1249 	handler = &jmploc;
1250 	INTON;
1251         if (oldstyle) {
1252                 /* We must read until the closing backquote, giving special
1253                    treatment to some slashes, and then push the string and
1254                    reread it as input, interpreting it normally.  */
1255                 int pc;
1256                 int psavelen;
1257                 char *pstr;
1258 
1259 		/*
1260 		 * Because the entire `...` is read here, we don't
1261 		 * need to bother the state stack.  That will be used
1262 		 * (as appropriate) when the processed string is re-read.
1263 		 */
1264                 STARTSTACKSTR(out);
1265 		for (;;) {
1266 			if (needprompt) {
1267 				setprompt(2);
1268 				needprompt = 0;
1269 			}
1270 			switch (pc = pgetc()) {
1271 			case '`':
1272 				goto done;
1273 
1274 			case '\\':
1275                                 if ((pc = pgetc()) == '\n') {
1276 					plinno++;
1277 					if (doprompt)
1278 						setprompt(2);
1279 					else
1280 						setprompt(0);
1281 					/*
1282 					 * If eating a newline, avoid putting
1283 					 * the newline into the new character
1284 					 * stream (via the STPUTC after the
1285 					 * switch).
1286 					 */
1287 					continue;
1288 				}
1289                                 if (pc != '\\' && pc != '`' && pc != '$'
1290                                     && (!ISDBLQUOTE() || pc != '"'))
1291                                         STPUTC('\\', out);
1292 				break;
1293 
1294 			case '\n':
1295 				plinno++;
1296 				needprompt = doprompt;
1297 				break;
1298 
1299 			case PEOF:
1300 			        startlinno = plinno;
1301 				synerror("EOF in backquote substitution");
1302  				break;
1303 
1304 			default:
1305 				break;
1306 			}
1307 			STPUTC(pc, out);
1308                 }
1309 done:
1310                 STPUTC('\0', out);
1311                 psavelen = out - stackblock();
1312                 if (psavelen > 0) {
1313 			pstr = grabstackstr(out);
1314 			setinputstring(pstr, 1);
1315                 }
1316         }
1317 	nlpp = pbqlist;
1318 	while (*nlpp)
1319 		nlpp = &(*nlpp)->next;
1320 	*nlpp = stalloc(sizeof(struct nodelist));
1321 	(*nlpp)->next = NULL;
1322 	parsebackquote = oldstyle;
1323 
1324 	if (oldstyle) {
1325 		saveprompt = doprompt;
1326 		doprompt = 0;
1327 	} else
1328 		saveprompt = 0;
1329 
1330 	n = list(0, oldstyle);
1331 
1332 	if (oldstyle)
1333 		doprompt = saveprompt;
1334 	else {
1335 		if (readtoken() != TRP) {
1336 			cleanup_state_stack(stack);
1337 			synexpect(TRP, 0);
1338 		}
1339 	}
1340 
1341 	(*nlpp)->n = n;
1342         if (oldstyle) {
1343 		/*
1344 		 * Start reading from old file again, ignoring any pushed back
1345 		 * tokens left from the backquote parsing
1346 		 */
1347                 popfile();
1348 		tokpushback = 0;
1349 	}
1350 
1351 	while (stackblocksize() <= savelen)
1352 		growstackblock();
1353 	STARTSTACKSTR(out);
1354 	if (str) {
1355 		memcpy(out, str, savelen);
1356 		STADJUST(savelen, out);
1357 		INTOFF;
1358 		ckfree(str);
1359 		sstr = str = NULL;
1360 		INTON;
1361 	}
1362 	parsebackquote = savepbq;
1363 	handler = savehandler;
1364 	if (arinest || ISDBLQUOTE())
1365 		USTPUTC(CTLBACKQ | CTLQUOTE, out);
1366 	else
1367 		USTPUTC(CTLBACKQ, out);
1368 
1369 	return out;
1370 }
1371 
1372 /*
1373  * Parse a redirection operator.  The parameter "out" points to a string
1374  * specifying the fd to be redirected.  It is guaranteed to be either ""
1375  * or a numeric string (for now anyway).  The parameter "c" contains the
1376  * first character of the redirection operator.
1377  *
1378  * Note the string "out" is on the stack, which we are about to clobber,
1379  * so process it first...
1380  */
1381 
1382 static void
1383 parseredir(const char *out,  int c)
1384 {
1385 	union node *np;
1386 	int fd;
1387 
1388 	fd = (*out == '\0') ? -1 : atoi(out);
1389 
1390 	np = stalloc(sizeof(struct nfile));
1391 	if (c == '>') {
1392 		if (fd < 0)
1393 			fd = 1;
1394 		c = pgetc();
1395 		if (c == '>')
1396 			np->type = NAPPEND;
1397 		else if (c == '|')
1398 			np->type = NCLOBBER;
1399 		else if (c == '&')
1400 			np->type = NTOFD;
1401 		else {
1402 			np->type = NTO;
1403 			pungetc();
1404 		}
1405 	} else {	/* c == '<' */
1406 		if (fd < 0)
1407 			fd = 0;
1408 		switch (c = pgetc()) {
1409 		case '<':
1410 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
1411 				np = stalloc(sizeof(struct nhere));
1412 				np->nfile.fd = 0;
1413 			}
1414 			np->type = NHERE;
1415 			heredoc = stalloc(sizeof(struct heredoc));
1416 			heredoc->here = np;
1417 			if ((c = pgetc()) == '-') {
1418 				heredoc->striptabs = 1;
1419 			} else {
1420 				heredoc->striptabs = 0;
1421 				pungetc();
1422 			}
1423 			break;
1424 
1425 		case '&':
1426 			np->type = NFROMFD;
1427 			break;
1428 
1429 		case '>':
1430 			np->type = NFROMTO;
1431 			break;
1432 
1433 		default:
1434 			np->type = NFROM;
1435 			pungetc();
1436 			break;
1437 		}
1438 	}
1439 	np->nfile.fd = fd;
1440 
1441 	redirnode = np;		/* this is the "value" of TRENODE */
1442 }
1443 
1444 
1445 /*
1446  * The lowest level basic tokenizer.
1447  *
1448  * The next input byte (character) is in firstc, syn says which
1449  * syntax tables we are to use (basic, single or double quoted, or arith)
1450  * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1451  * quote character itself is not special (used parsing here docs and similar)
1452  *
1453  * The result is the type of the next token (its value, when there is one,
1454  * is saved in the relevant global var - must fix that someday!) which is
1455  * also saved for re-reading ("lasttoken").
1456  *
1457  * Overall, this routine does far more parsing than it is supposed to.
1458  * That will also need fixing, someday...
1459  */
1460 STATIC int
1461 readtoken1(int firstc, char const *syn, int magicq)
1462 {
1463 	int c = firstc;
1464 	char * out;
1465 	int len;
1466 	struct nodelist *bqlist;
1467 	int quotef;
1468 	VSS static_stack;
1469 	VSS *stack = &static_stack;
1470 
1471 	stack->prev = NULL;
1472 	stack->cur = 0;
1473 
1474 	syntax = syn;
1475 
1476 	startlinno = plinno;
1477 	varnest = 0;
1478 	quoted = 0;
1479 	if (syntax == DQSYNTAX)
1480 		SETDBLQUOTE();
1481 	quotef = 0;
1482 	bqlist = NULL;
1483 	arinest = 0;
1484 	parenlevel = 0;
1485 
1486 	STARTSTACKSTR(out);
1487 	loop: {	/* for each line, until end of word */
1488 		for (;;) {	/* until end of line or end of word */
1489 			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
1490 			switch(syntax[c]) {
1491 			case CNL:	/* '\n' */
1492 				if (syntax == BASESYNTAX)
1493 					goto endword;	/* exit outer loop */
1494 				USTPUTC(c, out);
1495 				plinno++;
1496 				if (doprompt)
1497 					setprompt(2);
1498 				else
1499 					setprompt(0);
1500 				c = pgetc();
1501 				goto loop;		/* continue outer loop */
1502 			case CWORD:
1503 				USTPUTC(c, out);
1504 				break;
1505 			case CCTL:
1506 				if (!magicq || ISDBLQUOTE())
1507 					USTPUTC(CTLESC, out);
1508 				USTPUTC(c, out);
1509 				break;
1510 			case CBACK:	/* backslash */
1511 				c = pgetc();
1512 				if (c == PEOF) {
1513 					USTPUTC('\\', out);
1514 					pungetc();
1515 					break;
1516 				}
1517 				if (c == '\n') {
1518 					plinno++;
1519 					if (doprompt)
1520 						setprompt(2);
1521 					else
1522 						setprompt(0);
1523 					break;
1524 				}
1525 				quotef = 1;
1526 				if (ISDBLQUOTE() && c != '\\' &&
1527 				    c != '`' && c != '$' &&
1528 				    (c != '"' || magicq))
1529 					USTPUTC('\\', out);
1530 				if (SQSYNTAX[c] == CCTL)
1531 					USTPUTC(CTLESC, out);
1532 				else if (!magicq) {
1533 					USTPUTC(CTLQUOTEMARK, out);
1534 					USTPUTC(c, out);
1535 					if (varnest != 0)
1536 						USTPUTC(CTLQUOTEEND, out);
1537 					break;
1538 				}
1539 				USTPUTC(c, out);
1540 				break;
1541 			case CSQUOTE:
1542 				if (syntax != SQSYNTAX) {
1543 					if (!magicq)
1544 						USTPUTC(CTLQUOTEMARK, out);
1545 					quotef = 1;
1546 					TS_PUSH();
1547 					syntax = SQSYNTAX;
1548 					quoted = SQ;
1549 					break;
1550 				}
1551 				if (magicq && arinest == 0 && varnest == 0) {
1552 					/* Ignore inside quoted here document */
1553 					USTPUTC(c, out);
1554 					break;
1555 				}
1556 				/* End of single quotes... */
1557 				TS_POP();
1558 				if (syntax == BASESYNTAX && varnest != 0)
1559 					USTPUTC(CTLQUOTEEND, out);
1560 				break;
1561 			case CDQUOTE:
1562 				if (magicq && arinest == 0 && varnest == 0) {
1563 					/* Ignore inside here document */
1564 					USTPUTC(c, out);
1565 					break;
1566 				}
1567 				quotef = 1;
1568 				if (arinest) {
1569 					if (ISDBLQUOTE()) {
1570 						TS_POP();
1571 					} else {
1572 						TS_PUSH();
1573 						syntax = DQSYNTAX;
1574 						SETDBLQUOTE();
1575 						USTPUTC(CTLQUOTEMARK, out);
1576 					}
1577 					break;
1578 				}
1579 				if (magicq)
1580 					break;
1581 				if (ISDBLQUOTE()) {
1582 					TS_POP();
1583 					if (varnest != 0)
1584 						USTPUTC(CTLQUOTEEND, out);
1585 				} else {
1586 					TS_PUSH();
1587 					syntax = DQSYNTAX;
1588 					SETDBLQUOTE();
1589 					USTPUTC(CTLQUOTEMARK, out);
1590 				}
1591 				break;
1592 			case CVAR:	/* '$' */
1593 				PARSESUB();		/* parse substitution */
1594 				break;
1595 			case CENDVAR:	/* CLOSEBRACE */
1596 				if (varnest > 0 && !ISDBLQUOTE()) {
1597 					TS_POP();
1598 					USTPUTC(CTLENDVAR, out);
1599 				} else {
1600 					USTPUTC(c, out);
1601 				}
1602 				break;
1603 			case CLP:	/* '(' in arithmetic */
1604 				parenlevel++;
1605 				USTPUTC(c, out);
1606 				break;
1607 			case CRP:	/* ')' in arithmetic */
1608 				if (parenlevel > 0) {
1609 					USTPUTC(c, out);
1610 					--parenlevel;
1611 				} else {
1612 					if (pgetc() == ')') {
1613 						if (--arinest == 0) {
1614 							TS_POP();
1615 							USTPUTC(CTLENDARI, out);
1616 						} else
1617 							USTPUTC(')', out);
1618 					} else {
1619 						/*
1620 						 * unbalanced parens
1621 						 *  (don't 2nd guess - no error)
1622 						 */
1623 						pungetc();
1624 						USTPUTC(')', out);
1625 					}
1626 				}
1627 				break;
1628 			case CBQUOTE:	/* '`' */
1629 				out = parsebackq(stack, out, &bqlist, 1);
1630 				break;
1631 			case CEOF:
1632 				goto endword;		/* exit outer loop */
1633 			default:
1634 				if (varnest == 0 && !ISDBLQUOTE())
1635 					goto endword;	/* exit outer loop */
1636 				USTPUTC(c, out);
1637 			}
1638 			c = pgetc_macro();
1639 		}
1640 	}
1641 endword:
1642 	if (syntax == ARISYNTAX) {
1643 		cleanup_state_stack(stack);
1644 		synerror("Missing '))'");
1645 	}
1646 	if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
1647 		cleanup_state_stack(stack);
1648 		synerror("Unterminated quoted string");
1649 	}
1650 	if (varnest != 0) {
1651 		cleanup_state_stack(stack);
1652 		startlinno = plinno;
1653 		/* { */
1654 		synerror("Missing '}'");
1655 	}
1656 	USTPUTC('\0', out);
1657 	len = out - stackblock();
1658 	out = stackblock();
1659 	if (!magicq) {
1660 		if ((c == '<' || c == '>')
1661 		 && quotef == 0
1662 		 && (*out == '\0' || is_number(out))) {
1663 			parseredir(out, c);
1664 			cleanup_state_stack(stack);
1665 			return lasttoken = TREDIR;
1666 		} else {
1667 			pungetc();
1668 		}
1669 	}
1670 	quoteflag = quotef;
1671 	backquotelist = bqlist;
1672 	grabstackblock(len);
1673 	wordtext = out;
1674 	cleanup_state_stack(stack);
1675 	return lasttoken = TWORD;
1676 /* end of readtoken routine */
1677 
1678 
1679 /*
1680  * Parse a substitution.  At this point, we have read the dollar sign
1681  * and nothing else.
1682  */
1683 
1684 parsesub: {
1685 	char buf[10];
1686 	int subtype;
1687 	int typeloc;
1688 	int flags;
1689 	char *p;
1690 	static const char types[] = "}-+?=";
1691 	int i;
1692 	int linno;
1693 
1694 	c = pgetc();
1695 	if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1696 		USTPUTC('$', out);
1697 		pungetc();
1698 	} else if (c == '(') {	/* $(command) or $((arith)) */
1699 		if (pgetc() == '(') {
1700 			PARSEARITH();
1701 		} else {
1702 			pungetc();
1703 			out = parsebackq(stack, out, &bqlist, 0);
1704 		}
1705 	} else {
1706 		USTPUTC(CTLVAR, out);
1707 		typeloc = out - stackblock();
1708 		USTPUTC(VSNORMAL, out);
1709 		subtype = VSNORMAL;
1710 		flags = 0;
1711 		if (c == OPENBRACE) {
1712 			c = pgetc();
1713 			if (c == '#') {
1714 				if ((c = pgetc()) == CLOSEBRACE)
1715 					c = '#';
1716 				else
1717 					subtype = VSLENGTH;
1718 			}
1719 			else
1720 				subtype = 0;
1721 		}
1722 		if (is_name(c)) {
1723 			p = out;
1724 			do {
1725 				STPUTC(c, out);
1726 				c = pgetc();
1727 			} while (is_in_name(c));
1728 			if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
1729 				/* Replace the variable name with the
1730 				 * current line number. */
1731 				linno = plinno;
1732 				if (funclinno != 0)
1733 					linno -= funclinno - 1;
1734 				snprintf(buf, sizeof(buf), "%d", linno);
1735 				STADJUST(-6, out);
1736 				for (i = 0; buf[i] != '\0'; i++)
1737 					STPUTC(buf[i], out);
1738 				flags |= VSLINENO;
1739 			}
1740 		} else if (is_digit(c)) {
1741 			do {
1742 				USTPUTC(c, out);
1743 				c = pgetc();
1744 			} while (is_digit(c));
1745 		}
1746 		else if (is_special(c)) {
1747 			USTPUTC(c, out);
1748 			c = pgetc();
1749 		}
1750 		else {
1751 badsub:
1752 			cleanup_state_stack(stack);
1753 			synerror("Bad substitution");
1754 		}
1755 
1756 		STPUTC('=', out);
1757 		if (subtype == 0) {
1758 			switch (c) {
1759 			case ':':
1760 				flags |= VSNUL;
1761 				c = pgetc();
1762 				/*FALLTHROUGH*/
1763 			default:
1764 				p = strchr(types, c);
1765 				if (p == NULL)
1766 					goto badsub;
1767 				subtype = p - types + VSNORMAL;
1768 				break;
1769 			case '%':
1770 			case '#':
1771 				{
1772 					int cc = c;
1773 					subtype = c == '#' ? VSTRIMLEFT :
1774 							     VSTRIMRIGHT;
1775 					c = pgetc();
1776 					if (c == cc)
1777 						subtype++;
1778 					else
1779 						pungetc();
1780 					break;
1781 				}
1782 			}
1783 		} else {
1784 			pungetc();
1785 		}
1786 		if (ISDBLQUOTE() || arinest)
1787 			flags |= VSQUOTE;
1788 		if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
1789 			flags |= VSPATQ;
1790 		*(stackblock() + typeloc) = subtype | flags;
1791 		if (subtype != VSNORMAL) {
1792 			TS_PUSH();
1793 			varnest++;
1794 			arinest = 0;
1795 			if (subtype > VSASSIGN) {	/* # ## % %% */
1796 				syntax = BASESYNTAX;
1797 				CLRDBLQUOTE();
1798 			}
1799 		}
1800 	}
1801 	goto parsesub_return;
1802 }
1803 
1804 
1805 /*
1806  * Parse an arithmetic expansion (indicate start of one and set state)
1807  */
1808 parsearith: {
1809 
1810 	if (syntax == ARISYNTAX) {
1811 		/*
1812 		 * we collapse embedded arithmetic expansion to
1813 		 * parentheses, which should be equivalent
1814 		 */
1815 		USTPUTC('(', out);
1816 		USTPUTC('(', out);
1817 		/*
1818 		 * Need 2 of them because there will (should be)
1819 		 * two closing ))'s to follow later.
1820 		 */
1821 		parenlevel += 2;
1822 	} else {
1823 		TS_PUSH();
1824 		syntax = ARISYNTAX;
1825 		++arinest;
1826 		varnest = 0;
1827 
1828 		USTPUTC(CTLARI, out);
1829 		if (ISDBLQUOTE())
1830 			USTPUTC('"',out);
1831 		else
1832 			USTPUTC(' ',out);
1833 	}
1834 	goto parsearith_return;
1835 }
1836 
1837 } /* end of readtoken */
1838 
1839 
1840 
/*
 * Only seen when "mkinit" is defined (presumably by the mkinit tool that
 * gathers RESET fragments - confirm against the build): clear any
 * pushed-back token and the keyword-check flag.
 */
#ifdef mkinit
RESET {
	tokpushback = 0;
	checkkwd = 0;
}
#endif
1847 
1848 /*
1849  * Returns true if the text contains nothing to expand (no dollar signs
1850  * or backquotes).
1851  */
1852 
1853 STATIC int
1854 noexpand(char *text)
1855 {
1856 	char *p;
1857 	char c;
1858 
1859 	p = text;
1860 	while ((c = *p++) != '\0') {
1861 		if (c == CTLQUOTEMARK)
1862 			continue;
1863 		if (c == CTLESC)
1864 			p++;
1865 		else if (BASESYNTAX[(int)c] == CCTL)
1866 			return 0;
1867 	}
1868 	return 1;
1869 }
1870 
1871 
/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 */

int
goodname(char *name)
{
	char *rest;

	/* first character has the stricter rule */
	if (! is_name(*name))
		return 0;

	for (rest = name + 1; *rest != '\0'; rest++) {
		if (! is_in_name(*rest))
			return 0;
	}
	return 1;
}
1891 
1892 
1893 /*
1894  * Called when an unexpected token is read during the parse.  The argument
1895  * is the token that is expected, or -1 if more than one type of token can
1896  * occur at this point.
1897  */
1898 
1899 STATIC void
1900 synexpect(int token, const char *text)
1901 {
1902 	char msg[64];
1903 	char *p;
1904 
1905 	if (lasttoken == TWORD) {
1906 		size_t len = strlen(wordtext);
1907 
1908 		if (len <= 13)
1909 			fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
1910 		else
1911 			fmtstr(msg, 34,
1912 			    "Word \"%.10s...\" unexpected", wordtext);
1913 	} else
1914 		fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
1915 
1916 	p = strchr(msg, '\0');
1917 	if (text)
1918 		fmtstr(p, 30, " (expecting \"%.10s\")", text);
1919 	else if (token >= 0)
1920 		fmtstr(p, 30, " (expecting %s)",  tokname[token]);
1921 
1922 	synerror(msg);
1923 	/* NOTREACHED */
1924 }
1925 
1926 
/*
 * Report a syntax error: passes msg to error(), prefixed with the line
 * number in startlinno.  Marked NOTREACHED after the call, i.e. error()
 * is not expected to return.
 */
STATIC void
synerror(const char *msg)
{
	error("%d: Syntax error: %s\n", startlinno, msg);
	/* NOTREACHED */
}
1933 
/*
 * Select which prompt is current (0 = none, 1 and 2 as handled by
 * getprompt()) and print it with out2str().
 *
 * Unless built with SMALL, the prompt is printed here only when "el" is
 * not set (presumably because the line editor then prints the prompt
 * itself via getprompt() - confirm against the editline setup).
 */
STATIC void
setprompt(int which)
{
	whichprompt = which;

#ifndef SMALL
	if (!el)
#endif
		out2str(getprompt(NULL));
}
1944 
1945 /*
1946  * called by editline -- any expansions to the prompt
1947  *    should be added here.
1948  */
1949 const char *
1950 getprompt(void *unused)
1951 {
1952 	switch (whichprompt) {
1953 	case 0:
1954 		return "";
1955 	case 1:
1956 		return ps1val();
1957 	case 2:
1958 		return ps2val();
1959 	default:
1960 		return "<internal prompt error>";
1961 	}
1962 }
1963