xref: /netbsd-src/bin/sh/parser.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	$NetBSD: parser.c,v 1.120 2016/06/01 02:47:05 kre Exp $	*/
2 
3 /*-
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Kenneth Almquist.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.120 2016/06/01 02:47:05 kre Exp $");
41 #endif
42 #endif /* not lint */
43 
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <limits.h>
47 
48 #include "shell.h"
49 #include "parser.h"
50 #include "nodes.h"
51 #include "expand.h"	/* defines rmescapes() */
52 #include "eval.h"	/* defines commandname */
53 #include "syntax.h"
54 #include "options.h"
55 #include "input.h"
56 #include "output.h"
57 #include "var.h"
58 #include "error.h"
59 #include "memalloc.h"
60 #include "mystring.h"
61 #include "alias.h"
62 #include "show.h"
63 #ifndef SMALL
64 #include "myhistedit.h"
65 #endif
66 
67 /*
68  * Shell command parser.
69  */
70 
71 /* values returned by readtoken */
72 #include "token.h"
73 
74 #define OPENBRACE '{'
75 #define CLOSEBRACE '}'
76 
77 
/*
 * One pending here document: a "<<" redirection that has been seen but
 * whose body has not yet been read.  Records are appended to heredoclist
 * by parsefname() and consumed by readheredocs().
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;		/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
	int startline;		/* line number where << seen */
};
85 
86 
87 
/*
 * Parser state shared between the token reader and the grammar routines
 * in this file.
 */
static int noalias = 0;		/* when set, don't handle aliases */
struct heredoc *heredoclist;	/* list of here documents to read */
int parsebackquote;		/* nonzero if we are inside backquotes */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
MKINIT int tokpushback;		/* last token pushed back */
char *wordtext;			/* text of last word returned by readtoken */
MKINIT int checkkwd;		/* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;	/* copied into narg.backquote of each word node */
union node *redirnode;		/* redirection node for the last TREDIR token */
struct heredoc *heredoc;	/* here document record picked up by parsefname() */
int quoteflag;			/* set if (part of) last token was quoted */
int startlinno;			/* line # where last token started */
int funclinno;			/* line # where the current function started */
103 
104 
/*
 * Forward declarations of the file-local parser routines: the grammar
 * functions (list ... simplecmd), the here document readers, the token
 * readers, and the syntax error reporters (which do not return).
 */
STATIC union node *list(int, int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makename(void);
STATIC void parsefname(void);
STATIC void slurp_heredoc(char *const, const int, const int);
STATIC void readheredocs(void);
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, int);
STATIC int noexpand(char *);
STATIC void synexpect(int, const char *) __dead;
STATIC void synerror(const char *) __dead;
STATIC void setprompt(int);
122 
123 
/* Syntax error text used when the input ends inside a here document. */
static const char EOFhere[] = "EOF reading here (<<) document";
125 
126 
127 /*
128  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
129  * valid parse tree indicating a blank line.)
130  */
131 
132 union node *
133 parsecmd(int interact)
134 {
135 	int t;
136 	union node *n;
137 
138 	tokpushback = 0;
139 	doprompt = interact;
140 	if (doprompt)
141 		setprompt(1);
142 	else
143 		setprompt(0);
144 	needprompt = 0;
145 	t = readtoken();
146 	if (t == TEOF)
147 		return NEOF;
148 	if (t == TNL)
149 		return NULL;
150 	tokpushback++;
151 	n = list(1, 0);
152 	if (heredoclist)
153 		error("%d: Here document (<<%s) expected but not present",
154 			heredoclist->startline, heredoclist->eofmark);
155 	return n;
156 }
157 
158 
159 STATIC union node *
160 list(int nlflag, int erflag)
161 {
162 	union node *n1, *n2, *n3;
163 	int tok;
164 	TRACE(("list(%d,%d): entered\n", nlflag, erflag));
165 
166 	checkkwd = 2;
167 	if (nlflag == 0 && tokendlist[peektoken()])
168 		return NULL;
169 	n1 = NULL;
170 	for (;;) {
171 		n2 = andor();
172 		tok = readtoken();
173 		if (tok == TBACKGND) {
174 			if (n2->type == NCMD || n2->type == NPIPE) {
175 				n2->ncmd.backgnd = 1;
176 			} else if (n2->type == NREDIR) {
177 				n2->type = NBACKGND;
178 			} else {
179 				n3 = stalloc(sizeof(struct nredir));
180 				n3->type = NBACKGND;
181 				n3->nredir.n = n2;
182 				n3->nredir.redirect = NULL;
183 				n2 = n3;
184 			}
185 		}
186 		if (n1 == NULL) {
187 			n1 = n2;
188 		}
189 		else {
190 			n3 = stalloc(sizeof(struct nbinary));
191 			n3->type = NSEMI;
192 			n3->nbinary.ch1 = n1;
193 			n3->nbinary.ch2 = n2;
194 			n1 = n3;
195 		}
196 		switch (tok) {
197 		case TBACKGND:
198 		case TSEMI:
199 			tok = readtoken();
200 			/* FALLTHROUGH */
201 		case TNL:
202 			if (tok == TNL) {
203 				readheredocs();
204 				if (nlflag)
205 					return n1;
206 			} else {
207 				tokpushback++;
208 			}
209 			checkkwd = 2;
210 			if (tokendlist[peektoken()])
211 				return n1;
212 			break;
213 		case TEOF:
214 			pungetc();	/* push back EOF on input */
215 			return n1;
216 		default:
217 			if (nlflag || erflag)
218 				synexpect(-1, 0);
219 			tokpushback++;
220 			return n1;
221 		}
222 	}
223 }
224 
225 STATIC union node *
226 andor(void)
227 {
228 	union node *n1, *n2, *n3;
229 	int t;
230 
231 	TRACE(("andor: entered\n"));
232 	n1 = pipeline();
233 	for (;;) {
234 		if ((t = readtoken()) == TAND) {
235 			t = NAND;
236 		} else if (t == TOR) {
237 			t = NOR;
238 		} else {
239 			tokpushback++;
240 			return n1;
241 		}
242 		n2 = pipeline();
243 		n3 = stalloc(sizeof(struct nbinary));
244 		n3->type = t;
245 		n3->nbinary.ch1 = n1;
246 		n3->nbinary.ch2 = n2;
247 		n1 = n3;
248 	}
249 }
250 
251 STATIC union node *
252 pipeline(void)
253 {
254 	union node *n1, *n2, *pipenode;
255 	struct nodelist *lp, *prev;
256 	int negate;
257 
258 	TRACE(("pipeline: entered\n"));
259 
260 	negate = 0;
261 	checkkwd = 2;
262 	while (readtoken() == TNOT) {
263 		TRACE(("pipeline: TNOT recognized\n"));
264 		negate = !negate;
265 	}
266 	tokpushback++;
267 	n1 = command();
268 	if (readtoken() == TPIPE) {
269 		pipenode = stalloc(sizeof(struct npipe));
270 		pipenode->type = NPIPE;
271 		pipenode->npipe.backgnd = 0;
272 		lp = stalloc(sizeof(struct nodelist));
273 		pipenode->npipe.cmdlist = lp;
274 		lp->n = n1;
275 		do {
276 			prev = lp;
277 			lp = stalloc(sizeof(struct nodelist));
278 			lp->n = command();
279 			prev->next = lp;
280 		} while (readtoken() == TPIPE);
281 		lp->next = NULL;
282 		n1 = pipenode;
283 	}
284 	tokpushback++;
285 	if (negate) {
286 		TRACE(("negate pipeline\n"));
287 		n2 = stalloc(sizeof(struct nnot));
288 		n2->type = NNOT;
289 		n2->nnot.com = n1;
290 		return n2;
291 	} else
292 		return n1;
293 }
294 
295 
296 
297 STATIC union node *
298 command(void)
299 {
300 	union node *n1, *n2;
301 	union node *ap, **app;
302 	union node *cp, **cpp;
303 	union node *redir, **rpp;
304 	int t, negate = 0;
305 
306 	TRACE(("command: entered\n"));
307 
308 	checkkwd = 2;
309 	redir = NULL;
310 	n1 = NULL;
311 	rpp = &redir;
312 
313 	/* Check for redirection which may precede command */
314 	while (readtoken() == TREDIR) {
315 		*rpp = n2 = redirnode;
316 		rpp = &n2->nfile.next;
317 		parsefname();
318 	}
319 	tokpushback++;
320 
321 	while (readtoken() == TNOT) {
322 		TRACE(("command: TNOT recognized\n"));
323 		negate = !negate;
324 	}
325 	tokpushback++;
326 
327 	switch (readtoken()) {
328 	case TIF:
329 		n1 = stalloc(sizeof(struct nif));
330 		n1->type = NIF;
331 		n1->nif.test = list(0, 0);
332 		if (readtoken() != TTHEN)
333 			synexpect(TTHEN, 0);
334 		n1->nif.ifpart = list(0, 0);
335 		n2 = n1;
336 		while (readtoken() == TELIF) {
337 			n2->nif.elsepart = stalloc(sizeof(struct nif));
338 			n2 = n2->nif.elsepart;
339 			n2->type = NIF;
340 			n2->nif.test = list(0, 0);
341 			if (readtoken() != TTHEN)
342 				synexpect(TTHEN, 0);
343 			n2->nif.ifpart = list(0, 0);
344 		}
345 		if (lasttoken == TELSE)
346 			n2->nif.elsepart = list(0, 0);
347 		else {
348 			n2->nif.elsepart = NULL;
349 			tokpushback++;
350 		}
351 		if (readtoken() != TFI)
352 			synexpect(TFI, 0);
353 		checkkwd = 1;
354 		break;
355 	case TWHILE:
356 	case TUNTIL: {
357 		int got;
358 		n1 = stalloc(sizeof(struct nbinary));
359 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
360 		n1->nbinary.ch1 = list(0, 0);
361 		if ((got=readtoken()) != TDO) {
362 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
363 			synexpect(TDO, 0);
364 		}
365 		n1->nbinary.ch2 = list(0, 0);
366 		if (readtoken() != TDONE)
367 			synexpect(TDONE, 0);
368 		checkkwd = 1;
369 		break;
370 	}
371 	case TFOR:
372 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
373 			synerror("Bad for loop variable");
374 		n1 = stalloc(sizeof(struct nfor));
375 		n1->type = NFOR;
376 		n1->nfor.var = wordtext;
377 		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
378 			app = &ap;
379 			while (readtoken() == TWORD) {
380 				n2 = stalloc(sizeof(struct narg));
381 				n2->type = NARG;
382 				n2->narg.text = wordtext;
383 				n2->narg.backquote = backquotelist;
384 				*app = n2;
385 				app = &n2->narg.next;
386 			}
387 			*app = NULL;
388 			n1->nfor.args = ap;
389 			if (lasttoken != TNL && lasttoken != TSEMI)
390 				synexpect(-1, 0);
391 		} else {
392 			static char argvars[5] = {
393 			    CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
394 			};
395 			n2 = stalloc(sizeof(struct narg));
396 			n2->type = NARG;
397 			n2->narg.text = argvars;
398 			n2->narg.backquote = NULL;
399 			n2->narg.next = NULL;
400 			n1->nfor.args = n2;
401 			/*
402 			 * Newline or semicolon here is optional (but note
403 			 * that the original Bourne shell only allowed NL).
404 			 */
405 			if (lasttoken != TNL && lasttoken != TSEMI)
406 				tokpushback++;
407 		}
408 		checkkwd = 2;
409 		if ((t = readtoken()) == TDO)
410 			t = TDONE;
411 		else if (t == TBEGIN)
412 			t = TEND;
413 		else
414 			synexpect(-1, 0);
415 		n1->nfor.body = list(0, 0);
416 		if (readtoken() != t)
417 			synexpect(t, 0);
418 		checkkwd = 1;
419 		break;
420 	case TCASE:
421 		n1 = stalloc(sizeof(struct ncase));
422 		n1->type = NCASE;
423 		if (readtoken() != TWORD)
424 			synexpect(TWORD, 0);
425 		n1->ncase.expr = n2 = stalloc(sizeof(struct narg));
426 		n2->type = NARG;
427 		n2->narg.text = wordtext;
428 		n2->narg.backquote = backquotelist;
429 		n2->narg.next = NULL;
430 		while (readtoken() == TNL);
431 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
432 			synexpect(-1, "in");
433 		cpp = &n1->ncase.cases;
434 		noalias = 1;
435 		checkkwd = 2, readtoken();
436 		/*
437 		 * Both ksh and bash accept 'case x in esac'
438 		 * so configure scripts started taking advantage of this.
439 		 * The page: http://pubs.opengroup.org/onlinepubs/\
440 		 * 009695399/utilities/xcu_chap02.html contradicts itself,
441 		 * as to if this is legal; the "Case Conditional Format"
442 		 * paragraph shows one case is required, but the "Grammar"
443 		 * section shows a grammar that explicitly allows the no
444 		 * case option.
445 		 */
446 		while (lasttoken != TESAC) {
447 			*cpp = cp = stalloc(sizeof(struct nclist));
448 			if (lasttoken == TLP)
449 				readtoken();
450 			cp->type = NCLIST;
451 			app = &cp->nclist.pattern;
452 			for (;;) {
453 				*app = ap = stalloc(sizeof(struct narg));
454 				ap->type = NARG;
455 				ap->narg.text = wordtext;
456 				ap->narg.backquote = backquotelist;
457 				if (checkkwd = 2, readtoken() != TPIPE)
458 					break;
459 				app = &ap->narg.next;
460 				readtoken();
461 			}
462 			ap->narg.next = NULL;
463 			noalias = 0;
464 			if (lasttoken != TRP) {
465 				synexpect(TRP, 0);
466 			}
467 			cp->nclist.body = list(0, 0);
468 
469 			checkkwd = 2;
470 			if ((t = readtoken()) != TESAC) {
471 				if (t != TENDCASE) {
472 					noalias = 0;
473 					synexpect(TENDCASE, 0);
474 				} else {
475 					noalias = 1;
476 					checkkwd = 2;
477 					readtoken();
478 				}
479 			}
480 			cpp = &cp->nclist.next;
481 		}
482 		noalias = 0;
483 		*cpp = NULL;
484 		checkkwd = 1;
485 		break;
486 	case TLP:
487 		n1 = stalloc(sizeof(struct nredir));
488 		n1->type = NSUBSHELL;
489 		n1->nredir.n = list(0, 0);
490 		n1->nredir.redirect = NULL;
491 		if (n1->nredir.n == NULL)
492 			synexpect(-1, 0);
493 		if (readtoken() != TRP)
494 			synexpect(TRP, 0);
495 		checkkwd = 1;
496 		break;
497 	case TBEGIN:
498 		n1 = list(0, 0);
499 		if (posix && n1 == NULL)
500 			synexpect(-1, 0);
501 		if (readtoken() != TEND)
502 			synexpect(TEND, 0);
503 		checkkwd = 1;
504 		break;
505 
506 	case TSEMI:
507 	case TAND:
508 	case TOR:
509 	case TPIPE:
510 	case TNL:
511 	case TEOF:
512 	case TRP:
513 		/*
514 		 * simple commands must have something in them,
515 		 * either a word (which at this point includes a=b)
516 		 * or a redirection.  If we reached the end of the
517 		 * command (which one of these tokens indicates)
518 		 * when we are just starting, and have not had a
519 		 * redirect, then ...
520 		 *
521 		 * nb: it is still possible to end up with empty
522 		 * simple commands, if the "command" is a var
523 		 * expansion that produces nothing
524 		 *	X= ; $X && $X
525 		 * -->          &&
526 		 * I am not sure if this is intended to be legal or not.
527 		 */
528 		if (!redir)
529 			synexpect(-1, 0);
530 	case TWORD:
531 		tokpushback++;
532 		n1 = simplecmd(rpp, redir);
533 		goto checkneg;
534 	case TENDCASE:
535 		if (redir) {
536 			tokpushback++;
537 			goto checkneg;
538 		}
539 		/* FALLTHROUGH */
540 	default:
541 		synexpect(-1, 0);
542 		/* NOTREACHED */
543 	}
544 
545 	/* Now check for redirection which may follow command */
546 	while (readtoken() == TREDIR) {
547 		*rpp = n2 = redirnode;
548 		rpp = &n2->nfile.next;
549 		parsefname();
550 	}
551 	tokpushback++;
552 	*rpp = NULL;
553 	if (redir) {
554 		if (n1->type != NSUBSHELL) {
555 			n2 = stalloc(sizeof(struct nredir));
556 			n2->type = NREDIR;
557 			n2->nredir.n = n1;
558 			n1 = n2;
559 		}
560 		n1->nredir.redirect = redir;
561 	}
562 
563 checkneg:
564 	if (negate) {
565 		TRACE(("negate command\n"));
566 		n2 = stalloc(sizeof(struct nnot));
567 		n2->type = NNOT;
568 		n2->nnot.com = n1;
569 		return n2;
570 	}
571 	else
572 		return n1;
573 }
574 
575 
576 STATIC union node *
577 simplecmd(union node **rpp, union node *redir)
578 {
579 	union node *args, **app;
580 	union node *n = NULL, *n2;
581 	int negate = 0;
582 
583 	/* If we don't have any redirections already, then we must reset */
584 	/* rpp to be the address of the local redir variable.  */
585 	if (redir == 0)
586 		rpp = &redir;
587 
588 	args = NULL;
589 	app = &args;
590 
591 	while (readtoken() == TNOT) {
592 		TRACE(("simplcmd: TNOT recognized\n"));
593 		negate = !negate;
594 	}
595 	tokpushback++;
596 
597 	for (;;) {
598 		if (readtoken() == TWORD) {
599 			n = stalloc(sizeof(struct narg));
600 			n->type = NARG;
601 			n->narg.text = wordtext;
602 			n->narg.backquote = backquotelist;
603 			*app = n;
604 			app = &n->narg.next;
605 		} else if (lasttoken == TREDIR) {
606 			*rpp = n = redirnode;
607 			rpp = &n->nfile.next;
608 			parsefname();	/* read name of redirection file */
609 		} else if (lasttoken == TLP && app == &args->narg.next
610 					    && redir == 0) {
611 			/* We have a function */
612 			if (readtoken() != TRP)
613 				synexpect(TRP, 0);
614 			funclinno = plinno;
615 			rmescapes(n->narg.text);
616 			if (strchr(n->narg.text, '/'))
617 				synerror("Bad function name");
618 			n->type = NDEFUN;
619 			n->narg.next = command();
620 			funclinno = 0;
621 			goto checkneg;
622 		} else {
623 			tokpushback++;
624 			break;
625 		}
626 	}
627 
628 	if (args == NULL && redir == NULL)
629 		synexpect(-1, 0);
630 	*app = NULL;
631 	*rpp = NULL;
632 	n = stalloc(sizeof(struct ncmd));
633 	n->type = NCMD;
634 	n->ncmd.backgnd = 0;
635 	n->ncmd.args = args;
636 	n->ncmd.redirect = redir;
637 
638 checkneg:
639 	if (negate) {
640 		TRACE(("negate simplecmd\n"));
641 		n2 = stalloc(sizeof(struct nnot));
642 		n2->type = NNOT;
643 		n2->nnot.com = n;
644 		return n2;
645 	}
646 	else
647 		return n;
648 }
649 
650 STATIC union node *
651 makename(void)
652 {
653 	union node *n;
654 
655 	n = stalloc(sizeof(struct narg));
656 	n->type = NARG;
657 	n->narg.next = NULL;
658 	n->narg.text = wordtext;
659 	n->narg.backquote = backquotelist;
660 	return n;
661 }
662 
663 void
664 fixredir(union node *n, const char *text, int err)
665 {
666 	TRACE(("Fix redir %s %d\n", text, err));
667 	if (!err)
668 		n->ndup.vname = NULL;
669 
670 	if (is_number(text))
671 		n->ndup.dupfd = number(text);
672 	else if (text[0] == '-' && text[1] == '\0')
673 		n->ndup.dupfd = -1;
674 	else {
675 
676 		if (err)
677 			synerror("Bad fd number");
678 		else
679 			n->ndup.vname = makename();
680 	}
681 }
682 
683 
684 STATIC void
685 parsefname(void)
686 {
687 	union node *n = redirnode;
688 
689 	if (readtoken() != TWORD)
690 		synexpect(-1, 0);
691 	if (n->type == NHERE) {
692 		struct heredoc *here = heredoc;
693 		struct heredoc *p;
694 
695 		if (quoteflag == 0)
696 			n->type = NXHERE;
697 		TRACE(("Here document %d\n", n->type));
698 		if (here->striptabs) {
699 			while (*wordtext == '\t')
700 				wordtext++;
701 		}
702 
703 		/*
704 		 * this test is not really necessary, we are not
705 		 * required to expand wordtext, but there's no reason
706 		 * it cannot be $$ or something like that - that would
707 		 * not mean the pid, but literally two '$' characters.
708 		 * There is no need for limits on what the word can be.
709 		 * However, it needs to stay literal as entered, not
710 		 * have $ converted to CTLVAR or something, which as
711 		 * the parser is, at the minute, is impossible to prevent.
712 		 * So, leave it like this until the rest of the parser is fixed.
713 		 */
714 		if (! noexpand(wordtext))
715 			synerror("Illegal eof marker for << redirection");
716 
717 		rmescapes(wordtext);
718 		here->eofmark = wordtext;
719 		here->next = NULL;
720 		if (heredoclist == NULL)
721 			heredoclist = here;
722 		else {
723 			for (p = heredoclist ; p->next ; p = p->next)
724 				continue;
725 			p->next = here;
726 		}
727 	} else if (n->type == NTOFD || n->type == NFROMFD) {
728 		fixredir(n, wordtext, 0);
729 	} else {
730 		n->nfile.fname = makename();
731 	}
732 }
733 
/*
 * Check to see whether we are at the end of the here document.  When this
 * is called, c is set to the first character of the next input line.  If
 * we are at the end of the here document, this routine sets the c to PEOF.
 * The new value of c is returned.
 *
 * eofmark is the end marker to look for, and striptabs is non-zero when
 * leading tabs on the line are to be skipped before the comparison.
 * Reaching the end of the input is a syntax error, unless eofmark is the
 * empty string.
 */

static int
checkend(int c, char * const eofmark, const int striptabs)
{
	if (striptabs) {
		/* leading tabs are dropped, whether or not this is the end */
		while (c == '\t')
			c = pgetc();
	}
	if (c == PEOF) {
		/* end of input terminates only an empty end marker */
		if (*eofmark == '\0')
			return (c);
		synerror(EOFhere);
	}
	if (c == *eofmark) {
		int c2;
		char *q;

		/* keep reading for as long as the input matches the marker */
		for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
			;
		if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
			/* the whole line is the marker: end of document */
			c = PEOF;
			if (c2 == '\n') {
				plinno++;
				needprompt = doprompt;
			}
		} else {
			/*
			 * Not the marker after all.  Un-read the character
			 * that did not match, then hand the part that did
			 * match (all but the first character, which is
			 * returned in c) to pushstring() - presumably so
			 * that it is read again as input.
			 */
			pungetc();
			pushstring(eofmark + 1, q - (eofmark + 1), NULL);
		}
	} else if (c == '\n' && *eofmark == '\0') {
		/* an empty line ends a here document with an empty marker */
		c = PEOF;
		plinno++;
		needprompt = doprompt;
	}
	return (c);
}
776 
777 
/*
 * Read the text of one here document from the input.
 *
 * eofmark    the end marker (a line consisting of just this ends the text)
 * striptabs  non-zero if leading tabs are to be removed from each line
 * sq         non-zero if the text is to be stored exactly as read;
 *            otherwise a backslash followed by a newline is removed
 *
 * The text, with a terminating '\0', is built as a stack string, and
 * wordtext is set to point at it.  Reaching the end of the input before
 * the end marker is a syntax error (except as noted below for an empty
 * eofmark).
 */

STATIC void
slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
{
	int c;
	char *out;

	c = pgetc();

	/*
	 * If we hit EOF on the input, and the eofmark is a null string ('')
	 * we consider this empty line to be the eofmark, and exit without err.
	 */
	if (c == PEOF && *eofmark != '\0')
		synerror(EOFhere);

	STARTSTACKSTR(out);

	/* one iteration of the outer loop for each line of the document */
	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
		do {
			if (sq) {
				/*
				 * in single quoted mode (eofmark quoted)
				 * all we look for is \n so we can check
				 * for the eofmark - everything saved literally.
				 */
				STPUTC(c, out);
				if (c == '\n')
					break;
				continue;
			}
			/*
			 * In double quoted (non-quoted eofmark)
			 * we must handle \ followed by \n here
			 * otherwise we can mismatch the end mark.
			 * All other uses of \ will be handled later
			 * when the here doc is expanded.
			 *
			 * This also makes sure \\ followed by \n does
			 * not suppress the newline (the \ quotes itself)
			 */
			if (c == '\\') {		/* A backslash */
				c = pgetc();		/* followed by */
				if (c == '\n')		/* a newline?  */
					continue;	/* y:drop both */
				STPUTC('\\', out);	/* else keep \ */
			}
			STPUTC(c, out);			/* keep the char */
			if (c == '\n')			/* at end of line */
				break;			/* look for eofmark */

		} while ((c = pgetc()) != PEOF);

		/*
		 * If we have read a line, and reached EOF, without
		 * finding the eofmark, whether the EOF comes before
		 * or immediately after the \n, that is an error.
		 */
		if (c == PEOF || (c = pgetc()) == PEOF)
			synerror(EOFhere);
	}
	STPUTC('\0', out);

	/* c is reused as the length of the text (including the '\0') */
	c = out - stackblock();
	out = stackblock();
	grabstackblock(c);
	wordtext = out;

	TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
		eofmark, striptabs ? " tab stripped" : "", c, wordtext));
}
852 
853 STATIC void
854 readheredocs(void)
855 {
856 	struct heredoc *here;
857 	union node *n;
858 
859 	while (heredoclist) {
860 		here = heredoclist;
861 		heredoclist = here->next;
862 		if (needprompt) {
863 			setprompt(2);
864 			needprompt = 0;
865 		}
866 
867 		slurp_heredoc(here->eofmark, here->striptabs,
868 		    here->here->nhere.type == NHERE);
869 
870 		n = stalloc(sizeof(struct narg));
871 		n->narg.type = NARG;
872 		n->narg.next = NULL;
873 		n->narg.text = wordtext;
874 		n->narg.backquote = backquotelist;
875 		here->here->nhere.doc = n;
876 
877 		if (here->here->nhere.type == NHERE)
878 			continue;
879 
880 		/*
881 		 * Now "parse" here docs that have unquoted eofmarkers.
882 		 */
883 		setinputstring(wordtext, 1);
884 		readtoken1(pgetc(), DQSYNTAX, 1);
885 		n->narg.text = wordtext;
886 		n->narg.backquote = backquotelist;
887 		popfile();
888 	}
889 }
890 
891 STATIC int
892 peektoken(void)
893 {
894 	int t;
895 
896 	t = readtoken();
897 	tokpushback++;
898 	return (t);
899 }
900 
/*
 * Return the next token, applying the processing requested by checkkwd
 * to what xxreadtoken() produces:
 *
 *  - if checkkwd is 2, newline tokens are skipped (reading any pending
 *    here documents after each one)
 *  - if checkkwd is non-zero, an unquoted word which is one of the
 *    strings in parsekwd[] is returned as that keyword's token, and
 *    otherwise, unless noalias is set, a word which is an alias has its
 *    value pushed onto the input and a token is read again.
 *
 * On return checkkwd has been reset to zero, except that after a TNOT
 * token its value on entry is restored.
 */
STATIC int
readtoken(void)
{
	int t;
	int savecheckkwd = checkkwd;
#ifdef DEBUG
	/* used only by the TRACE() at the end of the function */
	int alreadyseen = tokpushback;
#endif
	struct alias *ap;

	top:
	t = xxreadtoken();

	if (checkkwd) {
		/*
		 * eat newlines
		 */
		if (checkkwd == 2) {
			checkkwd = 0;
			while (t == TNL) {
				readheredocs();
				t = xxreadtoken();
			}
		} else
			checkkwd = 0;
		/*
		 * check for keywords and aliases
		 */
		if (t == TWORD && !quoteflag) {
			const char *const *pp;

			for (pp = parsekwd; *pp; pp++) {
				/* first character compared before calling equal() */
				if (**pp == *wordtext && equal(*pp, wordtext)) {
					/* token value is the table index plus KWDOFFSET */
					lasttoken = t = pp -
					    parsekwd + KWDOFFSET;
					TRACE(("keyword %s recognized\n", tokname[t]));
					goto out;
				}
			}
			if (!noalias &&
			    (ap = lookupalias(wordtext, 1)) != NULL) {
				/* read the alias value under the original checkkwd */
				pushstring(ap->val, strlen(ap->val), ap);
				checkkwd = savecheckkwd;
				goto top;
			}
		}
out:
		/* what follows a '!' is treated like what preceded it */
		checkkwd = (t == TNOT) ? savecheckkwd : 0;
	}
	TRACE(("%stoken %s %s\n", alreadyseen ? "reread " : "", tokname[t], t == TWORD ? wordtext : ""));
	return (t);
}
953 
954 
955 /*
956  * Read the next input token.
957  * If the token is a word, we set backquotelist to the list of cmds in
958  *	backquotes.  We set quoteflag to true if any part of the word was
959  *	quoted.
960  * If the token is TREDIR, then we set redirnode to a structure containing
961  *	the redirection.
962  * In all cases, the variable startlinno is set to the number of the line
963  *	on which the token starts.
964  *
965  * [Change comment:  here documents and internal procedures]
966  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
967  *  word parsing code into a separate routine.  In this case, readtoken
968  *  doesn't need to have any internal procedures, but parseword does.
969  *  We could also make parseoperator in essence the main routine, and
970  *  have parseword (readtoken1?) handle both words and redirection.]
971  */
972 
973 #define RETURN(token)	return lasttoken = token
974 
975 STATIC int
976 xxreadtoken(void)
977 {
978 	int c;
979 
980 	if (tokpushback) {
981 		tokpushback = 0;
982 		return lasttoken;
983 	}
984 	if (needprompt) {
985 		setprompt(2);
986 		needprompt = 0;
987 	}
988 	startlinno = plinno;
989 	for (;;) {	/* until token or start of word found */
990 		c = pgetc_macro();
991 		switch (c) {
992 		case ' ': case '\t':
993 			continue;
994 		case '#':
995 			while ((c = pgetc()) != '\n' && c != PEOF)
996 				continue;
997 			pungetc();
998 			continue;
999 
1000 		case '\n':
1001 			plinno++;
1002 			needprompt = doprompt;
1003 			RETURN(TNL);
1004 		case PEOF:
1005 			RETURN(TEOF);
1006 
1007 		case '&':
1008 			if (pgetc() == '&')
1009 				RETURN(TAND);
1010 			pungetc();
1011 			RETURN(TBACKGND);
1012 		case '|':
1013 			if (pgetc() == '|')
1014 				RETURN(TOR);
1015 			pungetc();
1016 			RETURN(TPIPE);
1017 		case ';':
1018 			if (pgetc() == ';')
1019 				RETURN(TENDCASE);
1020 			pungetc();
1021 			RETURN(TSEMI);
1022 		case '(':
1023 			RETURN(TLP);
1024 		case ')':
1025 			RETURN(TRP);
1026 
1027 		case '\\':
1028 			switch (pgetc()) {
1029 			case '\n':
1030 				startlinno = ++plinno;
1031 				if (doprompt)
1032 					setprompt(2);
1033 				else
1034 					setprompt(0);
1035 				continue;
1036 			case PEOF:
1037 				RETURN(TEOF);
1038 			default:
1039 				pungetc();
1040 				break;
1041 			}
1042 			/* FALLTHROUGH */
1043 		default:
1044 			return readtoken1(c, BASESYNTAX, 0);
1045 		}
1046 	}
1047 #undef RETURN
1048 }
1049 
1050 
1051 
1052 /*
1053  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
1054  * is not NULL, read a here document.  In the latter case, eofmark is the
1055  * word which marks the end of the document and striptabs is true if
1056  * leading tabs should be stripped from the document.  The argument firstc
1057  * is the first character of the input token or document.
1058  *
1059  * Because C does not have internal subroutines, I have simulated them
1060  * using goto's to implement the subroutine linkage.  The following macros
1061  * will run code that appears at the end of readtoken1.
1062  */
1063 
1064 /*
1065  * We used to remember only the current syntax, variable nesting level,
1066  * double quote state for each var nesting level, and arith nesting
1067  * level (unrelated to var nesting) and one prev syntax when in arith
1068  * syntax.  This worked for simple cases, but can't handle arith inside
1069  * var expansion inside arith inside var with some quoted and some not.
1070  *
1071  * Inspired by FreeBSD's implementation (though it was the obvious way)
1072  * though implemented differently, we now have a stack that keeps track
1073  * of what we are doing now, and what we were doing previously.
1074  * Every time something changes, which will eventually end and should
1075  * revert to the previous state, we push this stack, and then pop it
1076  * again later (that is every ${} with an operator (to parse the word
1077  * or pattern that follows) ${x} and $x are too simple to need it)
1078  * $(( )) $( ) and "...".   Always.   Really, always!
1079  *
1080  * The stack is implemented as one static (on the C stack) base block
1081  * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1082  * enough for the vast majority of cases.  For torture tests, we
1083  * malloc more blocks as needed.  All accesses through the inline
1084  * functions below.
1085  */
1086 
1087 /*
1088  * varnest & arinest will typically be 0 or 1
1089  * (varnest can increment in usages like ${x=${y}} but probably
1090  *  does not really need to)
1091  * parenlevel allows balancing parens inside a $(( )), it is reset
1092  * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
1093  * quoted is special - we need to know 2 things ... are we inside "..."
1094  * (even if inherited from some previous nesting level) and was there
1095  * an opening '"' at this level (so the next will be closing).
1096  * "..." can span nesting levels, but cannot be opened in one and
1097  * closed in a different one.
1098  * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1099  * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1100  * is really so special that this setting is not very important)
1101  * and 0x10 that indicates that an opening quote has been seen.
1102  * The bottom 4 bits are inherited, the 0x10 bit is not.
1103  */
/*
 * The state kept for one nesting level of the word parser.
 * One entry of the state stack (struct statestack).
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax table in use at this level */
	unsigned short ts_parenlevel;	/* counters */
	unsigned short ts_varnest;	/* 64000 levels should be enough! */
	unsigned short ts_arinest;
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
};
1111 
/* Values and masks for the ts_quoted field of struct tokenstate */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	QF	0x0F		/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

/* Number of tokenstate entries held in each block of the state stack */
#define	LEVELS_PER_BLOCK	8
/* Shorthand for the type of a state stack block */
#define	VSS			struct statestack
1120 
/*
 * One block of the state stack.  Blocks are linked, newest first,
 * through prev; the oldest (base) block has prev == NULL.
 */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1126 
1127 static inline struct tokenstate *
1128 currentstate(VSS *stack)
1129 {
1130 	return &stack->tokenstate[stack->cur];
1131 }
1132 
1133 static inline struct tokenstate *
1134 prevstate(VSS *stack)
1135 {
1136 	if (stack->cur != 0)
1137 		return &stack->tokenstate[stack->cur - 1];
1138 	if (stack->prev == NULL)	/* cannot drop below base */
1139 		return &stack->tokenstate[0];
1140 	return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
1141 }
1142 
1143 static inline VSS *
1144 bump_state_level(VSS *stack)
1145 {
1146 	struct tokenstate *os, *ts;
1147 
1148 	os = currentstate(stack);
1149 
1150 	if (++stack->cur >= LEVELS_PER_BLOCK) {
1151 		VSS *ss;
1152 
1153 		ss = (VSS *)ckmalloc(sizeof (struct statestack));
1154 		ss->cur = 0;
1155 		ss->prev = stack;
1156 		stack = ss;
1157 	}
1158 
1159 	ts = currentstate(stack);
1160 
1161 	ts->ts_parenlevel = 0;	/* parens inside never match outside */
1162 
1163 	ts->ts_quoted  = os->ts_quoted & QF;	/* these are default settings */
1164 	ts->ts_varnest = os->ts_varnest;
1165 	ts->ts_arinest = os->ts_arinest;	/* when appropriate	   */
1166 	ts->ts_syntax  = os->ts_syntax;		/*    they will be altered */
1167 
1168 	return stack;
1169 }
1170 
1171 static inline VSS *
1172 drop_state_level(VSS *stack)
1173 {
1174 	if (stack->cur == 0) {
1175 		VSS *ss;
1176 
1177 		ss = stack;
1178 		stack = ss->prev;
1179 		if (stack == NULL)
1180 			return ss;
1181 		ckfree(ss);
1182 	}
1183 	--stack->cur;
1184 	return stack;
1185 }
1186 
1187 static inline void
1188 cleanup_state_stack(VSS *stack)
1189 {
1190 	while (stack->prev != NULL) {
1191 		stack->cur = 0;
1192 		stack = drop_state_level(stack);
1193 	}
1194 }
1195 
/*
 * "Call" the parsesub / parsearith code found at the end of readtoken1():
 * jump to the label, and provide the label that code jumps back to when
 * it is done.  As each use defines a label, each macro can be used only
 * once in a function.
 */
1196 #define	PARSESUB()	{goto parsesub; parsesub_return:;}
1197 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
1198 
1199 /*
1200  * The following macros all assume the existence of a local var "stack"
1201  * which contains a pointer to the current struct statestack
1202  */
1203 
1204 /*
1205  * These are macros rather than inline funcs to avoid code churn as much
1206  * as possible - they replace macros of the same name used previously.
1207  */
/* Non-zero if a quote was opened at the current level (QS is set) */
1208 #define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
/* Record that a double quote was opened at the current level */
1209 #define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = QS | DQ)
/*
 * Discard the quoting of the current level: it reverts to the quote type
 * (QF bits only) of the level below, or to unquoted at the base level
 */
1210 #define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
1211 			    stack->cur != 0 || stack->prev ?		\
1212 				prevstate(stack)->ts_quoted & QF : 0)
1213 
1214 /*
1215  * This set are just to avoid excess typing and line lengths...
1216  * The ones that "look like" var names must be implemented to be lvalues
1217  */
/* Fields of the current level's tokenstate, usable as if plain variables */
1218 #define	syntax		(currentstate(stack)->ts_syntax)
1219 #define	parenlevel	(currentstate(stack)->ts_parenlevel)
1220 #define	varnest		(currentstate(stack)->ts_varnest)
1221 #define	arinest		(currentstate(stack)->ts_arinest)
1222 #define	quoted		(currentstate(stack)->ts_quoted)
1223 #define	TS_PUSH()	(stack = bump_state_level(stack))	/* enter a new level */
1224 #define	TS_POP()	(stack = drop_state_level(stack))	/* back to level below */
1225 
1226 /*
1227  * Called to parse command substitutions.  oldstyle is true if the command
1228  * is enclosed inside `` (otherwise it was enclosed in "$( )")
1229  *
1230  * Internally nlpp is a pointer to the head of the linked
1231  * list of commands (passed by reference), and savelen is the number of
1232  * characters on the top of the stack which must be preserved.
1233  */
1234 static char *
1235 parsebackq(VSS *const stack, char * const in,
1236     struct nodelist **const pbqlist, const int oldstyle)
1237 {
1238 	struct nodelist **nlpp;
1239 	const int savepbq = parsebackquote;
1240 	union node *n;
1241 	char *out;
1242 	char *str = NULL;
1243 	char *volatile sstr = str;
1244 	struct jmploc jmploc;
1245 	struct jmploc *const savehandler = handler;
1246 	const int savelen = in - stackblock();
1247 	int saveprompt;
1248 
1249 	if (setjmp(jmploc.loc)) {
1250 		if (sstr)
1251 			ckfree(__UNVOLATILE(sstr));
1252 		cleanup_state_stack(stack);
1253 		parsebackquote = 0;
1254 		handler = savehandler;
1255 		longjmp(handler->loc, 1);
1256 	}
1257 	INTOFF;
1258 	sstr = str = NULL;
1259 	if (savelen > 0) {
1260 		sstr = str = ckmalloc(savelen);
1261 		memcpy(str, stackblock(), savelen);
1262 	}
1263 	handler = &jmploc;
1264 	INTON;
1265         if (oldstyle) {
1266                 /* We must read until the closing backquote, giving special
1267                    treatment to some slashes, and then push the string and
1268                    reread it as input, interpreting it normally.  */
1269                 int pc;
1270                 int psavelen;
1271                 char *pstr;
1272 
1273 		/*
1274 		 * Because the entire `...` is read here, we don't
1275 		 * need to bother the state stack.  That will be used
1276 		 * (as appropriate) when the processed string is re-read.
1277 		 */
1278                 STARTSTACKSTR(out);
1279 		for (;;) {
1280 			if (needprompt) {
1281 				setprompt(2);
1282 				needprompt = 0;
1283 			}
1284 			switch (pc = pgetc()) {
1285 			case '`':
1286 				goto done;
1287 
1288 			case '\\':
1289                                 if ((pc = pgetc()) == '\n') {
1290 					plinno++;
1291 					if (doprompt)
1292 						setprompt(2);
1293 					else
1294 						setprompt(0);
1295 					/*
1296 					 * If eating a newline, avoid putting
1297 					 * the newline into the new character
1298 					 * stream (via the STPUTC after the
1299 					 * switch).
1300 					 */
1301 					continue;
1302 				}
1303                                 if (pc != '\\' && pc != '`' && pc != '$'
1304                                     && (!ISDBLQUOTE() || pc != '"'))
1305                                         STPUTC('\\', out);
1306 				break;
1307 
1308 			case '\n':
1309 				plinno++;
1310 				needprompt = doprompt;
1311 				break;
1312 
1313 			case PEOF:
1314 			        startlinno = plinno;
1315 				synerror("EOF in backquote substitution");
1316  				break;
1317 
1318 			default:
1319 				break;
1320 			}
1321 			STPUTC(pc, out);
1322                 }
1323 done:
1324                 STPUTC('\0', out);
1325                 psavelen = out - stackblock();
1326                 if (psavelen > 0) {
1327 			pstr = grabstackstr(out);
1328 			setinputstring(pstr, 1);
1329                 }
1330         }
1331 	nlpp = pbqlist;
1332 	while (*nlpp)
1333 		nlpp = &(*nlpp)->next;
1334 	*nlpp = stalloc(sizeof(struct nodelist));
1335 	(*nlpp)->next = NULL;
1336 	parsebackquote = oldstyle;
1337 
1338 	if (oldstyle) {
1339 		saveprompt = doprompt;
1340 		doprompt = 0;
1341 	} else
1342 		saveprompt = 0;
1343 
1344 	n = list(0, oldstyle);
1345 
1346 	if (oldstyle)
1347 		doprompt = saveprompt;
1348 	else {
1349 		if (readtoken() != TRP) {
1350 			cleanup_state_stack(stack);
1351 			synexpect(TRP, 0);
1352 		}
1353 	}
1354 
1355 	(*nlpp)->n = n;
1356         if (oldstyle) {
1357 		/*
1358 		 * Start reading from old file again, ignoring any pushed back
1359 		 * tokens left from the backquote parsing
1360 		 */
1361                 popfile();
1362 		tokpushback = 0;
1363 	}
1364 
1365 	while (stackblocksize() <= savelen)
1366 		growstackblock();
1367 	STARTSTACKSTR(out);
1368 	if (str) {
1369 		memcpy(out, str, savelen);
1370 		STADJUST(savelen, out);
1371 		INTOFF;
1372 		ckfree(str);
1373 		sstr = str = NULL;
1374 		INTON;
1375 	}
1376 	parsebackquote = savepbq;
1377 	handler = savehandler;
1378 	if (arinest || ISDBLQUOTE())
1379 		USTPUTC(CTLBACKQ | CTLQUOTE, out);
1380 	else
1381 		USTPUTC(CTLBACKQ, out);
1382 
1383 	return out;
1384 }
1385 
1386 /*
1387  * Parse a redirection operator.  The parameter "out" points to a string
1388  * specifying the fd to be redirected.  It is guaranteed to be either ""
1389  * or a numeric string (for now anyway).  The parameter "c" contains the
1390  * first character of the redirection operator.
1391  *
1392  * Note the string "out" is on the stack, which we are about to clobber,
1393  * so process it first...
1394  */
1395 
1396 static void
1397 parseredir(const char *out,  int c)
1398 {
1399 	union node *np;
1400 	int fd;
1401 
1402 	fd = (*out == '\0') ? -1 : atoi(out);
1403 
1404 	np = stalloc(sizeof(struct nfile));
1405 	if (c == '>') {
1406 		if (fd < 0)
1407 			fd = 1;
1408 		c = pgetc();
1409 		if (c == '>')
1410 			np->type = NAPPEND;
1411 		else if (c == '|')
1412 			np->type = NCLOBBER;
1413 		else if (c == '&')
1414 			np->type = NTOFD;
1415 		else {
1416 			np->type = NTO;
1417 			pungetc();
1418 		}
1419 	} else {	/* c == '<' */
1420 		if (fd < 0)
1421 			fd = 0;
1422 		switch (c = pgetc()) {
1423 		case '<':
1424 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
1425 				np = stalloc(sizeof(struct nhere));
1426 				np->nfile.fd = 0;
1427 			}
1428 			np->type = NHERE;
1429 			heredoc = stalloc(sizeof(struct heredoc));
1430 			heredoc->here = np;
1431 			heredoc->startline = plinno;
1432 			if ((c = pgetc()) == '-') {
1433 				heredoc->striptabs = 1;
1434 			} else {
1435 				heredoc->striptabs = 0;
1436 				pungetc();
1437 			}
1438 			break;
1439 
1440 		case '&':
1441 			np->type = NFROMFD;
1442 			break;
1443 
1444 		case '>':
1445 			np->type = NFROMTO;
1446 			break;
1447 
1448 		default:
1449 			np->type = NFROM;
1450 			pungetc();
1451 			break;
1452 		}
1453 	}
1454 	np->nfile.fd = fd;
1455 
1456 	redirnode = np;		/* this is the "value" of TRENODE */
1457 }
1458 
1459 
1460 /*
1461  * The lowest level basic tokenizer.
1462  *
1463  * The next input byte (character) is in firstc, syn says which
1464  * syntax tables we are to use (basic, single or double quoted, or arith)
1465  * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1466  * quote character itself is not special (used parsing here docs and similar)
1467  *
1468  * The result is the type of the next token (its value, when there is one,
1469  * is saved in the relevant global var - must fix that someday!) which is
1470  * also saved for re-reading ("lasttoken").
1471  *
1472  * Overall, this routine does far more parsing than it is supposed to.
1473  * That will also need fixing, someday...
1474  */
1475 STATIC int
1476 readtoken1(int firstc, char const *syn, int magicq)
1477 {
1478 	int c;
1479 	char * out;
1480 	int len;
1481 	struct nodelist *bqlist;
1482 	int quotef;
1483 	VSS static_stack;
1484 	VSS *stack = &static_stack;
1485 
1486 	stack->prev = NULL;
1487 	stack->cur = 0;
1488 
1489 	syntax = syn;
1490 
1491 	startlinno = plinno;
1492 	varnest = 0;
1493 	quoted = 0;
1494 	if (syntax == DQSYNTAX)
1495 		SETDBLQUOTE();
1496 	quotef = 0;
1497 	bqlist = NULL;
1498 	arinest = 0;
1499 	parenlevel = 0;
1500 
1501 	STARTSTACKSTR(out);
1502 
1503 	for (c = firstc ;; c = pgetc_macro()) {	/* until of token */
1504 		CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
1505 		switch (syntax[c]) {
1506 		case CNL:	/* '\n' */
1507 			if (syntax == BASESYNTAX)
1508 				break;	/* exit loop */
1509 			USTPUTC(c, out);
1510 			plinno++;
1511 			if (doprompt)
1512 				setprompt(2);
1513 			else
1514 				setprompt(0);
1515 			continue;
1516 
1517 		case CWORD:
1518 			USTPUTC(c, out);
1519 			continue;
1520 		case CCTL:
1521 			if (!magicq || ISDBLQUOTE())
1522 				USTPUTC(CTLESC, out);
1523 			USTPUTC(c, out);
1524 			continue;
1525 		case CBACK:	/* backslash */
1526 			c = pgetc();
1527 			if (c == PEOF) {
1528 				USTPUTC('\\', out);
1529 				pungetc();
1530 				continue;
1531 			}
1532 			if (c == '\n') {
1533 				plinno++;
1534 				if (doprompt)
1535 					setprompt(2);
1536 				else
1537 					setprompt(0);
1538 				continue;
1539 			}
1540 			quotef = 1;	/* current token is quoted */
1541 			if (ISDBLQUOTE() && c != '\\' && c != '`' &&
1542 			    c != '$' && (c != '"' || magicq))
1543 				USTPUTC('\\', out);
1544 			if (SQSYNTAX[c] == CCTL)
1545 				USTPUTC(CTLESC, out);
1546 			else if (!magicq) {
1547 				USTPUTC(CTLQUOTEMARK, out);
1548 				USTPUTC(c, out);
1549 				if (varnest != 0)
1550 					USTPUTC(CTLQUOTEEND, out);
1551 				continue;
1552 			}
1553 			USTPUTC(c, out);
1554 			continue;
1555 		case CSQUOTE:
1556 			if (syntax != SQSYNTAX) {
1557 				if (!magicq)
1558 					USTPUTC(CTLQUOTEMARK, out);
1559 				quotef = 1;
1560 				TS_PUSH();
1561 				syntax = SQSYNTAX;
1562 				quoted = SQ;
1563 				continue;
1564 			}
1565 			if (magicq && arinest == 0 && varnest == 0) {
1566 				/* Ignore inside quoted here document */
1567 				USTPUTC(c, out);
1568 				continue;
1569 			}
1570 			/* End of single quotes... */
1571 			TS_POP();
1572 			if (syntax == BASESYNTAX && varnest != 0)
1573 				USTPUTC(CTLQUOTEEND, out);
1574 			continue;
1575 		case CDQUOTE:
1576 			if (magicq && arinest == 0 && varnest == 0) {
1577 				/* Ignore inside here document */
1578 				USTPUTC(c, out);
1579 				continue;
1580 			}
1581 			quotef = 1;
1582 			if (arinest) {
1583 				if (ISDBLQUOTE()) {
1584 					TS_POP();
1585 				} else {
1586 					TS_PUSH();
1587 					syntax = DQSYNTAX;
1588 					SETDBLQUOTE();
1589 					USTPUTC(CTLQUOTEMARK, out);
1590 				}
1591 				continue;
1592 			}
1593 			if (magicq)
1594 				continue;
1595 			if (ISDBLQUOTE()) {
1596 				TS_POP();
1597 				if (varnest != 0)
1598 					USTPUTC(CTLQUOTEEND, out);
1599 			} else {
1600 				TS_PUSH();
1601 				syntax = DQSYNTAX;
1602 				SETDBLQUOTE();
1603 				USTPUTC(CTLQUOTEMARK, out);
1604 			}
1605 			continue;
1606 		case CVAR:	/* '$' */
1607 			PARSESUB();		/* parse substitution */
1608 			continue;
1609 		case CENDVAR:	/* CLOSEBRACE */
1610 			if (varnest > 0 && !ISDBLQUOTE()) {
1611 				TS_POP();
1612 				USTPUTC(CTLENDVAR, out);
1613 			} else {
1614 				USTPUTC(c, out);
1615 			}
1616 			continue;
1617 		case CLP:	/* '(' in arithmetic */
1618 			parenlevel++;
1619 			USTPUTC(c, out);
1620 			continue;;
1621 		case CRP:	/* ')' in arithmetic */
1622 			if (parenlevel > 0) {
1623 				USTPUTC(c, out);
1624 				--parenlevel;
1625 			} else {
1626 				if (pgetc() == ')') {
1627 					if (--arinest == 0) {
1628 						TS_POP();
1629 						USTPUTC(CTLENDARI, out);
1630 					} else
1631 						USTPUTC(')', out);
1632 				} else {
1633 					/*
1634 					 * unbalanced parens
1635 					 *  (don't 2nd guess - no error)
1636 					 */
1637 					pungetc();
1638 					USTPUTC(')', out);
1639 				}
1640 			}
1641 			continue;
1642 		case CBQUOTE:	/* '`' */
1643 			out = parsebackq(stack, out, &bqlist, 1);
1644 			continue;
1645 		case CEOF:		/* --> c == PEOF */
1646 			break;		/* will exit loop */
1647 		default:
1648 			if (varnest == 0 && !ISDBLQUOTE())
1649 				break;	/* exit loop */
1650 			USTPUTC(c, out);
1651 			continue;
1652 		}
1653 		break;	/* break from switch -> break from for loop too */
1654 	}
1655 
1656 	if (syntax == ARISYNTAX) {
1657 		cleanup_state_stack(stack);
1658 		synerror("Missing '))'");
1659 	}
1660 	if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
1661 		cleanup_state_stack(stack);
1662 		synerror("Unterminated quoted string");
1663 	}
1664 	if (varnest != 0) {
1665 		cleanup_state_stack(stack);
1666 		startlinno = plinno;
1667 		/* { */
1668 		synerror("Missing '}'");
1669 	}
1670 
1671 	USTPUTC('\0', out);
1672 	len = out - stackblock();
1673 	out = stackblock();
1674 
1675 	if (!magicq) {
1676 		if ((c == '<' || c == '>')
1677 		 && quotef == 0 && (*out == '\0' || is_number(out))) {
1678 			parseredir(out, c);
1679 			cleanup_state_stack(stack);
1680 			return lasttoken = TREDIR;
1681 		} else {
1682 			pungetc();
1683 		}
1684 	}
1685 
1686 	quoteflag = quotef;
1687 	backquotelist = bqlist;
1688 	grabstackblock(len);
1689 	wordtext = out;
1690 	cleanup_state_stack(stack);
1691 	return lasttoken = TWORD;
1692 /* end of readtoken routine */
1693 
1694 
1695 /*
1696  * Parse a substitution.  At this point, we have read the dollar sign
1697  * and nothing else.
1698  */
1699 
1700 parsesub: {
1701 	char buf[10];
1702 	int subtype;
1703 	int typeloc;
1704 	int flags;
1705 	char *p;
1706 	static const char types[] = "}-+?=";
1707 	int i;
1708 	int linno;
1709 
1710 	c = pgetc();
1711 	if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1712 		USTPUTC('$', out);
1713 		pungetc();
1714 	} else if (c == '(') {	/* $(command) or $((arith)) */
1715 		if (pgetc() == '(') {
1716 			PARSEARITH();
1717 		} else {
1718 			pungetc();
1719 			out = parsebackq(stack, out, &bqlist, 0);
1720 		}
1721 	} else {
1722 		USTPUTC(CTLVAR, out);
1723 		typeloc = out - stackblock();
1724 		USTPUTC(VSNORMAL, out);
1725 		subtype = VSNORMAL;
1726 		flags = 0;
1727 		if (c == OPENBRACE) {
1728 			c = pgetc();
1729 			if (c == '#') {
1730 				if ((c = pgetc()) == CLOSEBRACE)
1731 					c = '#';
1732 				else
1733 					subtype = VSLENGTH;
1734 			}
1735 			else
1736 				subtype = 0;
1737 		}
1738 		if (is_name(c)) {
1739 			p = out;
1740 			do {
1741 				STPUTC(c, out);
1742 				c = pgetc();
1743 			} while (is_in_name(c));
1744 			if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
1745 				/* Replace the variable name with the
1746 				 * current line number. */
1747 				linno = plinno;
1748 				if (funclinno != 0)
1749 					linno -= funclinno - 1;
1750 				snprintf(buf, sizeof(buf), "%d", linno);
1751 				STADJUST(-6, out);
1752 				for (i = 0; buf[i] != '\0'; i++)
1753 					STPUTC(buf[i], out);
1754 				flags |= VSLINENO;
1755 			}
1756 		} else if (is_digit(c)) {
1757 			do {
1758 				USTPUTC(c, out);
1759 				c = pgetc();
1760 			} while (subtype != VSNORMAL && is_digit(c));
1761 		}
1762 		else if (is_special(c)) {
1763 			USTPUTC(c, out);
1764 			c = pgetc();
1765 		}
1766 		else {
1767 badsub:
1768 			cleanup_state_stack(stack);
1769 			synerror("Bad substitution");
1770 		}
1771 
1772 		STPUTC('=', out);
1773 		if (subtype == 0) {
1774 			switch (c) {
1775 			case ':':
1776 				flags |= VSNUL;
1777 				c = pgetc();
1778 				/*FALLTHROUGH*/
1779 			default:
1780 				p = strchr(types, c);
1781 				if (p == NULL)
1782 					goto badsub;
1783 				subtype = p - types + VSNORMAL;
1784 				break;
1785 			case '%':
1786 			case '#':
1787 				{
1788 					int cc = c;
1789 					subtype = c == '#' ? VSTRIMLEFT :
1790 							     VSTRIMRIGHT;
1791 					c = pgetc();
1792 					if (c == cc)
1793 						subtype++;
1794 					else
1795 						pungetc();
1796 					break;
1797 				}
1798 			}
1799 		} else {
1800 			pungetc();
1801 		}
1802 		if (ISDBLQUOTE() || arinest)
1803 			flags |= VSQUOTE;
1804 		if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
1805 			flags |= VSPATQ;
1806 		*(stackblock() + typeloc) = subtype | flags;
1807 		if (subtype != VSNORMAL) {
1808 			TS_PUSH();
1809 			varnest++;
1810 			arinest = 0;
1811 			if (subtype > VSASSIGN) {	/* # ## % %% */
1812 				syntax = BASESYNTAX;
1813 				CLRDBLQUOTE();
1814 			}
1815 		}
1816 	}
1817 	goto parsesub_return;
1818 }
1819 
1820 
1821 /*
1822  * Parse an arithmetic expansion (indicate start of one and set state)
1823  */
1824 parsearith: {
1825 
1826 	if (syntax == ARISYNTAX) {
1827 		/*
1828 		 * we collapse embedded arithmetic expansion to
1829 		 * parentheses, which should be equivalent
1830 		 */
1831 		USTPUTC('(', out);
1832 		USTPUTC('(', out);
1833 		/*
1834 		 * Need 2 of them because there will (should be)
1835 		 * two closing ))'s to follow later.
1836 		 */
1837 		parenlevel += 2;
1838 	} else {
1839 		TS_PUSH();
1840 		syntax = ARISYNTAX;
1841 		++arinest;
1842 		varnest = 0;
1843 
1844 		USTPUTC(CTLARI, out);
1845 		if (ISDBLQUOTE())
1846 			USTPUTC('"',out);
1847 		else
1848 			USTPUTC(' ',out);
1849 	}
1850 	goto parsearith_return;
1851 }
1852 
1853 } /* end of readtoken */
1854 
1855 
1856 
/*
 * Only present when "mkinit" is defined.  It discards any pushed back
 * token and turns off keyword checking.
 * NOTE(review): presumably this is text for the mkinit tool to collect
 * into the shell's reset code, rather than code compiled here - confirm.
 */
1856 #ifdef mkinit
1857 RESET {
1858 	tokpushback = 0;
1859 	checkkwd = 0;
1860 }
1861 #endif
1863 
1864 /*
1865  * Returns true if the text contains nothing to expand (no dollar signs
1866  * or backquotes).
1867  */
1868 
1869 STATIC int
1870 noexpand(char *text)
1871 {
1872 	char *p;
1873 	char c;
1874 
1875 	p = text;
1876 	while ((c = *p++) != '\0') {
1877 		if (c == CTLQUOTEMARK)
1878 			continue;
1879 		if (c == CTLESC)
1880 			p++;
1881 		else if (BASESYNTAX[(int)c] == CCTL)
1882 			return 0;
1883 	}
1884 	return 1;
1885 }
1886 
1887 
/*
 * Check whether "name" is usable as a variable name: its first
 * character must satisfy is_name(), and every character after that
 * must satisfy is_in_name().  Returns 1 if so, 0 if not (which
 * includes the empty string).
 */

int
goodname(char *name)
{
	char *p;

	if (!is_name(*name))
		return 0;

	for (p = name + 1; *p != '\0'; p++) {
		if (!is_in_name(*p))
			return 0;
	}
	return 1;
}
1907 
1908 
1909 /*
1910  * Called when an unexpected token is read during the parse.  The argument
1911  * is the token that is expected, or -1 if more than one type of token can
1912  * occur at this point.
1913  */
1914 
1915 STATIC void
1916 synexpect(int token, const char *text)
1917 {
1918 	char msg[64];
1919 	char *p;
1920 
1921 	if (lasttoken == TWORD) {
1922 		size_t len = strlen(wordtext);
1923 
1924 		if (len <= 13)
1925 			fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
1926 		else
1927 			fmtstr(msg, 34,
1928 			    "Word \"%.10s...\" unexpected", wordtext);
1929 	} else
1930 		fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
1931 
1932 	p = strchr(msg, '\0');
1933 	if (text)
1934 		fmtstr(p, 30, " (expecting \"%.10s\")", text);
1935 	else if (token >= 0)
1936 		fmtstr(p, 30, " (expecting %s)",  tokname[token]);
1937 
1938 	synerror(msg);
1939 	/* NOTREACHED */
1940 }
1941 
1942 
/*
 * Report a syntax error.  The message given is passed to error(),
 * preceded by startlinno, the line number saved for the current token.
 * This does not return.
 */
1943 STATIC void
1944 synerror(const char *msg)
1945 {
1946 	error("%d: Syntax error: %s\n", startlinno, msg);
1947 	/* NOTREACHED */
1948 }
1949 
/*
 * Select the prompt to be used from now on: "which" is saved in
 * whichprompt, from which getprompt() picks the prompt string.
 *
 * The prompt is also written out here with out2str().  The one exception
 * is a shell built without SMALL in which "el" is set, where nothing is
 * written.
 * NOTE(review): presumably "el" means editline is in use, and it shows
 * the prompt itself by calling getprompt() - confirm.
 */
1950 STATIC void
1951 setprompt(int which)
1952 {
1953 	whichprompt = which;
1954 
1955 #ifndef SMALL
1956 	if (!el)
1957 #endif
1958 		out2str(getprompt(NULL));
1959 }
1960 
1961 /*
1962  * called by editline -- any expansions to the prompt
1963  *    should be added here.
1964  */
1965 const char *
1966 getprompt(void *unused)
1967 {
1968 	switch (whichprompt) {
1969 	case 0:
1970 		return "";
1971 	case 1:
1972 		return ps1val();
1973 	case 2:
1974 		return ps2val();
1975 	default:
1976 		return "<internal prompt error>";
1977 	}
1978 }
1979