xref: /netbsd-src/bin/ksh/lex.c (revision de1dfb1250df962f1ff3a011772cf58e605aed11)
1 /*	$NetBSD: lex.c,v 1.10 2004/07/07 19:20:09 mycroft Exp $	*/
2 
3 /*
4  * lexical analysis and source input
5  */
6 #include <sys/cdefs.h>
7 
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.10 2004/07/07 19:20:09 mycroft Exp $");
10 #endif
11 
12 
13 #include "sh.h"
14 #include <ctype.h>
15 
16 
17 /* Structure to keep track of the lexing state and the various pieces of info
18  * needed for each particular state.
19  */
20 typedef struct lex_state Lex_state;
21 struct lex_state {
22 	int ls_state;		/* lexer state stored in this stack slot (-1 in the sentinel slot) */
	/* Per-state scratch data; which member is valid depends on ls_state. */
23 	union {
24 	    /* $(...) */
25 	    struct scsparen_info {
26 		    int nparen;		/* count open parenthesis */
27 		    int csstate; /* XXX remove */
28 #define ls_scsparen ls_info.u_scsparen
29 	    } u_scsparen;
30 
31 	    /* $((...)) */
32 	    struct sasparen_info {
33 		    int nparen;		/* count open parenthesis */
34 		    int start;		/* marks start of $(( in output str */
35 #define ls_sasparen ls_info.u_sasparen
36 	    } u_sasparen;
37 
38 	    /* ((...)) */
39 	    struct sletparen_info {
40 		    int nparen;		/* count open parenthesis */
41 #define ls_sletparen ls_info.u_sletparen
42 	    } u_sletparen;
43 
44 	    /* `...` */
45 	    struct sbquote_info {
46 		    int indquotes;	/* true if in double quotes: "`...`" */
47 #define ls_sbquote ls_info.u_sbquote
48 	    } u_sbquote;
49 
	    /* Only meaningful in slot 0 of a block: push_state_() stores the
	     * end of the previous block here; yylex() stores a null pointer
	     * in the first (on-stack) block.
	     */
50 	    Lex_state *base;		/* used to point to next state block */
51 	} ls_info;
52 };
53 
/* Bounds of the block of Lex_state slots that yylex() is currently using. */
54 typedef struct State_info State_info;
55 struct State_info {
56 	Lex_state	*base;	/* slot 0 of the current block (holds the link, not a live state) */
57 	Lex_state	*end;	/* one past the last slot of the current block */
58 };
59 
60 
61 static void	readhere ARGS((struct ioword *iop));
62 static int	getsc__ ARGS((void));
63 static void	getsc_line ARGS((Source *s));
64 static int	getsc_bn ARGS((void));
65 static char	*get_brace_var ARGS((XString *wsp, char *wp));
66 static int	arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void	gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
71 
/* Set to 1 by getsc_bn() when it returns a backslash that is not followed
 * by a newline, and to 2 when the character after that backslash is handed
 * out; decremented by ungetsc().  While it is non-zero the getsc() fast
 * path is bypassed.
 */
72 static int backslash_skip;
/* Counter: while non-zero, getsc_bn() returns characters as they are and
 * does not drop backslash-newline pairs.  It is raised in this file while
 * reading comments, single-quoted text and here documents without IOEVAL.
 */
73 static int ignore_backslash_newline;
74 
75 /* optimized getsc_bn() */
/* Takes the next byte straight from source->str unless it is NUL or a
 * backslash, or backslash_skip is set; otherwise defers to getsc_bn().
 */
76 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
77 			 && !backslash_skip ? *source->str++ : getsc_bn())
78 /* optimized getsc__() */
/* Takes the next byte from source->str, calling getsc__() only at a NUL. */
79 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
80 
/* Number of Lex_state slots per block (slot 0 of each block is a link). */
81 #define STATE_BSIZE	32
82 
/* Enter state s: advance statep, chaining a new block via push_state_() when
 * the current one is full, and record s both in the new slot and in `state'.
 * Uses the yylex() locals statep, state_info and state.
 */
83 #define PUSH_STATE(s)	do { \
84 			    if (++statep == state_info.end) \
85 				statep = push_state_(&state_info, statep); \
86 			    state = statep->ls_state = (s); \
87 			} while (0)
88 
/* Leave the current state: step statep back, returning to the previous block
 * via pop_state_() when slot 0 is reached, and reload `state' from the slot
 * now on top.  Uses the yylex() locals statep, state_info and state.
 */
89 #define POP_STATE()	do { \
90 			    if (--statep == state_info.base) \
91 				statep = pop_state_(&state_info, statep); \
92 			    state = statep->ls_state; \
93 			} while (0)
94 
95 
96 
97 /*
98  * Lexical analyzer
99  *
100  * tokens are not regular expressions, they are LL(1).
101  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102  * hence the state stack.
103  */
104 
/*
 * yylex: read the next token from the current input source.
 *
 * cf is a set of flag bits selecting the lexing context; the bits tested
 * here are ONEWORD, LETEXPR, HEREDELIM, CMDWORD, VARASN, ARRAYVAR, KEYWORD,
 * ESACONLY, ALIAS and CONTIN.
 *
 * Returns REDIR (yylval.iop set) for an I/O redirection operator, LWORD
 * (yylval.cp set) for a word, the value stored in the keywords table when
 * KEYWORD is set and the word matches, or otherwise the operator character
 * itself or one of BREAK, LOGOR, LOGAND, COPROC, MDPAREN for the two
 * character operators.  A state still open at the end of the word is
 * reported through yyerror().
 */
105 int
106 yylex(cf)
107 	int cf;
108 {
109 	Lex_state states[STATE_BSIZE], *statep;
110 	State_info state_info;
111 	register int c, state;
112 	XString ws;		/* expandable output word */
113 	register char *wp;	/* output word pointer */
114 	char *sp, *dp;
115 	int c2;
116 
117 
118   Again:
	/* Slot 0 of the on-stack block is a sentinel with a null link; the
	 * live state starts in slot 1.
	 */
119 	states[0].ls_state = -1;
120 	states[0].ls_info.base = (Lex_state *) 0;
121 	statep = &states[1];
122 	state_info.base = states;
123 	state_info.end = &states[STATE_BSIZE];
124 
125 	Xinit(ws, wp, 64, ATEMP);
126 
127 	backslash_skip = 0;
128 	ignore_backslash_newline = 0;
129 
130 	if (cf&ONEWORD)
131 		state = SWORD;
132 #ifdef KSH
133 	else if (cf&LETEXPR) {
134 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
135 		state = SLETPAREN;
136 		statep->ls_sletparen.nparen = 0;
137 	}
138 #endif /* KSH */
139 	else {		/* normal lexing */
140 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks and a '#' comment up to end of line */
141 		while ((c = getsc()) == ' ' || c == '\t')
142 			;
143 		if (c == '#') {
144 			ignore_backslash_newline++;
145 			while ((c = getsc()) != '\0' && c != '\n')
146 				;
147 			ignore_backslash_newline--;
148 		}
149 		ungetsc(c);
150 	}
151 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
152 		source->flags &= ~SF_ALIAS;
153 		/* In POSIX mode, a trailing space only counts if we are
154 		 * parsing a simple command
155 		 */
156 		if (!Flag(FPOSIX) || (cf & CMDWORD))
157 			cf |= ALIAS;
158 	}
159 
160 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 	statep->ls_state = state;
162 
163 	/* collect non-special or quoted characters to form word */
164 	while (!((c = getsc()) == 0
165 		 || ((state == SBASE || state == SHEREDELIM)
166 		     && ctype(c, C_LEX1))))
167 	{
168 		Xcheck(ws, wp);
169 		switch (state) {
170 		  case SBASE:
			/* '[' after a variable name: try to collect an array
			 * subscript with arraysub().
			 */
171 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 				*wp = EOS; /* temporary */
173 				if (is_wdvarname(Xstring(ws, wp), FALSE))
174 				{
175 					char *p, *tmp;
176 
177 					if (arraysub(&tmp)) {
178 						*wp++ = CHAR;
179 						*wp++ = c;
180 						for (p = tmp; *p; ) {
181 							Xcheck(ws, wp);
182 							*wp++ = CHAR;
183 							*wp++ = *p++;
184 						}
185 						afree(tmp, ATEMP);
186 						break;
187 					} else {
						/* no matching ']': push the text
						 * arraysub() consumed back as an
						 * SREREAD source so it is read again
						 */
188 						Source *s;
189 
190 						s = pushs(SREREAD,
191 							  source->areap);
192 						s->start = s->str
193 							= s->u.freeme = tmp;
194 						s->next = source;
195 						source = s;
196 					}
197 				}
198 				*wp++ = CHAR;
199 				*wp++ = c;
200 				break;
201 			}
202 			/* fall through.. */
203 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 			if (c == '*' || c == '@' || c == '+' || c == '?'
206 			    || c == '!')
207 			{
208 				c2 = getsc();
209 				if (c2 == '(' /*)*/ ) {
210 					*wp++ = OPAT;
211 					*wp++ = c;
212 					PUSH_STATE(SPATTERN);
213 					break;
214 				}
215 				ungetsc(c2);
216 			}
217 #endif /* KSH */
218 			/* fall through.. */
219 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
220 			switch (c) {
221 			  case '\\':
222 				c = getsc();
223 #ifdef OS2
224 				if (isalnum(c)) {
225 					*wp++ = CHAR, *wp++ = '\\';
226 					*wp++ = CHAR, *wp++ = c;
227 				} else
228 #endif
229 				if (c) /* trailing \ is lost */
230 					*wp++ = QCHAR, *wp++ = c;
231 				break;
232 			  case '\'':
233 				*wp++ = OQUOTE;
234 				ignore_backslash_newline++;
235 				PUSH_STATE(SSQUOTE);
236 				break;
237 			  case '"':
238 				*wp++ = OQUOTE;
239 				PUSH_STATE(SDQUOTE);
240 				break;
241 			  default:
242 				goto Subst;
243 			}
244 			break;
245 
			/* Shared by SBASE (via Sbase2), SDQUOTE and SWORD:
			 * handles backslash, $ and ` substitutions.
			 */
246 		  Subst:
247 			switch (c) {
248 			  case '\\':
249 				c = getsc();
250 				switch (c) {
251 				  case '"': case '\\':
252 				  case '$': case '`':
253 					*wp++ = QCHAR, *wp++ = c;
254 					break;
255 				  default:
256 					Xcheck(ws, wp);
257 					if (c) { /* trailing \ is lost */
258 						*wp++ = CHAR, *wp++ = '\\';
259 						*wp++ = CHAR, *wp++ = c;
260 					}
261 					break;
262 				}
263 				break;
264 			  case '$':
265 				c = getsc();
266 				if (c == '(') /*)*/ {
267 					c = getsc();
268 					if (c == '(') /*)*/ {
269 						PUSH_STATE(SASPAREN);
270 						statep->ls_sasparen.nparen = 2;
271 						statep->ls_sasparen.start =
272 							Xsavepos(ws, wp);
273 						*wp++ = EXPRSUB;
274 					} else {
275 						ungetsc(c);
276 						PUSH_STATE(SCSPAREN);
277 						statep->ls_scsparen.nparen = 1;
278 						statep->ls_scsparen.csstate = 0;
279 						*wp++ = COMSUB;
280 					}
281 				} else if (c == '{') /*}*/ {
282 					*wp++ = OSUBST;
283 					*wp++ = '{'; /*}*/
284 					wp = get_brace_var(&ws, wp);
285 					c = getsc();
286 					/* allow :# and :% (ksh88 compat) */
287 					if (c == ':') {
288 						*wp++ = CHAR, *wp++ = c;
289 						c = getsc();
290 					}
291 					/* If this is a trim operation,
292 					 * treat (,|,) specially in STBRACE.
293 					 */
294 					if (c == '#' || c == '%') {
295 						ungetsc(c);
296 						PUSH_STATE(STBRACE);
297 					} else {
298 						ungetsc(c);
299 						PUSH_STATE(SBRACE);
300 					}
301 				} else if (ctype(c, C_ALPHA)) {
					/* $name: emitted as OSUBST 'X' name NUL CSUBST 'X' */
302 					*wp++ = OSUBST;
303 					*wp++ = 'X';
304 					do {
305 						Xcheck(ws, wp);
306 						*wp++ = c;
307 						c = getsc();
308 					} while (ctype(c, C_ALPHA|C_DIGIT));
309 					*wp++ = '\0';
310 					*wp++ = CSUBST;
311 					*wp++ = 'X';
312 					ungetsc(c);
313 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
					/* single character parameter, same encoding as $name */
314 					Xcheck(ws, wp);
315 					*wp++ = OSUBST;
316 					*wp++ = 'X';
317 					*wp++ = c;
318 					*wp++ = '\0';
319 					*wp++ = CSUBST;
320 					*wp++ = 'X';
321 				} else {
322 					*wp++ = CHAR, *wp++ = '$';
323 					ungetsc(c);
324 				}
325 				break;
326 			  case '`':
327 				PUSH_STATE(SBQUOTE);
328 				*wp++ = COMSUB;
329 				/* Need to know if we are inside double quotes
330 				 * since sh/at&t-ksh translate the \" to " in
331 				 * "`..\"..`".
332 				 * This is not done in posix mode (section
333 				 * 3.2.3, Double Quotes: "The backquote shall
334 				 * retain its special meaning introducing the
335 				 * other form of command substitution (see
336 				 * 3.6.3). The portion of the quoted string
337 				 * from the initial backquote and the
338 				 * characters up to the next backquote that
339 				 * is not preceded by a backslash (having
340 				 * escape characters removed) defines that
341 				 * command whose output replaces `...` when
342 				 * the word is expanded."
343 				 * Section 3.6.3, Command Substitution:
344 				 * "Within the backquoted style of command
345 				 * substitution, backslash shall retain its
346 				 * literal meaning, except when followed by
347 				 * $ ` \.").
348 				 */
349 				statep->ls_sbquote.indquotes = 0;
350 				if (!Flag(FPOSIX)) {
					/* Walk down the state stack, following
					 * the slot 0 links from block to block,
					 * looking for an enclosing SDQUOTE.
					 */
351 					Lex_state *s = statep;
352 					Lex_state *base = state_info.base;
353 					while (1) {
354 						for (; s != base; s--) {
355 							if (s->ls_state == SDQUOTE) {
356 								statep->ls_sbquote.indquotes = 1;
357 								break;
358 							}
359 						}
360 						if (s != base)
361 							break;
362 						if (!(s = s->ls_info.base))
363 							break;
364 						base = s-- - STATE_BSIZE;
365 					}
366 				}
367 				break;
368 			  default:
369 				*wp++ = CHAR, *wp++ = c;
370 			}
371 			break;
372 
373 		  case SSQUOTE:
374 			if (c == '\'') {
375 				POP_STATE();
376 				*wp++ = CQUOTE;
377 				ignore_backslash_newline--;
378 			} else
379 				*wp++ = QCHAR, *wp++ = c;
380 			break;
381 
382 		  case SDQUOTE:
383 			if (c == '"') {
384 				POP_STATE();
385 				*wp++ = CQUOTE;
386 			} else
387 				goto Subst;
388 			break;
389 
390 		  case SCSPAREN: /* $( .. ) */
391 			/* todo: deal with $(...) quoting properly
392 			 * kludge to partly fake quoting inside $(..): doesn't
393 			 * really work because nested $(..) or ${..} inside
394 			 * double quotes aren't dealt with.
395 			 */
396 			switch (statep->ls_scsparen.csstate) {
397 			  case 0: /* normal */
398 				switch (c) {
399 				  case '(':
400 					statep->ls_scsparen.nparen++;
401 					break;
402 				  case ')':
403 					statep->ls_scsparen.nparen--;
404 					break;
405 				  case '\\':
406 					statep->ls_scsparen.csstate = 1;
407 					break;
408 				  case '"':
409 					statep->ls_scsparen.csstate = 2;
410 					break;
411 				  case '\'':
412 					statep->ls_scsparen.csstate = 4;
413 					ignore_backslash_newline++;
414 					break;
415 				}
416 				break;
417 
418 			  case 1: /* backslash in normal mode */
419 			  case 3: /* backslash in double quotes */
420 				--statep->ls_scsparen.csstate;
421 				break;
422 
423 			  case 2: /* double quotes */
424 				if (c == '"')
425 					statep->ls_scsparen.csstate = 0;
426 				else if (c == '\\')
427 					statep->ls_scsparen.csstate = 3;
428 				break;
429 
430 			  case 4: /* single quotes */
431 				if (c == '\'') {
432 					statep->ls_scsparen.csstate = 0;
433 					ignore_backslash_newline--;
434 				}
435 				break;
436 			}
437 			if (statep->ls_scsparen.nparen == 0) {
438 				POP_STATE();
439 				*wp++ = 0; /* end of COMSUB */
440 			} else
441 				*wp++ = c;
442 			break;
443 
444 		  case SASPAREN: /* $(( .. )) */
445 			/* todo: deal with $((...); (...)) properly */
446 			/* XXX should nest using existing state machine
447 			 *     (embed "..", $(...), etc.) */
448 			if (c == '(')
449 				statep->ls_sasparen.nparen++;
450 			else if (c == ')') {
451 				statep->ls_sasparen.nparen--;
452 				if (statep->ls_sasparen.nparen == 1) {
453 					/*(*/
454 					if ((c2 = getsc()) == ')') {
455 						POP_STATE();
456 						*wp++ = 0; /* end of EXPRSUB */
457 						break;
458 					} else {
459 						char *s;
460 
461 						ungetsc(c2);
462 						/* mismatched parenthesis -
463 						 * assume we were really
464 						 * parsing a $(..) expression
465 						 */
						/* Rewrite the output in place: shift
						 * the collected text up one byte and
						 * replace the EXPRSUB marker with
						 * COMSUB followed by '('.
						 */
466 						s = Xrestpos(ws, wp,
467 						     statep->ls_sasparen.start);
468 						memmove(s + 1, s, wp - s);
469 						*s++ = COMSUB;
470 						*s = '('; /*)*/
471 						wp++;
472 						statep->ls_scsparen.nparen = 1;
473 						statep->ls_scsparen.csstate = 0;
474 						state = statep->ls_state
475 							= SCSPAREN;
476 
477 					}
478 				}
479 			}
480 			*wp++ = c;
481 			break;
482 
483 		  case SBRACE:
484 			/*{*/
485 			if (c == '}') {
486 				POP_STATE();
487 				*wp++ = CSUBST;
488 				*wp++ = /*{*/ '}';
489 			} else
490 				goto Sbase1;
491 			break;
492 
493 		  case STBRACE:
494 			/* Same as SBRACE, except (,|,) treated specially */
495 			/*{*/
496 			if (c == '}') {
497 				POP_STATE();
498 				*wp++ = CSUBST;
499 				*wp++ = /*{*/ '}';
500 			} else if (c == '|') {
501 				*wp++ = SPAT;
502 			} else if (c == '(') {
503 				*wp++ = OPAT;
504 				*wp++ = ' ';	/* simile for @ */
505 				PUSH_STATE(SPATTERN);
506 			} else
507 				goto Sbase1;
508 			break;
509 
510 		  case SBQUOTE:
511 			if (c == '`') {
512 				*wp++ = 0;
513 				POP_STATE();
514 			} else if (c == '\\') {
515 				switch (c = getsc()) {
516 				  case '\\':
517 				  case '$': case '`':
518 					*wp++ = c;
519 					break;
520 				  case '"':
521 					if (statep->ls_sbquote.indquotes) {
522 						*wp++ = c;
523 						break;
524 					}
525 					/* fall through.. */
526 				  default:
527 					if (c) { /* trailing \ is lost */
528 						*wp++ = '\\';
529 						*wp++ = c;
530 					}
531 					break;
532 				}
533 			} else
534 				*wp++ = c;
535 			break;
536 
537 		  case SWORD:	/* ONEWORD */
538 			goto Subst;
539 
540 #ifdef KSH
541 		  case SLETPAREN:	/* LETEXPR: (( ... )) */
542 			/*(*/
543 			if (c == ')') {
544 				if (statep->ls_sletparen.nparen > 0)
545 				    --statep->ls_sletparen.nparen;
546 				/*(*/
547 				else if ((c2 = getsc()) == ')') {
548 					c = 0;
549 					*wp++ = CQUOTE;
550 					goto Done;
551 				} else
552 					ungetsc(c2);
553 			} else if (c == '(')
554 				/* parenthesis inside quotes and backslashes
555 				 * are lost, but at&t ksh doesn't count them
556 				 * either
557 				 */
558 				++statep->ls_sletparen.nparen;
559 			goto Sbase2;
560 #endif /* KSH */
561 
562 		  case SHEREDELIM:	/* <<,<<- delimiter */
563 			/* XXX chuck this state (and the next) - use
564 			 * the existing states ($ and \`..` should be
565 			 * stripped of their specialness after the
566 			 * fact).
567 			 */
568 			/* here delimiters need a special case since
569 			 * $ and `..` are not to be treated specially
570 			 */
571 			if (c == '\\') {
572 				c = getsc();
573 				if (c) { /* trailing \ is lost */
574 					*wp++ = QCHAR;
575 					*wp++ = c;
576 				}
577 			} else if (c == '\'') {
578 				PUSH_STATE(SSQUOTE);
579 				*wp++ = OQUOTE;
580 				ignore_backslash_newline++;
581 			} else if (c == '"') {
				/* switches state in place, without pushing */
582 				state = statep->ls_state = SHEREDQUOTE;
583 				*wp++ = OQUOTE;
584 			} else {
585 				*wp++ = CHAR;
586 				*wp++ = c;
587 			}
588 			break;
589 
590 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
591 			if (c == '"') {
592 				*wp++ = CQUOTE;
593 				state = statep->ls_state = SHEREDELIM;
594 			} else {
595 				if (c == '\\') {
596 					switch (c = getsc()) {
597 					  case '\\': case '"':
598 					  case '$': case '`':
599 						break;
600 					  default:
601 						if (c) { /* trailing \ lost */
602 							*wp++ = CHAR;
603 							*wp++ = '\\';
604 						}
605 						break;
606 					}
607 				}
608 				*wp++ = CHAR;
609 				*wp++ = c;
610 			}
611 			break;
612 
613 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
614 			if ( /*(*/ c == ')') {
615 				*wp++ = CPAT;
616 				POP_STATE();
617 			} else if (c == '|') {
618 				*wp++ = SPAT;
619 			} else if (c == '(') {
620 				*wp++ = OPAT;
621 				*wp++ = ' ';	/* simile for @ */
622 				PUSH_STATE(SPATTERN);
623 			} else
624 				goto Sbase1;
625 			break;
626 		}
627 	}
628 Done:
629 	Xcheck(ws, wp);
	/* A state other than the initial one is still open at the end of
	 * the word.
	 */
630 	if (statep != &states[1])
631 		/* XXX figure out what is missing */
632 		yyerror("no closing quote\n");
633 
634 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
635 	if (state == SHEREDELIM)
636 		state = SBASE;
637 
	/* Redirection operator: '<' or '>' with nothing collected so far,
	 * or preceded by exactly one unquoted digit naming the unit.
	 */
638 	dp = Xstring(ws, wp);
639 	if ((c == '<' || c == '>') && state == SBASE
640 	    && ((c2 = Xlength(ws, wp)) == 0
641 	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
642 	{
643 		struct ioword *iop =
644 				(struct ioword *) alloc(sizeof(*iop), ATEMP);
645 
646 		if (c2 == 2)
647 			iop->unit = dp[1] - '0';
648 		else
649 			iop->unit = c == '>'; /* 0 for <, 1 for > */
650 
651 		c2 = getsc();
652 		/* <<, >>, <> are ok, >< is not */
653 		if (c == c2 || (c == '<' && c2 == '>')) {
654 			iop->flag = c == c2 ?
655 				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
656 			if (iop->flag == IOHERE) {
657 				if ((c2 = getsc()) == '-') {
658 					iop->flag |= IOSKIP;
659 				} else {
660 					ungetsc(c2);
661 				}
662 			}
663 		} else if (c2 == '&')
664 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
665 		else {
666 			iop->flag = c == '>' ? IOWRITE : IOREAD;
667 			if (c == '>' && c2 == '|')
668 				iop->flag |= IOCLOB;
669 			else
670 				ungetsc(c2);
671 		}
672 
673 		iop->name = (char *) 0;
674 		iop->delim = (char *) 0;
675 		iop->heredoc = (char *) 0;
676 		Xfree(ws, wp);	/* free word */
677 		yylval.iop = iop;
678 		return REDIR;
679 	}
680 
681 	if (wp == dp && state == SBASE) {
682 		Xfree(ws, wp);	/* free word */
683 		/* no word, process LEX1 character */
684 		switch (c) {
685 		  default:
686 			return c;
687 
688 		  case '|':
689 		  case '&':
690 		  case ';':
691 			if ((c2 = getsc()) == c)
692 				c = (c == ';') ? BREAK :
693 				    (c == '|') ? LOGOR :
694 				    (c == '&') ? LOGAND :
695 				    YYERRCODE;
696 #ifdef KSH
697 			else if (c == '|' && c2 == '&')
698 				c = COPROC;
699 #endif /* KSH */
700 			else
701 				ungetsc(c2);
702 			return c;
703 
704 		  case '\n':
			/* read the bodies of any here documents queued on this line */
705 			gethere();
706 			if (cf & CONTIN)
707 				goto Again;
708 			return c;
709 
710 		  case '(':  /*)*/
711 #ifdef KSH
712 			if ((c2 = getsc()) == '(') /*)*/
713 				/* XXX need to handle ((...); (...)) */
714 				c = MDPAREN;
715 			else
716 				ungetsc(c2);
717 #endif /* KSH */
718 			return c;
719 		  /*(*/
720 		  case ')':
721 			return c;
722 		}
723 	}
724 
725 	*wp++ = EOS;		/* terminate word */
726 	yylval.cp = Xclose(ws, wp);
727 	if (state == SWORD
728 #ifdef KSH
729 		|| state == SLETPAREN
730 #endif /* KSH */
731 		)	/* ONEWORD? */
732 		return LWORD;
733 	ungetsc(c);		/* unget terminator */
734 
735 	/* copy word to unprefixed string ident */
736 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
737 		*dp++ = *sp++;
738 	/* Make sure the ident array stays '\0' padded */
739 	memset(dp, 0, (ident+IDENT) - dp + 1);
740 	if (c != EOS)
741 		*ident = '\0';	/* word is not unquoted */
742 
743 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
744 		struct tbl *p;
745 		int h = hash(ident);
746 
747 		/* { */
748 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
749 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
750 		{
751 			afree(yylval.cp, ATEMP);
752 			return p->val.i;
753 		}
754 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
755 		    && (p->flag & ISSET))
756 		{
757 			register Source *s;
758 
			/* an alias that is already being expanded is
			 * returned as an ordinary word
			 */
759 			for (s = source; s->type == SALIAS; s = s->next)
760 				if (s->u.tblp == p)
761 					return LWORD;
762 			/* push alias expansion */
763 			s = pushs(SALIAS, source->areap);
764 			s->start = s->str = p->val.s;
765 			s->u.tblp = p;
766 			s->next = source;
767 			source = s;
768 			afree(yylval.cp, ATEMP);
769 			goto Again;
770 		}
771 	}
772 
773 	return LWORD;
774 }
775 
776 static void
777 gethere()
778 {
779 	register struct ioword **p;
780 
781 	for (p = heres; p < herep; p++)
782 		readhere(*p);
783 	herep = heres;
784 }
785 
786 /*
787  * read "<<word" text into temp file
788  */
789 
790 static void
791 readhere(iop)
792 	struct ioword *iop;
793 {
794 	register int c;
795 	char *volatile eof;
796 	char *eofp;
797 	int skiptabs;
798 	XString xs;
799 	char *xp;
800 	int xpos;
801 
802 	eof = evalstr(iop->delim, 0);
803 
804 	if (!(iop->flag & IOEVAL))
805 		ignore_backslash_newline++;
806 
807 	Xinit(xs, xp, 256, ATEMP);
808 
809 	for (;;) {
810 		eofp = eof;
811 		skiptabs = iop->flag & IOSKIP;
812 		xpos = Xsavepos(xs, xp);
813 		while ((c = getsc()) != 0) {
814 			if (skiptabs) {
815 				if (c == '\t')
816 					continue;
817 				skiptabs = 0;
818 			}
819 			if (c != *eofp)
820 				break;
821 			Xcheck(xs, xp);
822 			Xput(xs, xp, c);
823 			eofp++;
824 		}
825 		/* Allow EOF here so commands with out trailing newlines
826 		 * will work (eg, ksh -c '...', $(...), etc).
827 		 */
828 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
829 			xp = Xrestpos(xs, xp, xpos);
830 			break;
831 		}
832 		ungetsc(c);
833 		while ((c = getsc()) != '\n') {
834 			if (c == 0)
835 				yyerror("here document `%s' unclosed\n", eof);
836 			Xcheck(xs, xp);
837 			Xput(xs, xp, c);
838 		}
839 		Xcheck(xs, xp);
840 		Xput(xs, xp, c);
841 	}
842 	Xput(xs, xp, '\0');
843 	iop->heredoc = Xclose(xs, xp);
844 
845 	if (!(iop->flag & IOEVAL))
846 		ignore_backslash_newline--;
847 }
848 
849 void
850 #ifdef HAVE_PROTOTYPES
851 yyerror(const char *fmt, ...)
852 #else
853 yyerror(fmt, va_alist)
854 	const char *fmt;
855 	va_dcl
856 #endif
857 {
858 	va_list va;
859 
860 	/* pop aliases and re-reads */
861 	while (source->type == SALIAS || source->type == SREREAD)
862 		source = source->next;
863 	source->str = null;	/* zap pending input */
864 
865 	error_prefix(TRUE);
866 	SH_VA_START(va, fmt);
867 	shf_vfprintf(shl_out, fmt, va);
868 	va_end(va);
869 	errorf(null);
870 }
871 
872 /*
873  * input for yylex with alias expansion
874  */
875 
876 Source *
877 pushs(type, areap)
878 	int type;
879 	Area *areap;
880 {
881 	register Source *s;
882 
883 	s = (Source *) alloc(sizeof(Source), areap);
884 	s->type = type;
885 	s->str = null;
886 	s->start = NULL;
887 	s->line = 0;
888 	s->errline = 0;
889 	s->file = NULL;
890 	s->flags = 0;
891 	s->next = NULL;
892 	s->areap = areap;
893 	if (type == SFILE || type == SSTDIN) {
894 		char *dummy;
895 		Xinit(s->xs, dummy, 256, s->areap);
896 	} else
897 		memset(&s->xs, 0, sizeof(s->xs));
898 	return s;
899 }
900 
/*
 * Slow path of getsc_(): return the next raw character of input.
 *
 * Called when the current source string is at its terminating NUL.  Depending
 * on the source type the string is refilled (SFILE/SSTDIN via getsc_line(),
 * SWORDS/SWORDSEP from the word vector) or the source is popped (SALIAS,
 * SREREAD) and reading continues from the one below.  Returns 0 once the
 * input is exhausted, after turning the source into an SEOF source.
 */
901 static int
902 getsc__()
903 {
904 	register Source *s = source;
905 	register int c;
906 
907 	while ((c = *s->str++) == 0) {
908 		s->str = NULL;		/* return 0 for EOF by default */
909 		switch (s->type) {
910 		  case SEOF:
911 			s->str = null;
912 			return 0;
913 
914 		  case SSTDIN:
915 		  case SFILE:
916 			getsc_line(s);
917 			break;
918 
919 		  case SWSTR:
920 			break;
921 
922 		  case SSTRING:
923 			break;
924 
		  /* SWORDS and SWORDSEP alternate: each word from strv is
		   * followed by a space, and the last one by a newline.
		   */
925 		  case SWORDS:
926 			s->start = s->str = *s->u.strv++;
927 			s->type = SWORDSEP;
928 			break;
929 
930 		  case SWORDSEP:
931 			if (*s->u.strv == NULL) {
932 				s->start = s->str = newline;
933 				s->type = SEOF;
934 			} else {
935 				s->start = s->str = space;
936 				s->type = SWORDS;
937 			}
938 			break;
939 
940 		  case SALIAS:
941 			if (s->flags & SF_ALIASEND) {
942 				/* pass on an unused SF_ALIAS flag */
943 				source = s->next;
944 				source->flags |= s->flags & SF_ALIAS;
945 				s = source;
946 			} else if (*s->u.tblp->val.s
947 				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
948 			{
949 				source = s = s->next;	/* pop source stack */
950 				/* Note that this alias ended with a space,
951 				 * enabling alias expansion on the following
952 				 * word.
953 				 */
954 				s->flags |= SF_ALIAS;
955 			} else {
956 				/* At this point, we need to keep the current
957 				 * alias in the source list so recursive
958 				 * aliases can be detected and we also need
959 				 * to return the next character.  Do this
960 				 * by temporarily popping the alias to get
961 				 * the next character and then put it back
962 				 * in the source list with the SF_ALIASEND
963 				 * flag set.
964 				 */
965 				source = s->next;	/* pop source stack */
966 				source->flags |= s->flags & SF_ALIAS;
967 				c = getsc__();
968 				if (c) {
969 					s->flags |= SF_ALIASEND;
970 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
971 					s->start = s->str = s->ugbuf;
972 					s->next = source;
973 					source = s;
974 				} else {
975 					s = source;
976 					/* avoid reading eof twice */
977 					s->str = NULL;
978 					break;
979 				}
980 			}
			/* retry the read on whichever source is now current */
981 			continue;
982 
983 		  case SREREAD:
			/* text that lives in the source's own ugbuf is not freed */
984 			if (s->start != s->ugbuf) /* yuck */
985 				afree(s->u.freeme, ATEMP);
986 			source = s = s->next;
987 			continue;
988 		}
		/* nothing supplied new text: the source is finished */
989 		if (s->str == NULL) {
990 			s->type = SEOF;
991 			s->start = s->str = null;
992 			return '\0';
993 		}
994 		if (s->flags & SF_ECHO) {
995 			shf_puts(s->str, shl_out);
996 			shf_flush(shl_out);
997 		}
998 	}
999 	return c;
1000 }
1001 
/*
 * Read the next line of input for source s (an SFILE or SSTDIN source)
 * into its buffer s->xs and point s->start and s->str at it.
 *
 * With a tty and an editing mode enabled the line comes from x_read();
 * otherwise it is read with shf_getse(), printing the prompt first when
 * interactive.  NUL bytes are stripped from the line.  On end of file
 * s->str is set to NULL (and an SFILE's descriptor is closed).  When
 * interactive, a line with content is saved to history and the prompt is
 * switched to PS2.
 */
1002 static void
1003 getsc_line(s)
1004 	Source *s;
1005 {
1006 	char *xp = Xstring(s->xs, xp);
1007 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
1008 	int have_tty = interactive && (s->flags & SF_TTY);
1009 
1010 	/* Done here to ensure nothing odd happens when a timeout occurs */
1011 	XcheckN(s->xs, xp, LINE);
1012 	*xp = '\0';
1013 	s->start = s->str = xp;
1014 
1015 #ifdef KSH
	/* arm the input timeout while waiting on the terminal */
1016 	if (have_tty && ksh_tmout) {
1017 		ksh_tmout_state = TMOUT_READING;
1018 		alarm(ksh_tmout);
1019 	}
1020 #endif /* KSH */
1021 #ifdef EDIT
1022 	if (have_tty && (0
1023 # ifdef VI
1024 			 || Flag(FVI)
1025 # endif /* VI */
1026 # ifdef EMACS
1027 			 || Flag(FEMACS) || Flag(FGMACS)
1028 # endif /* EMACS */
1029 		))
1030 	{
1031 		int nread;
1032 
1033 		nread = x_read(xp, LINE);
1034 		if (nread < 0)	/* read error */
1035 			nread = 0;
1036 		xp[nread] = '\0';
1037 		xp += nread;
1038 	}
1039 	else
1040 #endif /* EDIT */
1041 	{
1042 		if (interactive) {
1043 			pprompt(prompt, 0);
1044 		} else
1045 			s->line++;
1046 
1047 		while (1) {
1048 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1049 
			/* interrupted read: run pending traps and try again */
1050 			if (!p && shf_error(s->u.shf)
1051 			    && shf_errno(s->u.shf) == EINTR)
1052 			{
1053 				shf_clearerr(s->u.shf);
1054 				if (trap)
1055 					runtraps(0);
1056 				continue;
1057 			}
			/* stop when nothing was returned or the text read
			 * ends in a newline
			 */
1058 			if (!p || (xp = p, xp[-1] == '\n'))
1059 				break;
1060 			/* double buffer size */
1061 			xp++; /* move past null so doubling works... */
1062 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1063 			xp--; /* ...and move back again */
1064 		}
1065 		/* flush any unwanted input so other programs/builtins
1066 		 * can read it.  Not very optimal, but less error prone
1067 		 * than flushing else where, dealing with redirections,
1068 		 * etc..
1069 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1070 		 */
1071 		if (s->type == SSTDIN)
1072 			shf_flush(s->u.shf);
1073 	}
1074 	/* XXX: temporary kludge to restore source after a
1075 	 * trap may have been executed.
1076 	 */
1077 	source = s;
1078 #ifdef KSH
1079 	if (have_tty && ksh_tmout)
1080 	{
1081 		ksh_tmout_state = TMOUT_EXECUTING;
1082 		alarm(0);
1083 	}
1084 #endif /* KSH */
1085 	s->start = s->str = Xstring(s->xs, xp);
1086 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1087 	/* Note: if input is all nulls, this is not eof */
1088 	if (Xlength(s->xs, xp) == 0) { /* EOF */
1089 		if (s->type == SFILE)
1090 			shf_fdclose(s->u.shf);
1091 		s->str = NULL;
1092 	} else if (interactive) {
1093 #ifdef HISTORY
1094 		char *p = Xstring(s->xs, xp);
		/* at PS1, skip leading IFS white space before testing
		 * whether the line has any content
		 */
1095 		if (cur_prompt == PS1)
1096 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1097 				p++;
1098 		if (*p) {
1099 # ifdef EASY_HISTORY
1100 			if (cur_prompt == PS2)
1101 				histappend(Xstring(s->xs, xp), 1);
1102 			else
1103 # endif /* EASY_HISTORY */
1104 			{
1105 				s->line++;
1106 				histsave(s->line, s->str, 1);
1107 			}
1108 		}
1109 #endif /* HISTORY */
1110 	}
1111 	if (interactive)
1112 		set_prompt(PS2, (Source *) 0);
1113 }
1114 
/*
 * Make `to' (PS1 or PS2) the current prompt and store its text in the
 * global `prompt'.
 *
 * For PS1 under KSH, "!!" in the value is replaced by "!" and a lone "!" by
 * s->line + 1 (0 when s is a null pointer); the result is then passed
 * through substitute().  If that substitution fails, safe_prompt is used
 * instead.  For PS2, and for PS1 without KSH, the variable's value is used
 * as it is.
 */
1115 void
1116 set_prompt(to, s)
1117 	int to;
1118 	Source *s;
1119 {
1120 	cur_prompt = to;
1121 
1122 	switch (to) {
1123 	case PS1: /* command */
1124 #ifdef KSH
1125 		/* Substitute ! and !! here, before substitutions are done
1126 		 * so ! in expanded variables are not expanded.
1127 		 * NOTE: this is not what at&t ksh does (it does it after
1128 		 * substitutions, POSIX doesn't say which is to be done.
1129 		 */
1130 		{
1131 			struct shf *shf;
1132 			char * volatile ps1;
1133 			Area *saved_atemp;
1134 
1135 			ps1 = str_val(global("PS1"));
1136 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1137 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
			/* copy PS1: "!!" becomes "!", a lone "!" becomes the
			 * line number
			 */
1138 			while (*ps1) {
1139 				if (*ps1 != '!' || *++ps1 == '!')
1140 					shf_putchar(*ps1++, shf);
1141 				else
1142 					shf_fprintf(shf, "%d",
1143 						s ? s->line + 1 : 0);
1144 			}
1145 			ps1 = shf_sclose(shf);
			/* remember the current ATEMP before newenv() so the
			 * result can be saved there
			 */
1146 			saved_atemp = ATEMP;
1147 			newenv(E_ERRH);
1148 			if (ksh_sigsetjmp(e->jbuf, 0)) {
1149 				prompt = safe_prompt;
1150 				/* Don't print an error - assume it has already
1151 				 * been printed.  Reason is we may have forked
1152 				 * to run a command and the child may be
1153 				 * unwinding its stack through this code as it
1154 				 * exits.
1155 				 */
1156 			} else
1157 				prompt = str_save(substitute(ps1, 0),
1158 						 saved_atemp);
1159 			quitenv();
1160 		}
1161 #else /* KSH */
1162 		prompt = str_val(global("PS1"));
1163 #endif /* KSH */
1164 		break;
1165 
1166 	case PS2: /* command continuation */
1167 		prompt = str_val(global("PS2"));
1168 		break;
1169 	}
1170 }
1171 
1172 /* See also related routine, promptlen() in edit.c */
1173 void
1174 pprompt(cp, ntruncate)
1175 	const char *cp;
1176 	int ntruncate;
1177 {
1178 #if 0
1179 	char nbuf[32];
1180 	int c;
1181 
1182 	while (*cp != 0) {
1183 		if (*cp != '!')
1184 			c = *cp++;
1185 		else if (*++cp == '!')
1186 			c = *cp++;
1187 		else {
1188 			int len;
1189 			char *p;
1190 
1191 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1192 				source->line + 1);
1193 			len = strlen(nbuf);
1194 			if (ntruncate) {
1195 				if (ntruncate >= len) {
1196 					ntruncate -= len;
1197 					continue;
1198 				}
1199 				p += ntruncate;
1200 				len -= ntruncate;
1201 				ntruncate = 0;
1202 			}
1203 			shf_write(p, len, shl_out);
1204 			continue;
1205 		}
1206 		if (ntruncate)
1207 			--ntruncate;
1208 		else
1209 			shf_putc(c, shl_out);
1210 	}
1211 #endif /* 0 */
1212 	shf_puts(cp + ntruncate, shl_out);
1213 	shf_flush(shl_out);
1214 }
1215 
1216 /* Read the variable part of a ${...} expression (ie, up to but not including
1217  * the :[-+?=#%] or close-brace).
 *
 * wsp is the output string being built and wp the current write position in
 * it.  The variable part (an optional leading '#', then a name with an
 * optional [subscript], a number, or a single special character) is
 * appended followed by a NUL, the first character that does not belong to
 * it is pushed back, and the updated write position is returned.  A
 * subscript without a closing bracket is reported through yyerror().
1218  */
1219 static char *
1220 get_brace_var(wsp, wp)
1221 	XString *wsp;
1222 	char *wp;
1223 {
1224 	enum parse_state {
1225 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1226 			   PS_NUMBER, PS_VAR1, PS_END
1227 			 }
1228 		state;
1229 	char c;
1230 
1231 	state = PS_INITIAL;
1232 	while (1) {
1233 		c = getsc();
1234 		/* State machine to figure out where the variable part ends. */
1235 		switch (state) {
1236 		  case PS_INITIAL:
1237 			if (c == '#') {
1238 				state = PS_SAW_HASH;
1239 				break;
1240 			}
1241 			/* fall through.. */
1242 		  case PS_SAW_HASH:
1243 			if (letter(c))
1244 				state = PS_IDENT;
1245 			else if (digit(c))
1246 				state = PS_NUMBER;
1247 			else if (ctype(c, C_VAR1))
1248 				state = PS_VAR1;
1249 			else
1250 				state = PS_END;
1251 			break;
1252 		  case PS_IDENT:
1253 			if (!letnum(c)) {
1254 				state = PS_END;
1255 				if (c == '[') {
1256 					char *tmp, *p;
1257 
1258 					if (!arraysub(&tmp))
1259 						yyerror("missing ]\n");
					/* copy "[" and the subscript text, which
					 * already ends with the closing "]"
					 */
1260 					*wp++ = c;
1261 					for (p = tmp; *p; ) {
1262 						Xcheck(*wsp, wp);
1263 						*wp++ = *p++;
1264 					}
1265 					afree(tmp, ATEMP);
1266 					c = getsc(); /* the character after the ]; it is pushed back below */
1267 				}
1268 			}
1269 			break;
1270 		  case PS_NUMBER:
1271 			if (!digit(c))
1272 				state = PS_END;
1273 			break;
1274 		  case PS_VAR1:
			/* a special parameter is a single character */
1275 			state = PS_END;
1276 			break;
1277 		  case PS_END: /* keep gcc happy */
1278 			break;
1279 		}
1280 		if (state == PS_END) {
1281 			*wp++ = '\0';	/* end of variable part */
1282 			ungetsc(c);
1283 			break;
1284 		}
1285 		Xcheck(*wsp, wp);
1286 		*wp++ = c;
1287 	}
1288 	return wp;
1289 }
1290 
1291 /*
1292  * Save an array subscript - returns true if matching bracket found, false
1293  * if eof or newline was found.
1294  * (Returned string double null terminated)
1295  */
1296 static int
1297 arraysub(strp)
1298 	char **strp;
1299 {
1300 	XString ws;
1301 	char	*wp;
1302 	char	c;
1303 	int 	depth = 1;	/* we are just past the initial [ */
1304 
1305 	Xinit(ws, wp, 32, ATEMP);
1306 
1307 	do {
1308 		c = getsc();
1309 		Xcheck(ws, wp);
1310 		*wp++ = c;
1311 		if (c == '[')
1312 			depth++;
1313 		else if (c == ']')
1314 			depth--;
1315 	} while (depth > 0 && c && c != '\n');
1316 
1317 	*wp++ = '\0';
1318 	*strp = Xclose(ws, wp);
1319 
1320 	return depth == 0 ? 1 : 0;
1321 }
1322 
1323 /* Unget a char: handles case when we are already at the start of the buffer */
1324 static const char *
1325 ungetsc(c)
1326 	int c;
1327 {
1328 	if (backslash_skip)
1329 		backslash_skip--;
1330 	/* Don't unget eof... */
1331 	if (source->str == null && c == '\0')
1332 		return source->str;
1333 	if (source->str > source->start)
1334 		source->str--;
1335 	else {
1336 		Source *s;
1337 
1338 		s = pushs(SREREAD, source->areap);
1339 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1340 		s->start = s->str = s->ugbuf;
1341 		s->next = source;
1342 		source = s;
1343 	}
1344 	return source->str;
1345 }
1346 
1347 
1348 /* Called to get a char that isn't a \newline sequence. */
1349 static int
1350 getsc_bn ARGS((void))
1351 {
1352 	int c, c2;
1353 
1354 	if (ignore_backslash_newline)
1355 		return getsc_();
1356 
1357 	if (backslash_skip == 1) {
1358 		backslash_skip = 2;
1359 		return getsc_();
1360 	}
1361 
1362 	backslash_skip = 0;
1363 
1364 	while (1) {
1365 		c = getsc_();
1366 		if (c == '\\') {
1367 			if ((c2 = getsc_()) == '\n')
1368 				/* ignore the \newline; get the next char... */
1369 				continue;
1370 			ungetsc(c2);
1371 			backslash_skip = 1;
1372 		}
1373 		return c;
1374 	}
1375 }
1376 
1377 static Lex_state *
1378 push_state_(si, old_end)
1379 	State_info *si;
1380 	Lex_state *old_end;
1381 {
1382 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1383 
1384 	new[0].ls_info.base = old_end;
1385 	si->base = &new[0];
1386 	si->end = &new[STATE_BSIZE];
1387 	return &new[1];
1388 }
1389 
1390 static Lex_state *
1391 pop_state_(si, old_end)
1392 	State_info *si;
1393 	Lex_state *old_end;
1394 {
1395 	Lex_state *old_base = si->base;
1396 
1397 	si->base = old_end->ls_info.base - STATE_BSIZE;
1398 	si->end = old_end->ls_info.base;
1399 
1400 	afree(old_base, ATEMP);
1401 
1402 	return si->base + STATE_BSIZE - 1;
1403 }
1404