xref: /netbsd-src/bin/ksh/lex.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $	*/
2 
3 /*
4  * lexical analysis and source input
5  */
6 #include <sys/cdefs.h>
7 
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $");
10 #endif
11 
12 
13 #include "sh.h"
14 #include <ctype.h>
15 
16 
/*
 * One slot of the lexer's state stack.  ls_state holds the state code and
 * ls_info holds the bookkeeping that this particular state needs.  The
 * first slot of each block of slots does not describe a state; its
 * ls_info.base member links the block to the neighbouring one.
 */
typedef struct lex_state Lex_state;

struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* number of unclosed parentheses */
			int csstate;	/* XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* number of unclosed parentheses */
			int start;	/* where $(( begins in the output str */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* number of unclosed parentheses */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		Lex_state *base;	/* link to the adjoining state block */
	} ls_info;
};

/* Shorthands for reaching the per-state members of ls_info. */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
53 
54 typedef struct State_info State_info;
55 struct State_info {
56 	Lex_state	*base;
57 	Lex_state	*end;
58 };
59 
60 
/* Forward declarations of the file-local helpers defined below. */
61 static void	readhere ARGS((struct ioword *iop));
62 static int	getsc__ ARGS((void));
63 static void	getsc_line ARGS((Source *s));
64 static int	getsc_bn ARGS((void));
65 static char	*get_brace_var ARGS((XString *wsp, char *wp));
66 static int	arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void	gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
71 
/* Backslash bookkeeping shared by getsc_bn() and ungetsc(): set to 1 when
 * getsc_bn() returns a backslash that does not start a \newline pair and
 * to 2 when it then returns the following character.  While it is
 * non-zero the getsc() fast path below is bypassed.
 */
72 static int backslash_skip;
/* While non-zero, getsc_bn() hands characters through untouched instead of
 * discarding \newline sequences.
 */
73 static int ignore_backslash_newline;
74 
/* Next input character with \newline removal.  Reads straight from
 * source->str when the next character is neither NUL nor a backslash and
 * backslash_skip is zero; otherwise falls back to getsc_bn().
 */
75 /* optimized getsc_bn() */
76 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
77 			 && !backslash_skip ? *source->str++ : getsc_bn())
/* Next raw input character: reads from source->str unless it is at a NUL,
 * in which case getsc__() is called.
 */
78 /* optimized getsc__() */
79 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
80 
/* Number of Lex_state slots in one block of the state stack. */
81 #define STATE_BSIZE	32
82 
/* Enter state s: advance statep (obtaining a fresh block from push_state_()
 * when the current block is full) and record s both in the new slot and in
 * the local `state'.  Uses the caller's statep, state_info and state
 * variables.
 */
83 #define PUSH_STATE(s)	do { \
84 			    if (++statep == state_info.end) \
85 				statep = push_state_(&state_info, statep); \
86 			    state = statep->ls_state = (s); \
87 			} while (0)
88 
/* Leave the current state: step statep back (returning to the previous
 * block through pop_state_() when the link slot is reached) and reload
 * `state' from the slot that is now on top.
 */
89 #define POP_STATE()	do { \
90 			    if (--statep == state_info.base) \
91 				statep = pop_state_(&state_info, statep); \
92 			    state = statep->ls_state; \
93 			} while (0)
94 
95 
96 
97 /*
98  * Lexical analyzer
99  *
100  * tokens are not regular expressions, they are LL(1).
101  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102  * hence the state stack.
 *
 * yylex() reads one token from the current input source.
 *
 * cf is a bit mask of lexer flags; the bits tested here are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, VARASN, ARRAYVAR, CMDWORD, KEYWORD, ALIAS,
 * ESACONLY and CONTIN.
 *
 * Return value:
 *  - REDIR, with yylval.iop describing the operator, for a redirection;
 *  - when no word was collected in the base state: the terminating
 *    character itself, or BREAK, LOGOR, LOGAND, COPROC or MDPAREN for the
 *    corresponding two-character operators;
 *  - the token value of a keyword when KEYWORD is set and the word is
 *    found in the keywords table;
 *  - otherwise LWORD, with yylval.cp holding the collected word: marker
 *    bytes (CHAR, QCHAR, OQUOTE, CQUOTE, OSUBST, CSUBST, COMSUB, EXPRSUB,
 *    OPAT, SPAT, CPAT) interleaved with character data and ended by EOS.
 *
 * If the state stack is not back at its initial slot when the word ends,
 * yyerror("no closing quote\n") is called.
103  */
104 
105 int
106 yylex(cf)
107 	int cf;
108 {
109 	Lex_state states[STATE_BSIZE], *statep;
110 	State_info state_info;
111 	register int c, state;
112 	XString ws;		/* expandable output word */
113 	register char *wp;	/* output word pointer */
114 	char *sp, *dp;
115 	int c2;
116 
117 
	/* (Re)start lexing a token.  Also reached by goto after a newline
	 * when CONTIN is set and after an alias expansion has been pushed.
	 */
118   Again:
119 	states[0].ls_state = -1;
120 	states[0].ls_info.base = (Lex_state *) 0;
121 	statep = &states[1];
122 	state_info.base = states;
123 	state_info.end = &states[STATE_BSIZE];
124 
125 	Xinit(ws, wp, 64, ATEMP);
126 
127 	backslash_skip = 0;
128 	ignore_backslash_newline = 0;
129 
130 	if (cf&ONEWORD)
131 		state = SWORD;
132 #ifdef KSH
133 	else if (cf&LETEXPR) {
134 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
135 		state = SLETPAREN;
136 		statep->ls_sletparen.nparen = 0;
137 	}
138 #endif /* KSH */
139 	else {		/* normal lexing */
140 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks and a `#' comment up to end of line */
141 		while ((c = getsc()) == ' ' || c == '\t')
142 			;
143 		if (c == '#') {
144 			ignore_backslash_newline++;
145 			while ((c = getsc()) != '\0' && c != '\n')
146 				;
147 			ignore_backslash_newline--;
148 		}
149 		ungetsc(c);
150 	}
151 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
152 		source->flags &= ~SF_ALIAS;
153 		/* In POSIX mode, a trailing space only counts if we are
154 		 * parsing a simple command
155 		 */
156 		if (!Flag(FPOSIX) || (cf & CMDWORD))
157 			cf |= ALIAS;
158 	}
159 
160 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 	statep->ls_state = state;
162 
163 	/* collect non-special or quoted characters to form word */
164 	while (!((c = getsc()) == 0
165 		 || ((state == SBASE || state == SHEREDELIM)
166 		     && ctype(c, C_LEX1))))
167 	{
168 		Xcheck(ws, wp);
169 		switch (state) {
170 		  case SBASE:
171 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 				*wp = EOS; /* temporary */
173 				if (is_wdvarname(Xstring(ws, wp), FALSE))
174 				{
175 					char *p, *tmp;
176 
177 					if (arraysub(&tmp)) {
178 						*wp++ = CHAR;
179 						*wp++ = c;
180 						for (p = tmp; *p; ) {
181 							Xcheck(ws, wp);
182 							*wp++ = CHAR;
183 							*wp++ = *p++;
184 						}
185 						afree(tmp, ATEMP);
186 						break;
187 					} else {
						/* no matching ]: push the text
						 * arraysub() consumed back as
						 * input to be read again
						 */
188 						Source *s;
189 
190 						s = pushs(SREREAD,
191 							  source->areap);
192 						s->start = s->str
193 							= s->u.freeme = tmp;
194 						s->next = source;
195 						source = s;
196 					}
197 				}
198 				*wp++ = CHAR;
199 				*wp++ = c;
200 				break;
201 			}
202 			/* fall through.. */
203 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 			if (c == '*' || c == '@' || c == '+' || c == '?'
206 			    || c == '!')
207 			{
208 				c2 = getsc();
209 				if (c2 == '(' /*)*/ ) {
210 					*wp++ = OPAT;
211 					*wp++ = c;
212 					PUSH_STATE(SPATTERN);
213 					break;
214 				}
215 				ungetsc(c2);
216 			}
217 #endif /* KSH */
218 			/* fall through.. */
219 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
220 			switch (c) {
221 			  case '\\':
222 				c = getsc();
223 #ifdef OS2
224 				if (isalnum((unsigned char)c)) {
225 					*wp++ = CHAR, *wp++ = '\\';
226 					*wp++ = CHAR, *wp++ = c;
227 				} else
228 #endif
229 				if (c) /* trailing \ is lost */
230 					*wp++ = QCHAR, *wp++ = c;
231 				break;
232 			  case '\'':
233 				*wp++ = OQUOTE;
234 				ignore_backslash_newline++;
235 				PUSH_STATE(SSQUOTE);
236 				break;
237 			  case '"':
238 				*wp++ = OQUOTE;
239 				PUSH_STATE(SDQUOTE);
240 				break;
241 			  default:
242 				goto Subst;
243 			}
244 			break;
245 
		  /* backslash, $ and ` handling shared with SDQUOTE and SWORD */
246 		  Subst:
247 			switch (c) {
248 			  case '\\':
249 				c = getsc();
250 				switch (c) {
251 				  case '\\':
252 				  case '$': case '`':
253 					*wp++ = QCHAR, *wp++ = c;
254 					break;
255 				  case '"':
256 					if ((cf & HEREDOC) == 0) {
257 						*wp++ = QCHAR, *wp++ = c;
258 						break;
259 					}
260 					/* FALLTHROUGH */
261 				  default:
262 					Xcheck(ws, wp);
263 					if (c) { /* trailing \ is lost */
264 						*wp++ = CHAR, *wp++ = '\\';
265 						*wp++ = CHAR, *wp++ = c;
266 					}
267 					break;
268 				}
269 				break;
270 			  case '$':
271 				c = getsc();
272 				if (c == '(') /*)*/ {
273 					c = getsc();
274 					if (c == '(') /*)*/ {
275 						PUSH_STATE(SASPAREN);
276 						statep->ls_sasparen.nparen = 2;
277 						statep->ls_sasparen.start =
278 							Xsavepos(ws, wp);
279 						*wp++ = EXPRSUB;
280 					} else {
281 						ungetsc(c);
282 						PUSH_STATE(SCSPAREN);
283 						statep->ls_scsparen.nparen = 1;
284 						statep->ls_scsparen.csstate = 0;
285 						*wp++ = COMSUB;
286 					}
287 				} else if (c == '{') /*}*/ {
288 					*wp++ = OSUBST;
289 					*wp++ = '{'; /*}*/
290 					wp = get_brace_var(&ws, wp);
291 					c = getsc();
292 					/* allow :# and :% (ksh88 compat) */
293 					if (c == ':') {
294 						*wp++ = CHAR, *wp++ = c;
295 						c = getsc();
296 					}
297 					/* If this is a trim operation,
298 					 * treat (,|,) specially in STBRACE.
299 					 */
300 					if (c == '#' || c == '%') {
301 						ungetsc(c);
302 						PUSH_STATE(STBRACE);
303 					} else {
304 						ungetsc(c);
305 						PUSH_STATE(SBRACE);
306 					}
307 				} else if (ctype(c, C_ALPHA)) {
					/* $name: emitted as OSUBST 'X' name
					 * NUL CSUBST 'X'
					 */
308 					*wp++ = OSUBST;
309 					*wp++ = 'X';
310 					do {
311 						Xcheck(ws, wp);
312 						*wp++ = c;
313 						c = getsc();
314 					} while (ctype(c, C_ALPHA|C_DIGIT));
315 					*wp++ = '\0';
316 					*wp++ = CSUBST;
317 					*wp++ = 'X';
318 					ungetsc(c);
319 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
320 					Xcheck(ws, wp);
321 					*wp++ = OSUBST;
322 					*wp++ = 'X';
323 					*wp++ = c;
324 					*wp++ = '\0';
325 					*wp++ = CSUBST;
326 					*wp++ = 'X';
327 				} else {
328 					*wp++ = CHAR, *wp++ = '$';
329 					ungetsc(c);
330 				}
331 				break;
332 			  case '`':
333 				PUSH_STATE(SBQUOTE);
334 				*wp++ = COMSUB;
335 				/* Need to know if we are inside double quotes
336 				 * since sh/at&t-ksh translate the \" to " in
337 				 * "`..\"..`".
338 				 * This is not done in posix mode (section
339 				 * 3.2.3, Double Quotes: "The backquote shall
340 				 * retain its special meaning introducing the
341 				 * other form of command substitution (see
342 				 * 3.6.3). The portion of the quoted string
343 				 * from the initial backquote and the
344 				 * characters up to the next backquote that
345 				 * is not preceded by a backslash (having
346 				 * escape characters removed) defines that
347 				 * command whose output replaces `...` when
348 				 * the word is expanded."
349 				 * Section 3.6.3, Command Substitution:
350 				 * "Within the backquoted style of command
351 				 * substitution, backslash shall retain its
352 				 * literal meaning, except when followed by
353 				 * $ ` \.").
354 				 */
355 				statep->ls_sbquote.indquotes = 0;
356 				if (!Flag(FPOSIX)) {
					/* walk down the state stack, following
					 * the block links, looking for an
					 * enclosing SDQUOTE state
					 */
357 					Lex_state *s = statep;
358 					Lex_state *base = state_info.base;
359 					while (1) {
360 						for (; s != base; s--) {
361 							if (s->ls_state == SDQUOTE) {
362 								statep->ls_sbquote.indquotes = 1;
363 								break;
364 							}
365 						}
366 						if (s != base)
367 							break;
368 						if (!(s = s->ls_info.base))
369 							break;
370 						base = s-- - STATE_BSIZE;
371 					}
372 				}
373 				break;
374 			  default:
375 				*wp++ = CHAR, *wp++ = c;
376 			}
377 			break;
378 
379 		  case SSQUOTE:
380 			if (c == '\'') {
381 				POP_STATE();
382 				*wp++ = CQUOTE;
383 				ignore_backslash_newline--;
384 			} else
385 				*wp++ = QCHAR, *wp++ = c;
386 			break;
387 
388 		  case SDQUOTE:
389 			if (c == '"') {
390 				POP_STATE();
391 				*wp++ = CQUOTE;
392 			} else
393 				goto Subst;
394 			break;
395 
396 		  case SCSPAREN: /* $( .. ) */
397 			/* todo: deal with $(...) quoting properly
398 			 * kludge to partly fake quoting inside $(..): doesn't
399 			 * really work because nested $(..) or ${..} inside
400 			 * double quotes aren't dealt with.
401 			 */
402 			switch (statep->ls_scsparen.csstate) {
403 			  case 0: /* normal */
404 				switch (c) {
405 				  case '(':
406 					statep->ls_scsparen.nparen++;
407 					break;
408 				  case ')':
409 					statep->ls_scsparen.nparen--;
410 					break;
411 				  case '\\':
412 					statep->ls_scsparen.csstate = 1;
413 					break;
414 				  case '"':
415 					statep->ls_scsparen.csstate = 2;
416 					break;
417 				  case '\'':
418 					statep->ls_scsparen.csstate = 4;
419 					ignore_backslash_newline++;
420 					break;
421 				}
422 				break;
423 
424 			  case 1: /* backslash in normal mode */
425 			  case 3: /* backslash in double quotes */
426 				--statep->ls_scsparen.csstate;
427 				break;
428 
429 			  case 2: /* double quotes */
430 				if (c == '"')
431 					statep->ls_scsparen.csstate = 0;
432 				else if (c == '\\')
433 					statep->ls_scsparen.csstate = 3;
434 				break;
435 
436 			  case 4: /* single quotes */
437 				if (c == '\'') {
438 					statep->ls_scsparen.csstate = 0;
439 					ignore_backslash_newline--;
440 				}
441 				break;
442 			}
443 			if (statep->ls_scsparen.nparen == 0) {
444 				POP_STATE();
445 				*wp++ = 0; /* end of COMSUB */
446 			} else
447 				*wp++ = c;
448 			break;
449 
450 		  case SASPAREN: /* $(( .. )) */
451 			/* todo: deal with $((...); (...)) properly */
452 			/* XXX should nest using existing state machine
453 			 *     (embed "..", $(...), etc.) */
454 			if (c == '(')
455 				statep->ls_sasparen.nparen++;
456 			else if (c == ')') {
457 				statep->ls_sasparen.nparen--;
458 				if (statep->ls_sasparen.nparen == 1) {
459 					/*(*/
460 					if ((c2 = getsc()) == ')') {
461 						POP_STATE();
462 						*wp++ = 0; /* end of EXPRSUB */
463 						break;
464 					} else {
465 						char *s;
466 
467 						ungetsc(c2);
468 						/* mismatched parenthesis -
469 						 * assume we were really
470 						 * parsing a $(..) expression
471 						 */
						/* turn the EXPRSUB marker into
						 * COMSUB followed by a literal
						 * '(' by shifting the text
						 * collected so far up one byte
						 */
472 						s = Xrestpos(ws, wp,
473 						     statep->ls_sasparen.start);
474 						memmove(s + 1, s, wp - s);
475 						*s++ = COMSUB;
476 						*s = '('; /*)*/
477 						wp++;
478 						statep->ls_scsparen.nparen = 1;
479 						statep->ls_scsparen.csstate = 0;
480 						state = statep->ls_state
481 							= SCSPAREN;
482 
483 					}
484 				}
485 			}
486 			*wp++ = c;
487 			break;
488 
489 		  case SBRACE:
490 			/*{*/
491 			if (c == '}') {
492 				POP_STATE();
493 				*wp++ = CSUBST;
494 				*wp++ = /*{*/ '}';
495 			} else
496 				goto Sbase1;
497 			break;
498 
499 		  case STBRACE:
500 			/* Same as SBRACE, except (,|,) treated specially */
501 			/*{*/
502 			if (c == '}') {
503 				POP_STATE();
504 				*wp++ = CSUBST;
505 				*wp++ = /*{*/ '}';
506 			} else if (c == '|') {
507 				*wp++ = SPAT;
508 			} else if (c == '(') {
509 				*wp++ = OPAT;
510 				*wp++ = ' ';	/* simile for @ */
511 				PUSH_STATE(SPATTERN);
512 			} else
513 				goto Sbase1;
514 			break;
515 
516 		  case SBQUOTE:
517 			if (c == '`') {
518 				*wp++ = 0;
519 				POP_STATE();
520 			} else if (c == '\\') {
521 				switch (c = getsc()) {
522 				  case '\\':
523 				  case '$': case '`':
524 					*wp++ = c;
525 					break;
526 				  case '"':
527 					if (statep->ls_sbquote.indquotes) {
528 						*wp++ = c;
529 						break;
530 					}
531 					/* fall through.. */
532 				  default:
533 					if (c) { /* trailing \ is lost */
534 						*wp++ = '\\';
535 						*wp++ = c;
536 					}
537 					break;
538 				}
539 			} else
540 				*wp++ = c;
541 			break;
542 
543 		  case SWORD:	/* ONEWORD */
544 			goto Subst;
545 
546 #ifdef KSH
547 		  case SLETPAREN:	/* LETEXPR: (( ... )) */
548 			/*(*/
549 			if (c == ')') {
550 				if (statep->ls_sletparen.nparen > 0)
551 				    --statep->ls_sletparen.nparen;
552 				/*(*/
553 				else if ((c2 = getsc()) == ')') {
554 					c = 0;
555 					*wp++ = CQUOTE;
556 					goto Done;
557 				} else
558 					ungetsc(c2);
559 			} else if (c == '(')
560 				/* parenthesis inside quotes and backslashes
561 				 * are lost, but at&t ksh doesn't count them
562 				 * either
563 				 */
564 				++statep->ls_sletparen.nparen;
565 			goto Sbase2;
566 #endif /* KSH */
567 
568 		  case SHEREDELIM:	/* <<,<<- delimiter */
569 			/* XXX chuck this state (and the next) - use
570 			 * the existing states ($ and \`..` should be
571 			 * stripped of their specialness after the
572 			 * fact).
573 			 */
574 			/* here delimiters need a special case since
575 			 * $ and `..` are not to be treated specially
576 			 */
577 			if (c == '\\') {
578 				c = getsc();
579 				if (c) { /* trailing \ is lost */
580 					*wp++ = QCHAR;
581 					*wp++ = c;
582 				}
583 			} else if (c == '\'') {
584 				PUSH_STATE(SSQUOTE);
585 				*wp++ = OQUOTE;
586 				ignore_backslash_newline++;
587 			} else if (c == '"') {
588 				state = statep->ls_state = SHEREDQUOTE;
589 				*wp++ = OQUOTE;
590 			} else {
591 				*wp++ = CHAR;
592 				*wp++ = c;
593 			}
594 			break;
595 
596 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
597 			if (c == '"') {
598 				*wp++ = CQUOTE;
599 				state = statep->ls_state = SHEREDELIM;
600 			} else {
601 				if (c == '\\') {
602 					switch (c = getsc()) {
603 					  case '\\': case '"':
604 					  case '$': case '`':
605 						break;
606 					  default:
607 						if (c) { /* trailing \ lost */
608 							*wp++ = CHAR;
609 							*wp++ = '\\';
610 						}
611 						break;
612 					}
613 				}
614 				*wp++ = CHAR;
615 				*wp++ = c;
616 			}
617 			break;
618 
619 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
620 			if ( /*(*/ c == ')') {
621 				*wp++ = CPAT;
622 				POP_STATE();
623 			} else if (c == '|') {
624 				*wp++ = SPAT;
625 			} else if (c == '(') {
626 				*wp++ = OPAT;
627 				*wp++ = ' ';	/* simile for @ */
628 				PUSH_STATE(SPATTERN);
629 			} else
630 				goto Sbase1;
631 			break;
632 		}
633 	}
634 Done:
635 	Xcheck(ws, wp);
636 	if (statep != &states[1])
637 		/* XXX figure out what is missing */
638 		yyerror("no closing quote\n");
639 
640 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
641 	if (state == SHEREDELIM)
642 		state = SBASE;
643 
	/* A '<' or '>' terminator preceded by nothing, or by exactly one
	 * unquoted digit, starts a redirection operator.
	 */
644 	dp = Xstring(ws, wp);
645 	if ((c == '<' || c == '>') && state == SBASE
646 	    && ((c2 = Xlength(ws, wp)) == 0
647 	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
648 	{
649 		struct ioword *iop =
650 				(struct ioword *) alloc(sizeof(*iop), ATEMP);
651 
652 		if (c2 == 2)
653 			iop->unit = dp[1] - '0';
654 		else
655 			iop->unit = c == '>'; /* 0 for <, 1 for > */
656 
657 		c2 = getsc();
658 		/* <<, >>, <> are ok, >< is not */
659 		if (c == c2 || (c == '<' && c2 == '>')) {
660 			iop->flag = c == c2 ?
661 				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
662 			if (iop->flag == IOHERE) {
663 				if ((c2 = getsc()) == '-') {
664 					iop->flag |= IOSKIP;
665 				} else {
666 					ungetsc(c2);
667 				}
668 			}
669 		} else if (c2 == '&')
670 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
671 		else {
672 			iop->flag = c == '>' ? IOWRITE : IOREAD;
673 			if (c == '>' && c2 == '|')
674 				iop->flag |= IOCLOB;
675 			else
676 				ungetsc(c2);
677 		}
678 
679 		iop->name = (char *) 0;
680 		iop->delim = (char *) 0;
681 		iop->heredoc = (char *) 0;
682 		Xfree(ws, wp);	/* free word */
683 		yylval.iop = iop;
684 		return REDIR;
685 	}
686 
687 	if (wp == dp && state == SBASE) {
688 		Xfree(ws, wp);	/* free word */
689 		/* no word, process LEX1 character */
690 		switch (c) {
691 		  default:
692 			return c;
693 
694 		  case '|':
695 		  case '&':
696 		  case ';':
697 			if ((c2 = getsc()) == c)
698 				c = (c == ';') ? BREAK :
699 				    (c == '|') ? LOGOR :
700 				    (c == '&') ? LOGAND :
701 				    YYERRCODE;
702 #ifdef KSH
703 			else if (c == '|' && c2 == '&')
704 				c = COPROC;
705 #endif /* KSH */
706 			else
707 				ungetsc(c2);
708 			return c;
709 
710 		  case '\n':
			/* read any here-documents queued on this line */
711 			gethere();
712 			if (cf & CONTIN)
713 				goto Again;
714 			return c;
715 
716 		  case '(':  /*)*/
717 #ifdef KSH
718 			if ((c2 = getsc()) == '(') /*)*/
719 				/* XXX need to handle ((...); (...)) */
720 				c = MDPAREN;
721 			else
722 				ungetsc(c2);
723 #endif /* KSH */
724 			return c;
725 		  /*(*/
726 		  case ')':
727 			return c;
728 		}
729 	}
730 
731 	*wp++ = EOS;		/* terminate word */
732 	yylval.cp = Xclose(ws, wp);
733 	if (state == SWORD
734 #ifdef KSH
735 		|| state == SLETPAREN
736 #endif /* KSH */
737 		)	/* ONEWORD? */
738 		return LWORD;
739 	ungetsc(c);		/* unget terminator */
740 
741 	/* copy word to unprefixed string ident */
742 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
743 		*dp++ = *sp++;
744 	/* Make sure the ident array stays '\0' padded */
745 	memset(dp, 0, (ident+IDENT) - dp + 1);
746 	if (c != EOS)
747 		*ident = '\0';	/* word is not unquoted */
748 
749 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
750 		struct tbl *p;
751 		int h = hash(ident);
752 
753 		/* { */
754 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
755 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
756 		{
757 			afree(yylval.cp, ATEMP);
758 			return p->val.i;
759 		}
760 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
761 		    && (p->flag & ISSET))
762 		{
763 			register Source *s;
764 
			/* this alias is already being expanded by one of the
			 * SALIAS sources on top of the stack: return the
			 * word unexpanded
			 */
765 			for (s = source; s->type == SALIAS; s = s->next)
766 				if (s->u.tblp == p)
767 					return LWORD;
768 			/* push alias expansion */
769 			s = pushs(SALIAS, source->areap);
770 			s->start = s->str = p->val.s;
771 			s->u.tblp = p;
772 			s->next = source;
773 			source = s;
774 			afree(yylval.cp, ATEMP);
775 			goto Again;
776 		}
777 	}
778 
779 	return LWORD;
780 }
781 
782 static void
783 gethere()
784 {
785 	register struct ioword **p;
786 
787 	for (p = heres; p < herep; p++)
788 		readhere(*p);
789 	herep = heres;
790 }
791 
792 /*
793  * read "<<word" text into temp file
794  */
795 
796 static void
797 readhere(iop)
798 	struct ioword *iop;
799 {
800 	register int c;
801 	char *volatile eof;
802 	char *eofp;
803 	int skiptabs;
804 	XString xs;
805 	char *xp;
806 	int xpos;
807 
808 	eof = evalstr(iop->delim, 0);
809 
810 	if (!(iop->flag & IOEVAL))
811 		ignore_backslash_newline++;
812 
813 	Xinit(xs, xp, 256, ATEMP);
814 
815 	for (;;) {
816 		eofp = eof;
817 		skiptabs = iop->flag & IOSKIP;
818 		xpos = Xsavepos(xs, xp);
819 		while ((c = getsc()) != 0) {
820 			if (skiptabs) {
821 				if (c == '\t')
822 					continue;
823 				skiptabs = 0;
824 			}
825 			if (c != *eofp)
826 				break;
827 			Xcheck(xs, xp);
828 			Xput(xs, xp, c);
829 			eofp++;
830 		}
831 		/* Allow EOF here so commands with out trailing newlines
832 		 * will work (eg, ksh -c '...', $(...), etc).
833 		 */
834 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
835 			xp = Xrestpos(xs, xp, xpos);
836 			break;
837 		}
838 		ungetsc(c);
839 		while ((c = getsc()) != '\n') {
840 			if (c == 0)
841 				yyerror("here document `%s' unclosed\n", eof);
842 			Xcheck(xs, xp);
843 			Xput(xs, xp, c);
844 		}
845 		Xcheck(xs, xp);
846 		Xput(xs, xp, c);
847 	}
848 	Xput(xs, xp, '\0');
849 	iop->heredoc = Xclose(xs, xp);
850 
851 	if (!(iop->flag & IOEVAL))
852 		ignore_backslash_newline--;
853 }
854 
855 void
856 #ifdef HAVE_PROTOTYPES
857 yyerror(const char *fmt, ...)
858 #else
859 yyerror(fmt, va_alist)
860 	const char *fmt;
861 	va_dcl
862 #endif
863 {
864 	va_list va;
865 
866 	/* pop aliases and re-reads */
867 	while (source->type == SALIAS || source->type == SREREAD)
868 		source = source->next;
869 	source->str = null;	/* zap pending input */
870 
871 	error_prefix(TRUE);
872 	SH_VA_START(va, fmt);
873 	shf_vfprintf(shl_out, fmt, va);
874 	va_end(va);
875 	errorf(null);
876 }
877 
878 /*
879  * input for yylex with alias expansion
880  */
881 
882 Source *
883 pushs(type, areap)
884 	int type;
885 	Area *areap;
886 {
887 	register Source *s;
888 
889 	s = (Source *) alloc(sizeof(Source), areap);
890 	s->type = type;
891 	s->str = null;
892 	s->start = NULL;
893 	s->line = 0;
894 	s->errline = 0;
895 	s->file = NULL;
896 	s->flags = 0;
897 	s->next = NULL;
898 	s->areap = areap;
899 	if (type == SFILE || type == SSTDIN) {
900 		char *dummy;
901 		Xinit(s->xs, dummy, 256, s->areap);
902 	} else
903 		memset(&s->xs, 0, sizeof(s->xs));
904 	return s;
905 }
906 
/*
 * Slow path of getsc_(): called when the current source string is at its
 * terminating NUL.  Moves on to the next piece of input according to the
 * source type (reading a new line, stepping to the next word, popping an
 * alias or re-read source) and returns the next character, or 0 once the
 * input is exhausted; in that case the source is turned into SEOF.
 */
907 static int
908 getsc__()
909 {
910 	register Source *s = source;
911 	register int c;
912 
913 	while ((c = *s->str++) == 0) {
914 		s->str = NULL;		/* return 0 for EOF by default */
915 		switch (s->type) {
916 		  case SEOF:
917 			s->str = null;
918 			return 0;
919 
920 		  case SSTDIN:
921 		  case SFILE:
922 			getsc_line(s);
923 			break;
924 
		  /* nothing further to read for these types: s->str stays
		   * NULL and the EOF handling after the switch applies
		   */
925 		  case SWSTR:
926 			break;
927 
928 		  case SSTRING:
929 			break;
930 
931 		  case SWORDS:
			/* take the next word of the vector; a separator
			 * (or the final newline) comes after it
			 */
932 			s->start = s->str = *s->u.strv++;
933 			s->type = SWORDSEP;
934 			break;
935 
936 		  case SWORDSEP:
937 			if (*s->u.strv == NULL) {
938 				s->start = s->str = newline;
939 				s->type = SEOF;
940 			} else {
941 				s->start = s->str = space;
942 				s->type = SWORDS;
943 			}
944 			break;
945 
946 		  case SALIAS:
947 			if (s->flags & SF_ALIASEND) {
948 				/* pass on an unused SF_ALIAS flag */
949 				source = s->next;
950 				source->flags |= s->flags & SF_ALIAS;
951 				s = source;
952 			} else if (*s->u.tblp->val.s
953 				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
954 			{
955 				source = s = s->next;	/* pop source stack */
956 				/* Note that this alias ended with a space,
957 				 * enabling alias expansion on the following
958 				 * word.
959 				 */
960 				s->flags |= SF_ALIAS;
961 			} else {
962 				/* At this point, we need to keep the current
963 				 * alias in the source list so recursive
964 				 * aliases can be detected and we also need
965 				 * to return the next character.  Do this
966 				 * by temporarily popping the alias to get
967 				 * the next character and then put it back
968 				 * in the source list with the SF_ALIASEND
969 				 * flag set.
970 				 */
971 				source = s->next;	/* pop source stack */
972 				source->flags |= s->flags & SF_ALIAS;
973 				c = getsc__();
974 				if (c) {
975 					s->flags |= SF_ALIASEND;
976 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
977 					s->start = s->str = s->ugbuf;
978 					s->next = source;
979 					source = s;
980 				} else {
981 					s = source;
982 					/* avoid reading eof twice */
983 					s->str = NULL;
984 					break;
985 				}
986 			}
987 			continue;
988 
989 		  case SREREAD:
			/* finished re-reading: free the text unless it is the
			 * source's own ugbuf, then pop the source
			 */
990 			if (s->start != s->ugbuf) /* yuck */
991 				afree(s->u.freeme, ATEMP);
992 			source = s = s->next;
993 			continue;
994 		}
		/* no new input was supplied above: end of this source */
995 		if (s->str == NULL) {
996 			s->type = SEOF;
997 			s->start = s->str = null;
998 			return '\0';
999 		}
		/* echo the newly obtained input when SF_ECHO is set */
1000 		if (s->flags & SF_ECHO) {
1001 			shf_puts(s->str, shl_out);
1002 			shf_flush(shl_out);
1003 		}
1004 	}
1005 	return c;
1006 }
1007 
/*
 * Read the next line of input for source s into its expandable string
 * s->xs and point s->start and s->str at it.  When nothing could be read
 * s->str is set to NULL (and an SFILE source has its shf closed).
 *
 * When the shell is interactive and s is SSTDIN, the prompt is printed (or
 * the line editor is used, if EDIT is compiled in and an edit mode is on),
 * a non-empty line is added to the history (if HISTORY is compiled in),
 * and the prompt is switched to PS2 afterwards.
 */
1008 static void
1009 getsc_line(s)
1010 	Source *s;
1011 {
1012 	char *xp = Xstring(s->xs, xp);
1013 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
1014 	int have_tty = interactive && (s->flags & SF_TTY);
1015 
1016 	/* Done here to ensure nothing odd happens when a timeout occurs */
1017 	XcheckN(s->xs, xp, LINE);
1018 	*xp = '\0';
1019 	s->start = s->str = xp;
1020 
1021 #ifdef KSH
	/* arm the input timeout while waiting for terminal input */
1022 	if (have_tty && ksh_tmout) {
1023 		ksh_tmout_state = TMOUT_READING;
1024 		alarm(ksh_tmout);
1025 	}
1026 #endif /* KSH */
1027 #ifdef EDIT
1028 	if (have_tty && (0
1029 # ifdef VI
1030 			 || Flag(FVI)
1031 # endif /* VI */
1032 # ifdef EMACS
1033 			 || Flag(FEMACS) || Flag(FGMACS)
1034 # endif /* EMACS */
1035 		))
1036 	{
1037 		int nread;
1038 
1039 		nread = x_read(xp, LINE);
1040 		if (nread < 0)	/* read error */
1041 			nread = 0;
1042 		xp[nread] = '\0';
1043 		xp += nread;
1044 	}
1045 	else
1046 #endif /* EDIT */
1047 	{
1048 		if (interactive) {
1049 			pprompt(prompt, 0);
1050 		} else
1051 			s->line++;
1052 
		/* read until a newline or end of input, growing the buffer
		 * as needed and retrying reads interrupted by a signal
		 */
1053 		while (1) {
1054 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1055 
1056 			if (!p && shf_error(s->u.shf)
1057 			    && shf_errno(s->u.shf) == EINTR)
1058 			{
1059 				shf_clearerr(s->u.shf);
1060 				if (trap)
1061 					runtraps(0);
1062 				continue;
1063 			}
1064 			if (!p || (xp = p, xp[-1] == '\n'))
1065 				break;
1066 			/* double buffer size */
1067 			xp++; /* move past null so doubling works... */
1068 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1069 			xp--; /* ...and move back again */
1070 		}
1071 		/* flush any unwanted input so other programs/builtins
1072 		 * can read it.  Not very optimal, but less error prone
1073 		 * than flushing else where, dealing with redirections,
1074 		 * etc..
1075 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1076 		 */
1077 		if (s->type == SSTDIN)
1078 			shf_flush(s->u.shf);
1079 	}
1080 	/* XXX: temporary kludge to restore source after a
1081 	 * trap may have been executed.
1082 	 */
1083 	source = s;
1084 #ifdef KSH
	/* input has arrived: cancel the timeout */
1085 	if (have_tty && ksh_tmout)
1086 	{
1087 		ksh_tmout_state = TMOUT_EXECUTING;
1088 		alarm(0);
1089 	}
1090 #endif /* KSH */
	/* the buffer may have moved while growing: re-point at its start */
1091 	s->start = s->str = Xstring(s->xs, xp);
1092 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1093 	/* Note: if input is all nulls, this is not eof */
1094 	if (Xlength(s->xs, xp) == 0) { /* EOF */
1095 		if (s->type == SFILE)
1096 			shf_fdclose(s->u.shf);
1097 		s->str = NULL;
1098 	} else if (interactive) {
1099 #ifdef HISTORY
1100 		char *p = Xstring(s->xs, xp);
		/* at PS1, skip leading IFS white space so that a line
		 * containing nothing else is not saved
		 */
1101 		if (cur_prompt == PS1)
1102 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1103 				p++;
1104 		if (*p) {
1105 # ifdef EASY_HISTORY
1106 			if (cur_prompt == PS2)
1107 				histappend(Xstring(s->xs, xp), 1);
1108 			else
1109 # endif /* EASY_HISTORY */
1110 			{
1111 				s->line++;
1112 				histsave(s->line, s->str, 1);
1113 			}
1114 		}
1115 #endif /* HISTORY */
1116 	}
1117 	if (interactive)
1118 		set_prompt(PS2, (Source *) 0);
1119 }
1120 
/*
 * Select the prompt to use: records `to' (PS1 or PS2) in cur_prompt and
 * sets the global `prompt' string from the PS1 or PS2 parameter.
 *
 * With KSH defined, PS1 first has `!' replaced by a number (s->line + 1,
 * or 0 when s is null) and `!!' by a single `!', and the result is then
 * passed through substitute(); if that substitution fails, safe_prompt is
 * used instead.  s is not used for PS2.
 */
1121 void
1122 set_prompt(to, s)
1123 	int to;
1124 	Source *s;
1125 {
1126 	cur_prompt = to;
1127 
1128 	switch (to) {
1129 	case PS1: /* command */
1130 #ifdef KSH
1131 		/* Substitute ! and !! here, before substitutions are done
1132 		 * so ! in expanded variables are not expanded.
1133 		 * NOTE: this is not what at&t ksh does (it does it after
1134 		 * substitutions, POSIX doesn't say which is to be done.
1135 		 */
1136 		{
1137 			struct shf *shf;
1138 			char * volatile ps1;
1139 			Area *saved_atemp;
1140 
1141 			ps1 = str_val(global("PS1"));
1142 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1143 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1144 			while (*ps1) {
				/* ordinary character, or second `!' of `!!' */
1145 				if (*ps1 != '!' || *++ps1 == '!')
1146 					shf_putchar(*ps1++, shf);
1147 				else
1148 					shf_fprintf(shf, "%d",
1149 						s ? s->line + 1 : 0);
1150 			}
1151 			ps1 = shf_sclose(shf);
			/* remember the current ATEMP so that the result can
			 * be saved there from inside the new environment
			 */
1152 			saved_atemp = ATEMP;
1153 			newenv(E_ERRH);
1154 			if (ksh_sigsetjmp(e->jbuf, 0)) {
1155 				prompt = safe_prompt;
1156 				/* Don't print an error - assume it has already
1157 				 * been printed.  Reason is we may have forked
1158 				 * to run a command and the child may be
1159 				 * unwinding its stack through this code as it
1160 				 * exits.
1161 				 */
1162 			} else
1163 				prompt = str_save(substitute(ps1, 0),
1164 						 saved_atemp);
1165 			quitenv();
1166 		}
1167 #else /* KSH */
1168 		prompt = str_val(global("PS1"));
1169 #endif /* KSH */
1170 		break;
1171 
1172 	case PS2: /* command continuation */
1173 		prompt = str_val(global("PS2"));
1174 		break;
1175 	}
1176 }
1177 
1178 /* See also related routine, promptlen() in edit.c */
1179 void
1180 pprompt(cp, ntruncate)
1181 	const char *cp;
1182 	int ntruncate;
1183 {
1184 #if 0
1185 	char nbuf[32];
1186 	int c;
1187 
1188 	while (*cp != 0) {
1189 		if (*cp != '!')
1190 			c = *cp++;
1191 		else if (*++cp == '!')
1192 			c = *cp++;
1193 		else {
1194 			int len;
1195 			char *p;
1196 
1197 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1198 				source->line + 1);
1199 			len = strlen(nbuf);
1200 			if (ntruncate) {
1201 				if (ntruncate >= len) {
1202 					ntruncate -= len;
1203 					continue;
1204 				}
1205 				p += ntruncate;
1206 				len -= ntruncate;
1207 				ntruncate = 0;
1208 			}
1209 			shf_write(p, len, shl_out);
1210 			continue;
1211 		}
1212 		if (ntruncate)
1213 			--ntruncate;
1214 		else
1215 			shf_putc(c, shl_out);
1216 	}
1217 #endif /* 0 */
1218 	shf_puts(cp + ntruncate, shl_out);
1219 	shf_flush(shl_out);
1220 }
1221 
1222 /* Read the variable part of a ${...} expression (ie, up to but not including
1223  * the :[-+?=#%] or close-brace).
 *
 * Characters are read with getsc() and appended at wp in the expandable
 * string *wsp.  An optional leading `#' is accepted, followed by either an
 * identifier (optionally with a [subscript]), a run of digits, or a single
 * C_VAR1 character.  The variable part is terminated with a '\0', the
 * first character that does not belong to it is pushed back with
 * ungetsc(), and the updated output pointer is returned.
 * A subscript with no closing bracket is reported via yyerror().
1224  */
1225 static char *
1226 get_brace_var(wsp, wp)
1227 	XString *wsp;
1228 	char *wp;
1229 {
1230 	enum parse_state {
1231 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1232 			   PS_NUMBER, PS_VAR1, PS_END
1233 			 }
1234 		state;
1235 	char c;
1236 
1237 	state = PS_INITIAL;
1238 	while (1) {
1239 		c = getsc();
1240 		/* State machine to figure out where the variable part ends. */
1241 		switch (state) {
1242 		  case PS_INITIAL:
1243 			if (c == '#') {
1244 				state = PS_SAW_HASH;
1245 				break;
1246 			}
1247 			/* fall through.. */
1248 		  case PS_SAW_HASH:
1249 			if (letter(c))
1250 				state = PS_IDENT;
1251 			else if (digit(c))
1252 				state = PS_NUMBER;
1253 			else if (ctype(c, C_VAR1))
1254 				state = PS_VAR1;
1255 			else
1256 				state = PS_END;
1257 			break;
1258 		  case PS_IDENT:
1259 			if (!letnum(c)) {
1260 				state = PS_END;
1261 				if (c == '[') {
1262 					char *tmp, *p;
1263 
1264 					if (!arraysub(&tmp))
1265 						yyerror("missing ]\n");
					/* copy the `[' and the subscript text,
					 * which arraysub() returns with the
					 * closing `]' included
					 */
1266 					*wp++ = c;
1267 					for (p = tmp; *p; ) {
1268 						Xcheck(*wsp, wp);
1269 						*wp++ = *p++;
1270 					}
1271 					afree(tmp, ATEMP);
1272 					c = getsc(); /* the char after the ]; pushed back below */
1273 				}
1274 			}
1275 			break;
1276 		  case PS_NUMBER:
1277 			if (!digit(c))
1278 				state = PS_END;
1279 			break;
1280 		  case PS_VAR1:
1281 			state = PS_END;
1282 			break;
1283 		  case PS_END: /* keep gcc happy */
1284 			break;
1285 		}
1286 		if (state == PS_END) {
1287 			*wp++ = '\0';	/* end of variable part */
1288 			ungetsc(c);
1289 			break;
1290 		}
1291 		Xcheck(*wsp, wp);
1292 		*wp++ = c;
1293 	}
1294 	return wp;
1295 }
1296 
1297 /*
1298  * Save an array subscript - returns true if matching bracket found, false
1299  * if eof or newline was found.
1300  * (Returned string double null terminated)
1301  */
1302 static int
1303 arraysub(strp)
1304 	char **strp;
1305 {
1306 	XString ws;
1307 	char	*wp;
1308 	char	c;
1309 	int 	depth = 1;	/* we are just past the initial [ */
1310 
1311 	Xinit(ws, wp, 32, ATEMP);
1312 
1313 	do {
1314 		c = getsc();
1315 		Xcheck(ws, wp);
1316 		*wp++ = c;
1317 		if (c == '[')
1318 			depth++;
1319 		else if (c == ']')
1320 			depth--;
1321 	} while (depth > 0 && c && c != '\n');
1322 
1323 	*wp++ = '\0';
1324 	*strp = Xclose(ws, wp);
1325 
1326 	return depth == 0 ? 1 : 0;
1327 }
1328 
1329 /* Unget a char: handles case when we are already at the start of the buffer */
1330 static const char *
1331 ungetsc(c)
1332 	int c;
1333 {
1334 	if (backslash_skip)
1335 		backslash_skip--;
1336 	/* Don't unget eof... */
1337 	if (source->str == null && c == '\0')
1338 		return source->str;
1339 	if (source->str > source->start)
1340 		source->str--;
1341 	else {
1342 		Source *s;
1343 
1344 		s = pushs(SREREAD, source->areap);
1345 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1346 		s->start = s->str = s->ugbuf;
1347 		s->next = source;
1348 		source = s;
1349 	}
1350 	return source->str;
1351 }
1352 
1353 
1354 /* Called to get a char that isn't a \newline sequence. */
1355 static int
1356 getsc_bn ARGS((void))
1357 {
1358 	int c, c2;
1359 
1360 	if (ignore_backslash_newline)
1361 		return getsc_();
1362 
1363 	if (backslash_skip == 1) {
1364 		backslash_skip = 2;
1365 		return getsc_();
1366 	}
1367 
1368 	backslash_skip = 0;
1369 
1370 	while (1) {
1371 		c = getsc_();
1372 		if (c == '\\') {
1373 			if ((c2 = getsc_()) == '\n')
1374 				/* ignore the \newline; get the next char... */
1375 				continue;
1376 			ungetsc(c2);
1377 			backslash_skip = 1;
1378 		}
1379 		return c;
1380 	}
1381 }
1382 
1383 static Lex_state *
1384 push_state_(si, old_end)
1385 	State_info *si;
1386 	Lex_state *old_end;
1387 {
1388 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1389 
1390 	new[0].ls_info.base = old_end;
1391 	si->base = &new[0];
1392 	si->end = &new[STATE_BSIZE];
1393 	return &new[1];
1394 }
1395 
1396 static Lex_state *
1397 pop_state_(si, old_end)
1398 	State_info *si;
1399 	Lex_state *old_end;
1400 {
1401 	Lex_state *old_base = si->base;
1402 
1403 	si->base = old_end->ls_info.base - STATE_BSIZE;
1404 	si->end = old_end->ls_info.base;
1405 
1406 	afree(old_base, ATEMP);
1407 
1408 	return si->base + STATE_BSIZE - 1;
1409 }
1410