xref: /netbsd-src/bin/ksh/lex.c (revision 76dfffe33547c37f8bdd446e3e4ab0f3c16cea4b)
1 /*
2  * lexical analysis and source input
3  */
4 
5 #include "sh.h"
6 #include <ctype.h>
7 
8 static void	readhere ARGS((struct ioword *iop));
9 static int	getsc__ ARGS((void));
10 static void	getsc_line ARGS((Source *s));
11 static char	*get_brace_var ARGS((XString *wsp, char *wp));
12 static int	arraysub ARGS((char **strp));
13 static const char *ungetsc ARGS((int c));
14 static int	getsc_bn ARGS((void));
15 static void	gethere ARGS((void));
16 
17 static int backslash_skip;
18 static int ignore_backslash_newline;
19 
20 /* optimized getsc_bn() */
21 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
22 			 && !backslash_skip ? *source->str++ : getsc_bn())
23 /* optimized getsc__() */
24 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
25 
26 
27 /*
28  * Lexical analyzer
29  *
30  * tokens are not regular expressions, they are LL(1).
31  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
32  * hence the state stack.
33  */
34 
35 int
36 yylex(cf)
37 	int cf;
38 {
39 	register int c, state;
40 	char states [64], *statep = states; /* XXX overflow check */
41 	XString ws;		/* expandable output word */
42 	register char *wp;	/* output word pointer */
43 	register char *sp, *dp;
44 	char UNINITIALIZED(*ddparen_start);
45 	int istate;
46 	int UNINITIALIZED(c2);
47 	int UNINITIALIZED(nparen), UNINITIALIZED(csstate);
48 	int UNINITIALIZED(ndparen);
49 	int UNINITIALIZED(indquotes);
50 
51 
52   Again:
53 	Xinit(ws, wp, 64, ATEMP);
54 
55 	backslash_skip = 0;
56 	ignore_backslash_newline = 0;
57 
58 	if (cf&ONEWORD)
59 		istate = SWORD;
60 #ifdef KSH
61 	else if (cf&LETEXPR) {
62 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
63 		istate = SDPAREN;
64 		ndparen = 0;
65 	}
66 #endif /* KSH */
67 	else {		/* normal lexing */
68 		istate = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
69 		while ((c = getsc()) == ' ' || c == '\t')
70 			;
71 		if (c == '#') {
72 			ignore_backslash_newline++;
73 			while ((c = getsc()) != '\0' && c != '\n')
74 				;
75 			ignore_backslash_newline--;
76 		}
77 		ungetsc(c);
78 	}
79 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
80 		source->flags &= ~SF_ALIAS;
81 		/* In POSIX mode, a trailing space only counts if we are
82 		 * parsing a simple command
83 		 */
84 		if (!Flag(FPOSIX) || (cf & CMDWORD))
85 			cf |= ALIAS;
86 	}
87 
88 	/* collect non-special or quoted characters to form word */
89 	for (*statep = state = istate;
90 	     !((c = getsc()) == 0 || ((state == SBASE || state == SHEREDELIM)
91 				      && ctype(c, C_LEX1))); )
92 	{
93 		Xcheck(ws, wp);
94 		switch (state) {
95 		  case SBASE:
96 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
97 				*wp = EOS; /* temporary */
98 				if (is_wdvarname(Xstring(ws, wp), FALSE))
99 				{
100 					char *p, *tmp;
101 
102 					if (arraysub(&tmp)) {
103 						*wp++ = CHAR;
104 						*wp++ = c;
105 						for (p = tmp; *p; ) {
106 							Xcheck(ws, wp);
107 							*wp++ = CHAR;
108 							*wp++ = *p++;
109 						}
110 						afree(tmp, ATEMP);
111 						break;
112 					} else {
113 						Source *s;
114 
115 						s = pushs(SREREAD,
116 							  source->areap);
117 						s->start = s->str
118 							= s->u.freeme = tmp;
119 						s->next = source;
120 						source = s;
121 					}
122 				}
123 				*wp++ = CHAR;
124 				*wp++ = c;
125 				break;
126 			}
127 			/* fall through.. */
128 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
129 #ifdef KSH
130 			if (c == '*' || c == '@' || c == '+' || c == '?'
131 			    || c == '!')
132 			{
133 				c2 = getsc();
134 				if (c2 == '(' /*)*/ ) {
135 					*wp++ = OPAT;
136 					*wp++ = c;
137 					*++statep = state = SPATTERN;
138 					break;
139 				}
140 				ungetsc(c2);
141 			}
142 #endif /* KSH */
143 			/* fall through.. */
144 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
145 			switch (c) {
146 			  case '\\':
147 				c = getsc();
148 #ifdef OS2
149 				if (isalnum(c)) {
150 					*wp++ = CHAR, *wp++ = '\\';
151 					*wp++ = CHAR, *wp++ = c;
152 				} else
153 #endif
154 				if (c) /* trailing \ is lost */
155 					*wp++ = QCHAR, *wp++ = c;
156 				break;
157 			  case '\'':
158 				*++statep = state = SSQUOTE;
159 				*wp++ = OQUOTE;
160 				ignore_backslash_newline++;
161 				break;
162 			  case '"':
163 				*++statep = state = SDQUOTE;
164 				*wp++ = OQUOTE;
165 				break;
166 			  default:
167 				goto Subst;
168 			}
169 			break;
170 
171 		  Subst:
172 			switch (c) {
173 			  case '\\':
174 				c = getsc();
175 				switch (c) {
176 				  case '"': case '\\':
177 				  case '$': case '`':
178 					*wp++ = QCHAR, *wp++ = c;
179 					break;
180 				  default:
181 					Xcheck(ws, wp);
182 					if (c) { /* trailing \ is lost */
183 						*wp++ = CHAR, *wp++ = '\\';
184 						*wp++ = CHAR, *wp++ = c;
185 					}
186 					break;
187 				}
188 				break;
189 			  case '$':
190 				c = getsc();
191 				if (c == '(') /*)*/ {
192 					c = getsc();
193 					if (c == '(') /*)*/ {
194 						*++statep = state = SDDPAREN;
195 						nparen = 2;
196 						ddparen_start = wp;
197 						*wp++ = EXPRSUB;
198 					} else {
199 						ungetsc(c);
200 						*++statep = state = SPAREN;
201 						nparen = 1;
202 						csstate = 0;
203 						*wp++ = COMSUB;
204 					}
205 				} else if (c == '{') /*}*/ {
206 					*wp++ = OSUBST;
207 					wp = get_brace_var(&ws, wp);
208 					/* If this is a trim operation,
209 					 * wrap @(...) around the pattern
210 					 * (allows easy handling of ${a#b|c})
211 					 */
212 					c = getsc();
213 					if (c == '#' || c == '%') {
214 						*wp++ = CHAR, *wp++ = c;
215 						if ((c2 = getsc()) == c)
216 							*wp++ = CHAR, *wp++ = c;
217 						else
218 							ungetsc(c2);
219 						*wp++ = OPAT, *wp++ = '@';
220 						*++statep = state = STBRACE;
221 					} else {
222 						ungetsc(c);
223 						*++statep = state = SBRACE;
224 					}
225 				} else if (ctype(c, C_ALPHA)) {
226 					*wp++ = OSUBST;
227 					do {
228 						Xcheck(ws, wp);
229 						*wp++ = c;
230 						c = getsc();
231 					} while (ctype(c, C_ALPHA|C_DIGIT));
232 					*wp++ = '\0';
233 					*wp++ = CSUBST;
234 					ungetsc(c);
235 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
236 					Xcheck(ws, wp);
237 					*wp++ = OSUBST;
238 					*wp++ = c;
239 					*wp++ = '\0';
240 					*wp++ = CSUBST;
241 				} else {
242 					*wp++ = CHAR, *wp++ = '$';
243 					ungetsc(c);
244 				}
245 				break;
246 			  case '`':
247 				*++statep = state = SBQUOTE;
248 				*wp++ = COMSUB;
249 				/* Need to know if we are inside double quotes
250 				 * since sh/at&t-ksh translate the \" to " in
251 				 * "`..\"..`".
252 				 */
253 				indquotes = 0;
254 				if (!Flag(FPOSIX))
255 					for (sp = statep; sp > states; --sp)
256 						if (*sp == SDQUOTE)
257 							indquotes = 1;
258 				break;
259 			  default:
260 				*wp++ = CHAR, *wp++ = c;
261 			}
262 			break;
263 
264 		  case SSQUOTE:
265 			if (c == '\'') {
266 				state = *--statep;
267 				*wp++ = CQUOTE;
268 				ignore_backslash_newline--;
269 			} else
270 				*wp++ = QCHAR, *wp++ = c;
271 			break;
272 
273 		  case SDQUOTE:
274 			if (c == '"') {
275 				state = *--statep;
276 				*wp++ = CQUOTE;
277 			} else
278 				goto Subst;
279 			break;
280 
281 		  case SPAREN: /* $( .. ) */
282 			/* todo: deal with $(...) quoting properly
283 			 * kludge to partly fake quoting inside $(..): doesn't
284 			 * really work because nested $(..) or ${..} inside
285 			 * double quotes aren't dealt with.
286 			 */
287 			switch (csstate) {
288 			  case 0: /* normal */
289 				switch (c) {
290 				  case '(':
291 					nparen++;
292 					break;
293 				  case ')':
294 					nparen--;
295 					break;
296 				  case '\\':
297 					csstate = 1;
298 					break;
299 				  case '"':
300 					csstate = 2;
301 					break;
302 				  case '\'':
303 					csstate = 4;
304 					ignore_backslash_newline++;
305 					break;
306 				}
307 				break;
308 
309 			  case 1: /* backslash in normal mode */
310 			  case 3: /* backslash in double quotes */
311 				--csstate;
312 				break;
313 
314 			  case 2: /* double quotes */
315 				if (c == '"')
316 					csstate = 0;
317 				else if (c == '\\')
318 					csstate = 3;
319 				break;
320 
321 			  case 4: /* single quotes */
322 				if (c == '\'') {
323 					csstate = 0;
324 					ignore_backslash_newline--;
325 				}
326 				break;
327 			}
328 			if (nparen == 0) {
329 				state = *--statep;
330 				*wp++ = 0; /* end of COMSUB */
331 			} else
332 				*wp++ = c;
333 			break;
334 
335 		  case SDDPAREN: /* $(( .. )) */
336 			/* todo: deal with $((...); (...)) properly */
337 			/* XXX should nest using existing state machine
338 			 *     (embed "..", $(...), etc.) */
339 			if (c == '(')
340 				nparen++;
341 			else if (c == ')') {
342 				nparen--;
343 				if (nparen == 1) {
344 					/*(*/
345 					if ((c2 = getsc()) == ')') {
346 						state = *--statep;
347 						*wp++ = 0; /* end of EXPRSUB */
348 						break;
349 					} else {
350 						ungetsc(c2);
351 						/* mismatched parenthesis -
352 						 * assume we were really
353 						 * parsing a $(..) expression
354 						 */
355 						memmove(ddparen_start + 1,
356 							ddparen_start,
357 							wp - ddparen_start);
358 						*ddparen_start++ = COMSUB;
359 						*ddparen_start = '('; /*)*/
360 						wp++;
361 						csstate = 0;
362 						*statep = state = SPAREN;
363 					}
364 				}
365 			}
366 			*wp++ = c;
367 			break;
368 
369 		  case SBRACE:
370 			/*{*/
371 			if (c == '}') {
372 				state = *--statep;
373 				*wp++ = CSUBST;
374 			} else
375 				goto Sbase1;
376 			break;
377 
378 		  case STBRACE:
379 			/* same as SBRACE, except | is saved as SPAT and
380 			 * CPAT is added at the end.
381 			 */
382 			/*{*/
383 			if (c == '}') {
384 				state = *--statep;
385 				*wp++ = CPAT;
386 				*wp++ = CSUBST;
387 			} else if (c == '|') {
388 				*wp++ = SPAT;
389 			} else
390 				goto Sbase1;
391 			break;
392 
393 		  case SBQUOTE:
394 			if (c == '`') {
395 				*wp++ = 0;
396 				state = *--statep;
397 			} else if (c == '\\') {
398 				switch (c = getsc()) {
399 				  case '\\':
400 				  case '$': case '`':
401 					*wp++ = c;
402 					break;
403 				  case '"':
404 					if (indquotes) {
405 						*wp++ = c;
406 						break;
407 					}
408 					/* fall through.. */
409 				  default:
410 					if (c) { /* trailing \ is lost */
411 						*wp++ = '\\';
412 						*wp++ = c;
413 					}
414 					break;
415 				}
416 			} else
417 				*wp++ = c;
418 			break;
419 
420 		  case SWORD:	/* ONEWORD */
421 			goto Subst;
422 
423 #ifdef KSH
424 		  case SDPAREN:	/* LETEXPR: (( ... )) */
425 			/*(*/
426 			if (c == ')') {
427 				if (ndparen > 0)
428 				    --ndparen;
429 				/*(*/
430 				else if ((c2 = getsc()) == ')') {
431 					c = 0;
432 					*wp++ = CQUOTE;
433 					goto Done;
434 				} else
435 					ungetsc(c2);
436 			} else if (c == '(')
437 				/* parenthesis inside quotes and backslashes
438 				 * are lost, but at&t ksh doesn't count them
439 				 * either
440 				 */
441 				++ndparen;
442 			goto Sbase2;
443 #endif /* KSH */
444 
445 		  case SHEREDELIM:	/* <<,<<- delimiter */
446 			/* XXX chuck this state (and the next) - use
447 			 * the existing states ($ and \`..` should be
448 			 * stripped of their specialness after the
449 			 * fact).
450 			 */
451 			/* here delimiters need a special case since
452 			 * $ and `..` are not to be treated specially
453 			 */
454 			if (c == '\\') {
455 				c = getsc();
456 				if (c) { /* trailing \ is lost */
457 					*wp++ = QCHAR;
458 					*wp++ = c;
459 				}
460 			} else if (c == '\'') {
461 				*++statep = state = SSQUOTE;
462 				*wp++ = OQUOTE;
463 				ignore_backslash_newline++;
464 			} else if (c == '"') {
465 				state = SHEREDQUOTE;
466 				*wp++ = OQUOTE;
467 			} else {
468 				*wp++ = CHAR;
469 				*wp++ = c;
470 			}
471 			break;
472 
473 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
474 			if (c == '"') {
475 				*wp++ = CQUOTE;
476 				state = SHEREDELIM;
477 			} else {
478 				if (c == '\\') {
479 					switch (c = getsc()) {
480 					  case '\\': case '"':
481 					  case '$': case '`':
482 						break;
483 					  default:
484 						if (c) { /* trailing \ lost */
485 							*wp++ = CHAR;
486 							*wp++ = '\\';
487 						}
488 						break;
489 					}
490 				}
491 				*wp++ = CHAR;
492 				*wp++ = c;
493 			}
494 			break;
495 
496 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
497 			if ( /*(*/ c == ')') {
498 				*wp++ = CPAT;
499 				state = *--statep;
500 			} else if (c == '|')
501 				*wp++ = SPAT;
502 			else
503 				goto Sbase1;
504 			break;
505 		}
506 	}
507 Done:
508 	Xcheck(ws, wp);
509 	if (state != istate)
510 		yyerror("no closing quote\n");
511 
512 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
513 	if (state == SHEREDELIM)
514 		state = SBASE;
515 
516 	if ((c == '<' || c == '>') && state == SBASE) {
517 		char *cp = Xstring(ws, wp);
518 		if (Xlength(ws, wp) == 2 && cp[0] == CHAR && digit(cp[1])) {
519 			wp = cp; /* throw away word */
520 			c2/*unit*/ = cp[1] - '0';
521 		} else
522 			c2/*unit*/ = c == '>'; /* 0 for <, 1 for > */
523 	}
524 
525 	if (wp == Xstring(ws, wp) && state == SBASE) {
526 		Xfree(ws, wp);	/* free word */
527 		/* no word, process LEX1 character */
528 		switch (c) {
529 		  default:
530 			return c;
531 
532 		  case '|':
533 		  case '&':
534 		  case ';':
535 			if ((c2 = getsc()) == c)
536 				c = (c == ';') ? BREAK :
537 				    (c == '|') ? LOGOR :
538 				    (c == '&') ? LOGAND :
539 				    YYERRCODE;
540 #ifdef KSH
541 			else if (c == '|' && c2 == '&')
542 				c = COPROC;
543 #endif /* KSH */
544 			else
545 				ungetsc(c2);
546 			return c;
547 
548 		  case '>':
549 		  case '<': {
550 			register struct ioword *iop;
551 
552 			iop = (struct ioword *) alloc(sizeof(*iop), ATEMP);
553 			iop->unit = c2/*unit*/;
554 
555 			c2 = getsc();
556 			/* <<, >>, <> are ok, >< is not */
557 			if (c == c2 || (c == '<' && c2 == '>')) {
558 				iop->flag = c == c2 ?
559 					  (c == '>' ? IOCAT : IOHERE) : IORDWR;
560 				if (iop->flag == IOHERE)
561 					if ((c2 = getsc()) == '-')
562 						iop->flag |= IOSKIP;
563 					else
564 						ungetsc(c2);
565 			} else if (c2 == '&')
566 				iop->flag = IODUP | (c == '<' ? IORDUP : 0);
567 			else {
568 				iop->flag = c == '>' ? IOWRITE : IOREAD;
569 				if (c == '>' && c2 == '|')
570 					iop->flag |= IOCLOB;
571 				else
572 					ungetsc(c2);
573 			}
574 
575 			iop->name = (char *) 0;
576 			iop->delim = (char *) 0;
577 			yylval.iop = iop;
578 			return REDIR;
579 		    }
580 		  case '\n':
581 			gethere();
582 			if (cf & CONTIN)
583 				goto Again;
584 			return c;
585 
586 		  case '(':  /*)*/
587 #ifdef KSH
588 			if ((c2 = getsc()) == '(') /*)*/
589 				c = MDPAREN;
590 			else
591 				ungetsc(c2);
592 #endif /* KSH */
593 			return c;
594 		  /*(*/
595 		  case ')':
596 			return c;
597 		}
598 	}
599 
600 	*wp++ = EOS;		/* terminate word */
601 	yylval.cp = Xclose(ws, wp);
602 	if (state == SWORD
603 #ifdef KSH
604 		|| state == SDPAREN
605 #endif /* KSH */
606 		)	/* ONEWORD? */
607 		return LWORD;
608 	ungetsc(c);		/* unget terminator */
609 
610 	/* copy word to unprefixed string ident */
611 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
612 		*dp++ = *sp++;
613 	/* Make sure the ident array stays '\0' paded */
614 	memset(dp, 0, (ident+IDENT) - dp + 1);
615 	if (c != EOS)
616 		*ident = '\0';	/* word is not unquoted */
617 
618 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
619 		struct tbl *p;
620 		int h = hash(ident);
621 
622 		/* { */
623 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
624 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
625 		{
626 			afree(yylval.cp, ATEMP);
627 			return p->val.i;
628 		}
629 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
630 		    && (p->flag & ISSET))
631 		{
632 			register Source *s;
633 
634 			for (s = source; s->type == SALIAS; s = s->next)
635 				if (s->u.tblp == p)
636 					return LWORD;
637 			/* push alias expansion */
638 			s = pushs(SALIAS, source->areap);
639 			s->start = s->str = p->val.s;
640 			s->u.tblp = p;
641 			s->next = source;
642 			source = s;
643 			afree(yylval.cp, ATEMP);
644 			goto Again;
645 		}
646 	}
647 
648 	return LWORD;
649 }
650 
651 static void
652 gethere()
653 {
654 	register struct ioword **p;
655 
656 	for (p = heres; p < herep; p++)
657 		readhere(*p);
658 	herep = heres;
659 }
660 
661 /*
662  * read "<<word" text into temp file
663  */
664 
665 static void
666 readhere(iop)
667 	register struct ioword *iop;
668 {
669 	struct shf *volatile shf;
670 	struct temp *h;
671 	register int c;
672 	char *volatile eof;
673 	char *eofp;
674 	int skiptabs;
675 	int i;
676 
677 	eof = evalstr(iop->delim, 0);
678 
679 	if (e->flags & EF_FUNC_PARSE) {
680 		h = maketemp(APERM);
681 		h->next = func_heredocs;
682 		func_heredocs = h;
683 	} else {
684 		h = maketemp(ATEMP);
685 		h->next = e->temps;
686 		e->temps = h;
687 	}
688 	iop->name = h->name;
689 	if (!(shf = h->shf))
690 		yyerror("cannot create temporary file %s - %s\n",
691 			h->name, strerror(errno));
692 
693 	newenv(E_ERRH);
694 	i = ksh_sigsetjmp(e->jbuf, 0);
695 	if (i) {
696 		quitenv();
697 		shf_close(shf);
698 		unwind(i);
699 	}
700 
701 	if (!(iop->flag & IOEVAL))
702 		ignore_backslash_newline++;
703 
704 	for (;;) {
705 		eofp = eof;
706 		skiptabs = iop->flag & IOSKIP;
707 		while ((c = getsc()) != 0) {
708 			if (skiptabs) {
709 				if (c == '\t')
710 					continue;
711 				skiptabs = 0;
712 			}
713 			if (c != *eofp)
714 				break;
715 			eofp++;
716 		}
717 		/* Allow EOF here so commands with out trailing newlines
718 		 * will work (eg, ksh -c '...', $(...), etc).
719 		 */
720 		if (*eofp == '\0' && (c == 0 || c == '\n'))
721 			break;
722 		ungetsc(c);
723 		shf_write(eof, eofp - eof, shf);
724 		while ((c = getsc()) != '\n') {
725 			if (c == 0)
726 				yyerror("here document `%s' unclosed\n", eof);
727 			shf_putc(c, shf);
728 		}
729 		shf_putc(c, shf);
730 	}
731 	shf_flush(shf);
732 	if (shf_error(shf))
733 		yyerror("error saving here document `%s': %s\n",
734 			eof, strerror(shf_errno(shf)));
735 	/*XXX add similar checks for write errors everywhere */
736 	quitenv();
737 	shf_close(shf);
738 	if (!(iop->flag & IOEVAL))
739 		ignore_backslash_newline--;
740 }
741 
742 void
743 #ifdef HAVE_PROTOTYPES
744 yyerror(const char *fmt, ...)
745 #else
746 yyerror(fmt, va_alist)
747 	const char *fmt;
748 	va_dcl
749 #endif
750 {
751 	va_list va;
752 
753 	yynerrs++;
754 	/* pop aliases and re-reads */
755 	while (source->type == SALIAS || source->type == SREREAD)
756 		source = source->next;
757 	source->str = null;	/* zap pending input */
758 
759 	error_prefix(TRUE);
760 	SH_VA_START(va, fmt);
761 	shf_vfprintf(shl_out, fmt, va);
762 	va_end(va);
763 	errorf(null);
764 }
765 
766 /*
767  * input for yylex with alias expansion
768  */
769 
770 Source *
771 pushs(type, areap)
772 	int type;
773 	Area *areap;
774 {
775 	register Source *s;
776 
777 	s = (Source *) alloc(sizeof(Source), areap);
778 	s->type = type;
779 	s->str = null;
780 	s->start = NULL;
781 	s->line = 0;
782 	s->errline = 0;
783 	s->file = NULL;
784 	s->flags = 0;
785 	s->next = NULL;
786 	s->areap = areap;
787 	if (type == SFILE || type == SSTDIN) {
788 		char *dummy;
789 		Xinit(s->xs, dummy, 256, s->areap);
790 	} else
791 		memset(&s->xs, 0, sizeof(s->xs));
792 	return s;
793 }
794 
795 static int
796 getsc__()
797 {
798 	register Source *s = source;
799 	register int c;
800 
801 	while ((c = *s->str++) == 0) {
802 		s->str = NULL;		/* return 0 for EOF by default */
803 		switch (s->type) {
804 		  case SEOF:
805 			s->str = null;
806 			return 0;
807 
808 		  case SSTDIN:
809 		  case SFILE:
810 			getsc_line(s);
811 			break;
812 
813 		  case SWSTR:
814 			break;
815 
816 		  case SSTRING:
817 			break;
818 
819 		  case SWORDS:
820 			s->start = s->str = *s->u.strv++;
821 			s->type = SWORDSEP;
822 			break;
823 
824 		  case SWORDSEP:
825 			if (*s->u.strv == NULL) {
826 				s->start = s->str = newline;
827 				s->type = SEOF;
828 			} else {
829 				s->start = s->str = space;
830 				s->type = SWORDS;
831 			}
832 			break;
833 
834 		  case SALIAS:
835 			if (s->flags & SF_ALIASEND) {
836 				/* pass on an unused SF_ALIAS flag */
837 				source = s->next;
838 				source->flags |= s->flags & SF_ALIAS;
839 				s = source;
840 			} else if (*s->u.tblp->val.s
841 				 && isspace(strchr(s->u.tblp->val.s, 0)[-1]))
842 			{
843 				source = s = s->next;	/* pop source stack */
844 				/* Note that this alias ended with a space,
845 				 * enabling alias expansion on the following
846 				 * word.
847 				 */
848 				s->flags |= SF_ALIAS;
849 			} else {
850 				/* At this point, we need to keep the current
851 				 * alias in the source list so recursive
852 				 * aliases can be detected and we also need
853 				 * to return the next character.  Do this
854 				 * by temporarily popping the alias to get
855 				 * the next character and then put it back
856 				 * in the source list with the SF_ALIASEND
857 				 * flag set.
858 				 */
859 				source = s->next;	/* pop source stack */
860 				source->flags |= s->flags & SF_ALIAS;
861 				c = getsc__();
862 				if (c) {
863 					s->flags |= SF_ALIASEND;
864 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
865 					s->start = s->str = s->ugbuf;
866 					s->next = source;
867 					source = s;
868 				} else {
869 					s = source;
870 					/* avoid reading eof twice */
871 					s->str = NULL;
872 				}
873 			}
874 			continue;
875 
876 		  case SREREAD:
877 			if (s->start != s->ugbuf) /* yuck */
878 				afree(s->u.freeme, ATEMP);
879 			source = s = s->next;
880 			continue;
881 		}
882 		if (s->str == NULL) {
883 			s->type = SEOF;
884 			s->start = s->str = null;
885 			return '\0';
886 		}
887 		if (s->flags & SF_ECHO) {
888 			shf_puts(s->str, shl_out);
889 			shf_flush(shl_out);
890 		}
891 	}
892 	return c;
893 }
894 
895 static void
896 getsc_line(s)
897 	Source *s;
898 {
899 	char *xp = Xstring(s->xs, xp);
900 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
901 	int have_tty = interactive && (s->flags & SF_TTY);
902 
903 	/* Done here to ensure nothing odd happens when a timeout occurs */
904 	XcheckN(s->xs, xp, LINE);
905 	*xp = '\0';
906 	s->start = s->str = xp;
907 
908 #ifdef KSH
909 	if (have_tty && ksh_tmout) {
910 		ksh_tmout_state = TMOUT_READING;
911 		alarm(ksh_tmout);
912 	}
913 #endif /* KSH */
914 #ifdef EDIT
915 	if (have_tty && (0
916 # ifdef VI
917 			 || Flag(FVI)
918 # endif /* VI */
919 # ifdef EMACS
920 			 || Flag(FEMACS) || Flag(FGMACS)
921 # endif /* EMACS */
922 		))
923 	{
924 		int nread;
925 
926 		nread = x_read(xp, LINE);
927 		if (nread < 0)	/* read error */
928 			nread = 0;
929 		xp[nread] = '\0';
930 		xp += nread;
931 	}
932 	else
933 #endif /* EDIT */
934 	{
935 		if (interactive) {
936 			pprompt(prompt, 0);
937 #ifdef OS2
938 			setmode (0, O_TEXT);
939 #endif /* OS2 */
940 		} else
941 			s->line++;
942 
943 		while (1) {
944 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
945 
946 			if (!p && shf_error(s->u.shf)
947 			    && shf_errno(s->u.shf) == EINTR)
948 			{
949 				shf_clearerr(s->u.shf);
950 				if (trap)
951 					runtraps(0);
952 				continue;
953 			}
954 			if (!p || (xp = p, xp[-1] == '\n'))
955 				break;
956 			/* double buffer size */
957 			xp++; /* move past null so doubling works... */
958 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
959 			xp--; /* ...and move back again */
960 		}
961 #ifdef OS2
962 		setmode(0, O_BINARY);
963 #endif /* OS2 */
964 		/* flush any unwanted input so other programs/builtins
965 		 * can read it.  Not very optimal, but less error prone
966 		 * than flushing else where, dealing with redirections,
967 		 * etc..
968 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
969 		 */
970 		if (s->type == SSTDIN)
971 			shf_flush(s->u.shf);
972 	}
973 	/* XXX: temporary kludge to restore source after a
974 	 * trap may have been executed.
975 	 */
976 	source = s;
977 #ifdef KSH
978 	if (have_tty && ksh_tmout)
979 	{
980 		ksh_tmout_state = TMOUT_EXECUTING;
981 		alarm(0);
982 	}
983 #endif /* KSH */
984 	s->start = s->str = Xstring(s->xs, xp);
985 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
986 	/* Note: if input is all nulls, this is not eof */
987 	if (Xlength(s->xs, xp) == 0) { /* EOF */
988 		if (s->type == SFILE)
989 			shf_fdclose(s->u.shf);
990 		s->str = NULL;
991 	} else if (interactive) {
992 #ifdef HISTORY
993 		char *p = Xstring(s->xs, xp);
994 		if (cur_prompt == PS1)
995 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
996 				p++;
997 		if (*p) {
998 # ifdef EASY_HISTORY
999 			if (cur_prompt == PS2)
1000 				histappend(Xstring(s->xs, xp), 1);
1001 			else
1002 # endif /* EASY_HISTORY */
1003 			{
1004 				s->line++;
1005 				histsave(s->line, s->str, 1);
1006 			}
1007 		}
1008 #endif /* HISTORY */
1009 	}
1010 	if (interactive)
1011 		set_prompt(PS2, (Source *) 0);
1012 }
1013 
1014 void
1015 set_prompt(to, s)
1016 	int to;
1017 	Source *s;
1018 {
1019 	cur_prompt = to;
1020 
1021 	switch (to) {
1022 	case PS1: /* command */
1023 #ifdef KSH
1024 		/* Substitute ! and !! here, before substitutions are done
1025 		 * so ! in expanded variables are not expanded.
1026 		 * NOTE: this is not what at&t ksh does (it does it after
1027 		 * substitutions, POSIX doesn't say which is to be done.
1028 		 */
1029 		{
1030 			struct shf *shf;
1031 			char *ps1;
1032 			Area *saved_atemp;
1033 
1034 			ps1 = str_val(global("PS1"));
1035 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1036 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1037 			while (*ps1) {
1038 				if (*ps1 != '!' || *++ps1 == '!')
1039 					shf_putchar(*ps1++, shf);
1040 				else
1041 					shf_fprintf(shf, "%d",
1042 						s ? s->line + 1 : 0);
1043 			}
1044 			ps1 = shf_sclose(shf);
1045 			saved_atemp = ATEMP;
1046 			newenv(E_ERRH);
1047 			if (ksh_sigsetjmp(e->jbuf, 0)) {
1048 				prompt = safe_prompt;
1049 				/* Don't print an error - assume it has already
1050 				 * been printed.  Reason is we may have forked
1051 				 * to run a command and the child may be
1052 				 * unwinding its stack through this code as it
1053 				 * exits.
1054 				 */
1055 			} else
1056 				prompt = str_save(substitute(ps1, 0),
1057 						 saved_atemp);
1058 			quitenv();
1059 		}
1060 #else /* KSH */
1061 		prompt = str_val(global("PS1"));
1062 #endif /* KSH */
1063 		break;
1064 
1065 	case PS2: /* command continuation */
1066 		prompt = str_val(global("PS2"));
1067 		break;
1068 	}
1069 }
1070 
1071 /* See also related routine, promptlen() in edit.c */
1072 void
1073 pprompt(cp, ntruncate)
1074 	const char *cp;
1075 	int ntruncate;
1076 {
1077 #if 0
1078 	char nbuf[32];
1079 	int c;
1080 
1081 	while (*cp != 0) {
1082 		if (*cp != '!')
1083 			c = *cp++;
1084 		else if (*++cp == '!')
1085 			c = *cp++;
1086 		else {
1087 			int len;
1088 			char *p;
1089 
1090 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1091 				source->line + 1);
1092 			len = strlen(nbuf);
1093 			if (ntruncate) {
1094 				if (ntruncate >= len) {
1095 					ntruncate -= len;
1096 					continue;
1097 				}
1098 				p += ntruncate;
1099 				len -= ntruncate;
1100 				ntruncate = 0;
1101 			}
1102 			shf_write(p, len, shl_out);
1103 			continue;
1104 		}
1105 		if (ntruncate)
1106 			--ntruncate;
1107 		else
1108 			shf_putc(c, shl_out);
1109 	}
1110 #endif /* 0 */
1111 	shf_puts(cp + ntruncate, shl_out);
1112 	shf_flush(shl_out);
1113 }
1114 
1115 /* Read the variable part of a ${...} expression (ie, up to but not including
1116  * the :[-+?=#%] or close-brace.
1117  */
1118 static char *
1119 get_brace_var(wsp, wp)
1120 	XString *wsp;
1121 	char *wp;
1122 {
1123 	enum parse_state {
1124 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1125 			   PS_NUMBER, PS_VAR1, PS_END
1126 			 }
1127 		state;
1128 	char c;
1129 
1130 	state = PS_INITIAL;
1131 	while (1) {
1132 		c = getsc();
1133 		/* State machine to figure out where the variable part ends. */
1134 		switch (state) {
1135 		  case PS_INITIAL:
1136 			if (c == '#') {
1137 				state = PS_SAW_HASH;
1138 				break;
1139 			}
1140 			/* fall through.. */
1141 		  case PS_SAW_HASH:
1142 			if (letter(c))
1143 				state = PS_IDENT;
1144 			else if (digit(c))
1145 				state = PS_NUMBER;
1146 			else if (ctype(c, C_VAR1))
1147 				state = PS_VAR1;
1148 			else
1149 				state = PS_END;
1150 			break;
1151 		  case PS_IDENT:
1152 			if (!letnum(c)) {
1153 				state = PS_END;
1154 				if (c == '[') {
1155 					char *tmp, *p;
1156 
1157 					if (!arraysub(&tmp))
1158 						yyerror("missing ]\n");
1159 					*wp++ = c;
1160 					for (p = tmp; *p; ) {
1161 						Xcheck(*wsp, wp);
1162 						*wp++ = *p++;
1163 					}
1164 					afree(tmp, ATEMP);
1165 					c = getsc(); /* the ] */
1166 				}
1167 			}
1168 			break;
1169 		  case PS_NUMBER:
1170 			if (!digit(c))
1171 				state = PS_END;
1172 			break;
1173 		  case PS_VAR1:
1174 			state = PS_END;
1175 			break;
1176 		  case PS_END: /* keep gcc happy */
1177 			break;
1178 		}
1179 		if (state == PS_END) {
1180 			*wp++ = '\0';	/* end of variable part */
1181 			ungetsc(c);
1182 			break;
1183 		}
1184 		Xcheck(*wsp, wp);
1185 		*wp++ = c;
1186 	}
1187 	return wp;
1188 }
1189 
1190 /*
1191  * Save an array subscript - returns true if matching bracket found, false
1192  * if eof or newline was found.
1193  * (Returned string double null terminated)
1194  */
1195 static int
1196 arraysub(strp)
1197 	char **strp;
1198 {
1199 	XString ws;
1200 	char	*wp;
1201 	char	c;
1202 	int 	depth = 1;	/* we are just past the initial [ */
1203 
1204 	Xinit(ws, wp, 32, ATEMP);
1205 
1206 	do {
1207 		c = getsc();
1208 		Xcheck(ws, wp);
1209 		*wp++ = c;
1210 		if (c == '[')
1211 			depth++;
1212 		else if (c == ']')
1213 			depth--;
1214 	} while (depth > 0 && c && c != '\n');
1215 
1216 	*wp++ = '\0';
1217 	*strp = Xclose(ws, wp);
1218 
1219 	return depth == 0 ? 1 : 0;
1220 }
1221 
1222 /* Unget a char: handles case when we are already at the start of the buffer */
1223 static const char *
1224 ungetsc(c)
1225 	int c;
1226 {
1227 	if (backslash_skip)
1228 		backslash_skip--;
1229 	/* Don't unget eof... */
1230 	if (source->str == null && c == '\0')
1231 		return source->str;
1232 	if (source->str > source->start)
1233 		source->str--;
1234 	else {
1235 		Source *s;
1236 
1237 		s = pushs(SREREAD, source->areap);
1238 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1239 		s->start = s->str = s->ugbuf;
1240 		s->next = source;
1241 		source = s;
1242 	}
1243 	return source->str;
1244 }
1245 
1246 
1247 /* Called to get a char that isn't a \newline sequence. */
1248 static int
1249 getsc_bn ARGS((void))
1250 {
1251 	int c, c2;
1252 
1253 	if (ignore_backslash_newline)
1254 		return getsc_();
1255 
1256 	if (backslash_skip == 1) {
1257 		backslash_skip = 2;
1258 		return getsc_();
1259 	}
1260 
1261 	backslash_skip = 0;
1262 
1263 	while (1) {
1264 		c = getsc_();
1265 		if (c == '\\') {
1266 			if ((c2 = getsc_()) == '\n')
1267 				/* ignore the \newline; get the next char... */
1268 				continue;
1269 			ungetsc(c2);
1270 			backslash_skip = 1;
1271 		}
1272 		return c;
1273 	}
1274 }
1275